diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -3741,12 +3741,9 @@ last = IS_DEFAULT_VNET(curvnet) ? 1 : 0; IPFW_UH_WLOCK(chain); - IPFW_UH_WUNLOCK(chain); ipfw_dyn_uninit(0); /* run the callout_drain */ - IPFW_UH_WLOCK(chain); - reap = NULL; IPFW_WLOCK(chain); for (i = 0; i < chain->n_rules; i++) diff --git a/sys/netpfil/ipfw/ip_fw_dynamic.c b/sys/netpfil/ipfw/ip_fw_dynamic.c --- a/sys/netpfil/ipfw/ip_fw_dynamic.c +++ b/sys/netpfil/ipfw/ip_fw_dynamic.c @@ -663,6 +663,8 @@ ipfw_obj_ntlv *ntlv; char *name; + IPFW_UH_WLOCK_ASSERT(ch); + DYN_DEBUG("uidx %u", ti->uidx); if (ti->uidx != 0) { if (ti->tlvs == NULL) @@ -681,7 +683,6 @@ obj->no.etlv = IPFW_TLV_STATE_NAME; strlcpy(obj->name, name, sizeof(obj->name)); - IPFW_UH_WLOCK(ch); no = ipfw_objhash_lookup_name_type(ni, 0, IPFW_TLV_STATE_NAME, name); if (no != NULL) { @@ -691,14 +692,12 @@ */ *pkidx = no->kidx; no->refcnt++; - IPFW_UH_WUNLOCK(ch); free(obj, M_IPFW); DYN_DEBUG("\tfound kidx %u for name '%s'", *pkidx, no->name); return (0); } if (ipfw_objhash_alloc_idx(ni, &obj->no.kidx) != 0) { DYN_DEBUG("\talloc_idx failed for %s", name); - IPFW_UH_WUNLOCK(ch); free(obj, M_IPFW); return (ENOSPC); } @@ -706,7 +705,6 @@ SRV_OBJECT(ch, obj->no.kidx) = obj; obj->no.refcnt++; *pkidx = obj->no.kidx; - IPFW_UH_WUNLOCK(ch); DYN_DEBUG("\tcreated kidx %u for name '%s'", *pkidx, name); return (0); } @@ -2145,9 +2143,6 @@ * Userland can invoke ipfw_expire_dyn_states() to delete * specific states, this will lead to modification of expired * lists. - * - * XXXAE: do we need DYN_EXPIRED_LOCK? We can just use - * IPFW_UH_WLOCK to protect access to these lists. */ DYN_EXPIRED_LOCK(); DYN_FREE_STATES(s4, s4n, ipv4); @@ -2306,8 +2301,6 @@ void *rule; int bucket, removed, length, max_length; - IPFW_UH_WLOCK_ASSERT(ch); - /* * Unlink expired states from each bucket. * With acquired bucket lock iterate entries of each lists: @@ -2733,10 +2726,6 @@ s, entry); \ } \ } while (0) - /* - * Prevent rules changing from userland. - */ - IPFW_UH_WLOCK(chain); /* * Hold traffic processing until we finish resize to * prevent access to states lists. @@ -2780,7 +2769,6 @@ V_curr_dyn_buckets = new; IPFW_WUNLOCK(chain); - IPFW_UH_WUNLOCK(chain); /* Release old resources */ while (bucket-- != 0) @@ -2818,15 +2806,8 @@ * First free states unlinked in previous passes. */ dyn_free_states(&V_layer3_chain); - /* - * Now unlink others expired states. - * We use IPFW_UH_WLOCK to avoid concurrent call of - * dyn_expire_states(). It is the only function that does - * deletion of state entries from states lists. - */ - IPFW_UH_WLOCK(&V_layer3_chain); dyn_expire_states(&V_layer3_chain, NULL); - IPFW_UH_WUNLOCK(&V_layer3_chain); + /* * Send keepalives if they are enabled and the time has come. */ @@ -2863,14 +2844,24 @@ void ipfw_expire_dyn_states(struct ip_fw_chain *chain, ipfw_range_tlv *rt) { + IPFW_RLOCK_TRACKER; + /* * Do not perform any checks if we currently have no dynamic states */ if (V_dyn_count == 0) return; - IPFW_UH_WLOCK_ASSERT(chain); + /* + * Acquire read lock to prevent race with dyn_grow_hashtable() called + * via dyn_tick(). Note that dyn_tick() also calls dyn_expire_states(), + * but doesn't acquire the chain lock. A race between dyn_tick() and + * this function should be safe, as dyn_expire_states() does all proper + * locking of buckets and expire lists. + */ + IPFW_RLOCK(chain); dyn_expire_states(chain, rt); + IPFW_RUNLOCK(chain); } /* diff --git a/sys/netpfil/ipfw/ip_fw_iface.c b/sys/netpfil/ipfw/ip_fw_iface.c --- a/sys/netpfil/ipfw/ip_fw_iface.c +++ b/sys/netpfil/ipfw/ip_fw_iface.c @@ -320,9 +320,7 @@ * First request to subsystem. * Let's perform init. */ - IPFW_UH_WUNLOCK(ch); vnet_ipfw_iface_init(ch); - IPFW_UH_WLOCK(ch); ii = CHAIN_TO_II(ch); } @@ -335,8 +333,6 @@ return (0); } - IPFW_UH_WUNLOCK(ch); - /* Not found. Let's create one */ iif = malloc(sizeof(struct ipfw_iface), M_IPFW, M_WAITOK | M_ZERO); TAILQ_INIT(&iif->consumers); @@ -350,7 +346,6 @@ * are not holding any locks. */ iif->no.refcnt = 1; - IPFW_UH_WLOCK(ch); tmp = (struct ipfw_iface *)ipfw_objhash_lookup_name(ii, 0, name); if (tmp != NULL) { diff --git a/sys/netpfil/ipfw/ip_fw_nat.c b/sys/netpfil/ipfw/ip_fw_nat.c --- a/sys/netpfil/ipfw/ip_fw_nat.c +++ b/sys/netpfil/ipfw/ip_fw_nat.c @@ -503,7 +503,6 @@ gencnt = chain->gencnt; ptr = lookup_nat_name(&chain->nat, ucfg->name); if (ptr == NULL) { - IPFW_UH_WUNLOCK(chain); /* New rule: allocate and init new instance. */ ptr = malloc(sizeof(struct cfg_nat), M_IPFW, M_WAITOK | M_ZERO); ptr->lib = LibAliasInit(NULL); @@ -514,7 +513,6 @@ LIST_REMOVE(ptr, _next); flush_nat_ptrs(chain, ptr->id); IPFW_WUNLOCK(chain); - IPFW_UH_WUNLOCK(chain); } /* @@ -543,7 +541,6 @@ del_redir_spool_cfg(ptr, &ptr->redir_chain); /* Add new entries. */ add_redir_spool_cfg((char *)(ucfg + 1), ptr); - IPFW_UH_WLOCK(chain); /* Extra check to avoid race with another ipfw_nat_cfg() */ tcfg = NULL; @@ -1049,7 +1046,6 @@ len += sizeof(struct cfg_spool_legacy); } } - IPFW_UH_RUNLOCK(chain); data = malloc(len, M_TEMP, M_WAITOK | M_ZERO); bcopy(&nat_cnt, data, sizeof(nat_cnt)); @@ -1057,7 +1053,6 @@ nat_cnt = 0; len = sizeof(nat_cnt); - IPFW_UH_RLOCK(chain); if (gencnt != chain->gencnt) { free(data, M_TEMP); goto retry; diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h --- a/sys/netpfil/ipfw/ip_fw_private.h +++ b/sys/netpfil/ipfw/ip_fw_private.h @@ -323,7 +323,7 @@ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t uh_lock; #else - struct rwlock uh_lock; /* lock for upper half */ + struct sx uh_lock; /* lock for upper half */ #endif }; @@ -451,12 +451,12 @@ #else /* FreeBSD */ #define IPFW_LOCK_INIT(_chain) do { \ rm_init_flags(&(_chain)->rwmtx, "IPFW static rules", RM_RECURSE); \ - rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \ + sx_init(&(_chain)->uh_lock, "IPFW UH lock"); \ } while (0) #define IPFW_LOCK_DESTROY(_chain) do { \ rm_destroy(&(_chain)->rwmtx); \ - rw_destroy(&(_chain)->uh_lock); \ + sx_destroy(&(_chain)->uh_lock); \ } while (0) #define IPFW_RLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_RLOCKED) @@ -471,14 +471,14 @@ #define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) #endif -#define IPFW_UH_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_RLOCKED) -#define IPFW_UH_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_WLOCKED) -#define IPFW_UH_UNLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_UNLOCKED) +#define IPFW_UH_RLOCK_ASSERT(_chain) sx_assert(&(_chain)->uh_lock, SA_SLOCKED) +#define IPFW_UH_WLOCK_ASSERT(_chain) sx_assert(&(_chain)->uh_lock, SA_XLOCKED) +#define IPFW_UH_UNLOCK_ASSERT(_chain) sx_assert(&(_chain)->uh_lock, SA_UNLOCKED) -#define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock) -#define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock) -#define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock) -#define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock) +#define IPFW_UH_RLOCK(p) sx_slock(&(p)->uh_lock) +#define IPFW_UH_RUNLOCK(p) sx_sunlock(&(p)->uh_lock) +#define IPFW_UH_WLOCK(p) sx_xlock(&(p)->uh_lock) +#define IPFW_UH_WUNLOCK(p) sx_xunlock(&(p)->uh_lock) struct obj_idx { uint32_t uidx; /* internal index supplied by userland */ diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c --- a/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -337,44 +337,15 @@ } /* - * allocate a new map, returns the chain locked. extra is the number - * of entries to add or delete. - */ -static struct ip_fw ** -get_map(struct ip_fw_chain *chain, int extra, int locked) -{ - - for (;;) { - struct ip_fw **map; - u_int i, mflags; - - mflags = M_ZERO | ((locked != 0) ? M_NOWAIT : M_WAITOK); - - i = chain->n_rules + extra; - map = malloc(i * sizeof(struct ip_fw *), M_IPFW, mflags); - if (map == NULL) { - printf("%s: cannot allocate map\n", __FUNCTION__); - return NULL; - } - if (!locked) - IPFW_UH_WLOCK(chain); - if (i >= chain->n_rules + extra) /* good */ - return map; - /* otherwise we lost the race, free and retry */ - if (!locked) - IPFW_UH_WUNLOCK(chain); - free(map, M_IPFW); - } -} - -/* - * swap the maps. It is supposed to be called with IPFW_UH_WLOCK + * swap the maps. */ static struct ip_fw ** swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len) { struct ip_fw **old_map; + IPFW_UH_WLOCK_ASSERT(chain); + IPFW_WLOCK(chain); chain->id++; chain->n_rules = new_len; @@ -459,6 +430,7 @@ struct ip_fw *krule; struct ip_fw **map; /* the new array of pointers */ + IPFW_UH_WLOCK(chain); /* Check if we need to do table/obj index remap */ tcount = 0; for (ci = rci, i = 0; i < count; ci++, i++) { @@ -484,8 +456,6 @@ * We have some more table rules * we need to rollback. */ - - IPFW_UH_WLOCK(chain); while (ci != rci) { ci--; if (ci->object_opcodes == 0) @@ -493,33 +463,16 @@ unref_rule_objects(chain,ci->krule); } - IPFW_UH_WUNLOCK(chain); - } - + IPFW_UH_WUNLOCK(chain); return (error); } tcount++; } - /* get_map returns with IPFW_UH_WLOCK if successful */ - map = get_map(chain, count, 0 /* not locked */); - if (map == NULL) { - if (tcount > 0) { - /* Unbind tables */ - IPFW_UH_WLOCK(chain); - for (ci = rci, i = 0; i < count; ci++, i++) { - if (ci->object_opcodes == 0) - continue; - - unref_rule_objects(chain, ci->krule); - } - IPFW_UH_WUNLOCK(chain); - } - - return (ENOSPC); - } + map = malloc((chain->n_rules + count) * sizeof(struct ip_fw *), + M_IPFW, M_ZERO | M_WAITOK); if (V_autoinc_step < 1) V_autoinc_step = 1; @@ -572,9 +525,9 @@ { struct ip_fw **map; - map = get_map(chain, 1, 0); - if (map == NULL) - return (ENOMEM); + IPFW_UH_WLOCK(chain); + map = malloc((chain->n_rules + 1) * sizeof(struct ip_fw *), + M_IPFW, M_ZERO | M_WAITOK); if (chain->n_rules > 0) bcopy(chain->map, map, chain->n_rules * sizeof(struct ip_fw *)); @@ -812,12 +765,8 @@ } /* Allocate new map of the same size */ - map = get_map(chain, 0, 1 /* locked */); - if (map == NULL) { - IPFW_UH_WUNLOCK(chain); - return (ENOMEM); - } - + map = malloc(chain->n_rules * sizeof(struct ip_fw *), + M_IPFW, M_ZERO | M_WAITOK); n = 0; ndyn = 0; ofs = start; @@ -2227,7 +2176,7 @@ cmdlen = 0; error = 0; - IPFW_UH_WLOCK(ch); + IPFW_UH_WLOCK_ASSERT(ch); /* Increase refcount on each existing referenced table. */ for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { @@ -2250,10 +2199,8 @@ if (error != 0) { /* Unref everything we have already done */ unref_oib_objects(ch, rule->cmd, oib, pidx); - IPFW_UH_WUNLOCK(ch); return (error); } - IPFW_UH_WUNLOCK(ch); /* Perform auto-creation for non-existing objects */ if (pidx != oib) diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c --- a/sys/netpfil/ipfw/ip_fw_table.c +++ b/sys/netpfil/ipfw/ip_fw_table.c @@ -277,7 +277,6 @@ * creating new one. * * Saves found table config into @ptc. - * Note function may drop/acquire UH_WLOCK. * Returns 0 if table was found/created and referenced * or non-zero return code. */ @@ -321,9 +320,7 @@ if ((tei->flags & TEI_FLAGS_COMPAT) == 0) return (ESRCH); - IPFW_UH_WUNLOCK(ch); error = create_table_compat(ch, ti, &kidx); - IPFW_UH_WLOCK(ch); if (error != 0) return (error); @@ -560,12 +557,10 @@ */ restart: if (ts.modified != 0) { - IPFW_UH_WUNLOCK(ch); flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf); memset(&ts, 0, sizeof(ts)); ta = NULL; - IPFW_UH_WLOCK(ch); } error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc); @@ -586,14 +581,12 @@ ts.count = count; rollback = 0; add_toperation_state(ch, &ts); - IPFW_UH_WUNLOCK(ch); /* Allocate memory and prepare record(s) */ /* Pass stack buffer by default */ ta_buf_m = ta_buf; error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m); - IPFW_UH_WLOCK(ch); del_toperation_state(ch, &ts); /* Drop reference we've used in first search */ tc->no.refcnt--; @@ -612,8 +605,6 @@ /* * Link all values values to shared/per-table value array. - * - * May release/reacquire UH_WLOCK. */ error = ipfw_link_table_values(ch, &ts, flags); if (error != 0) @@ -623,7 +614,7 @@ /* * Ensure we are able to add all entries without additional - * memory allocations. May release/reacquire UH_WLOCK. + * memory allocations. */ kidx = tc->no.kidx; error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count); @@ -730,7 +721,6 @@ return (error); } ta = tc->ta; - IPFW_UH_WUNLOCK(ch); /* Allocate memory and prepare record(s) */ /* Pass stack buffer by default */ @@ -739,8 +729,6 @@ if (error != 0) goto cleanup; - IPFW_UH_WLOCK(ch); - /* Drop reference we've used in first search */ tc->no.refcnt--; @@ -841,12 +829,10 @@ /* We have to shrink/grow table */ if (ts != NULL) add_toperation_state(ch, ts); - IPFW_UH_WUNLOCK(ch); memset(&ta_buf, 0, sizeof(ta_buf)); error = ta->prepare_mod(ta_buf, &pflags); - IPFW_UH_WLOCK(ch); if (ts != NULL) del_toperation_state(ch, ts); @@ -866,8 +852,6 @@ /* Check if we still need to alter table */ ti = KIDX_TO_TI(ch, tc->no.kidx); if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) { - IPFW_UH_WUNLOCK(ch); - /* * Other thread has already performed resize. * Flush our state and return. @@ -1185,7 +1169,6 @@ tflags = tc->tflags; tc->no.refcnt++; add_toperation_state(ch, &ts); - IPFW_UH_WUNLOCK(ch); /* * Stage 1.5: if this is not the first attempt, destroy previous state @@ -1205,7 +1188,6 @@ * Stage 3: swap old state pointers with newly-allocated ones. * Decrease refcount. */ - IPFW_UH_WLOCK(ch); tc->no.refcnt--; del_toperation_state(ch, &ts); @@ -1742,6 +1724,7 @@ char *tname, *aname; struct tid_info ti; struct namedobj_instance *ni; + int rv; if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info)) return (EINVAL); @@ -1769,14 +1752,15 @@ ni = CHAIN_TO_NI(ch); - IPFW_UH_RLOCK(ch); + IPFW_UH_WLOCK(ch); if (find_table(ni, &ti) != NULL) { - IPFW_UH_RUNLOCK(ch); + IPFW_UH_WUNLOCK(ch); return (EEXIST); } - IPFW_UH_RUNLOCK(ch); + rv = create_table_internal(ch, &ti, aname, i, NULL, 0); + IPFW_UH_WUNLOCK(ch); - return (create_table_internal(ch, &ti, aname, i, NULL, 0)); + return (rv); } /* @@ -1797,6 +1781,8 @@ struct table_algo *ta; uint32_t kidx; + IPFW_UH_WLOCK_ASSERT(ch); + ni = CHAIN_TO_NI(ch); ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname); @@ -1814,8 +1800,6 @@ else tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0; - IPFW_UH_WLOCK(ch); - /* Check if table has been already created */ tc_new = find_table(ni, ti); if (tc_new != NULL) { @@ -1825,7 +1809,6 @@ * which has the same type */ if (compat == 0 || tc_new->no.subtype != tc->no.subtype) { - IPFW_UH_WUNLOCK(ch); free_table_config(ni, tc); return (EEXIST); } @@ -1837,7 +1820,6 @@ } else { /* New table */ if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) { - IPFW_UH_WUNLOCK(ch); printf("Unable to allocate table index." " Consider increasing net.inet.ip.fw.tables_max"); free_table_config(ni, tc); @@ -1854,8 +1836,6 @@ if (pkidx != NULL) *pkidx = tc->no.kidx; - IPFW_UH_WUNLOCK(ch); - if (tc_new != NULL) free_table_config(ni, tc_new); diff --git a/sys/netpfil/ipfw/ip_fw_table_value.c b/sys/netpfil/ipfw/ip_fw_table_value.c --- a/sys/netpfil/ipfw/ip_fw_table_value.c +++ b/sys/netpfil/ipfw/ip_fw_table_value.c @@ -167,7 +167,6 @@ /* * Grows value storage shared among all tables. - * Drops/reacquires UH locks. * Notifies other running adds on @ch shared storage resize. * Note function does not guarantee that free space * will be available after invocation, so one caller needs @@ -200,15 +199,11 @@ if (val_size == (1 << 30)) return (ENOSPC); - IPFW_UH_WUNLOCK(ch); - valuestate = malloc(sizeof(struct table_value) * val_size, M_IPFW, M_WAITOK | M_ZERO); ipfw_objhash_bitmap_alloc(val_size, (void *)&new_idx, &new_blocks); - IPFW_UH_WLOCK(ch); - /* * Check if we still need to resize */ @@ -359,7 +354,6 @@ /* * Allocate new value index in either shared or per-table array. - * Function may drop/reacquire UH lock. * * Returns 0 on success. */ @@ -375,9 +369,7 @@ error = ipfw_objhash_alloc_idx(vi, &vidx); if (error != 0) { /* - * We need to resize array. This involves - * lock/unlock, so we need to check "modified" - * state. + * We need to resize array. */ ts->opstate.func(ts->tc, &ts->opstate); error = resize_shared_value_storage(ch); @@ -525,7 +517,6 @@ add_toperation_state(ch, ts); /* Ensure table won't disappear */ tc_ref(tc); - IPFW_UH_WUNLOCK(ch); /* * Stage 2: allocate objects for non-existing values. @@ -544,7 +535,6 @@ * Stage 3: allocate index numbers for new values * and link them to index. */ - IPFW_UH_WLOCK(ch); tc_unref(tc); del_toperation_state(ch, ts); if (ts->modified != 0) { @@ -572,7 +562,6 @@ continue; } - /* May perform UH unlock/lock */ error = alloc_table_vidx(ch, ts, vi, &vidx, flags); if (error != 0) { ts->opstate.func(ts->tc, &ts->opstate);