Index: sys/kern/subr_witness.c =================================================================== --- sys/kern/subr_witness.c +++ sys/kern/subr_witness.c @@ -561,14 +561,14 @@ /* * UDP/IP */ - { "udp", &lock_class_rw }, + { "udp", &lock_class_mtx_sleep }, { "udpinp", &lock_class_rw }, { "so_snd", &lock_class_mtx_sleep }, { NULL, NULL }, /* * TCP/IP */ - { "tcp", &lock_class_rw }, + { "tcp", &lock_class_mtx_sleep }, { "tcpinp", &lock_class_rw }, { "so_snd", &lock_class_mtx_sleep }, { NULL, NULL }, Index: sys/netinet/in_pcb.h =================================================================== --- sys/netinet/in_pcb.h +++ sys/netinet/in_pcb.h @@ -51,8 +51,11 @@ #include #include #include +#include +#include #include #endif +#include #define in6pcb inpcb /* for KAME src sync over BSD*'s */ #define in6p_sp inp_sp /* for KAME src sync over BSD*'s */ @@ -65,8 +68,8 @@ * numbers, and pointers up (to a socket structure) and down (to a * protocol-specific control block) are stored here. */ -LIST_HEAD(inpcbhead, inpcb); -LIST_HEAD(inpcbporthead, inpcbport); +CK_LIST_HEAD(inpcbhead, inpcb); +CK_LIST_HEAD(inpcbporthead, inpcbport); typedef uint64_t inp_gen_t; /* @@ -156,6 +159,7 @@ * Key: * (b) - Protected by the hpts lock. * (c) - Constant after initialization + * (e) - Protected by the net_epoch_prempt epoch * (g) - Protected by the pcbgroup lock * (i) - Protected by the inpcb lock * (p) - Protected by the pcbinfo lock for the inpcb @@ -230,8 +234,8 @@ struct m_snd_tag; struct inpcb { /* Cache line #1 (amd64) */ - LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */ - LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */ + CK_LIST_ENTRY(inpcb) inp_hash; /* [w](h/i) [r](e/i) hash list */ + CK_LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */ struct rwlock inp_lock; /* Cache line #2 (amd64) */ #define inp_start_zero inp_hpts @@ -313,7 +317,7 @@ int in6p_cksum; short in6p_hops; }; - LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */ + CK_LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */ struct inpcbport *inp_phd; /* (i/h) head of this list */ inp_gen_t inp_gencnt; /* (c) generation count */ struct llentry *inp_lle; /* cached L2 information */ @@ -322,9 +326,10 @@ struct route inp_route; struct route_in6 inp_route6; }; - LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */ - /* (p[w]) for list iteration */ - /* (p[r]/l) for addition/removal */ + CK_LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */ + /* (e[r]) for list iteration */ + /* (p[w]/l) for addition/removal */ + struct epoch_context inp_epoch_ctx; }; #endif /* _KERNEL */ @@ -400,7 +405,8 @@ #endif /* _SYS_SOCKETVAR_H_ */ struct inpcbport { - LIST_ENTRY(inpcbport) phd_hash; + struct epoch_context phd_epoch_ctx; + CK_LIST_ENTRY(inpcbport) phd_hash; struct inpcbhead phd_pcblist; u_short phd_port; }; @@ -433,22 +439,23 @@ * Locking key: * * (c) Constant or nearly constant after initialisation + * (e) - Protected by the net_epoch_prempt epoch * (g) Locked by ipi_lock * (l) Locked by ipi_list_lock - * (h) Read using either ipi_hash_lock or inpcb lock; write requires both + * (h) Read using either net_epoch_preempt or inpcb lock; write requires both ipi_hash_lock and inpcb lock * (p) Protected by one or more pcbgroup locks * (x) Synchronisation properties poorly defined */ struct inpcbinfo { /* - * Global lock protecting full inpcb list traversal + * Global lock protecting inpcb modification */ - struct rwlock ipi_lock; + struct mtx ipi_lock; /* * Global list of inpcbs on the protocol. */ - struct inpcbhead *ipi_listhead; /* (g/l) */ + struct inpcbhead *ipi_listhead; /* [r](e) [w](g/l) */ u_int ipi_count; /* (l) */ /* @@ -479,9 +486,9 @@ u_int ipi_hashfields; /* (c) */ /* - * Global lock protecting non-pcbgroup hash lookup tables. + * Global lock protecting modification non-pcbgroup hash lookup tables. */ - struct rwlock ipi_hash_lock; + struct mtx ipi_hash_lock; /* * Global hash of inpcbs, hashed by local and foreign addresses and @@ -623,20 +630,19 @@ #endif /* _KERNEL */ #define INP_INFO_LOCK_INIT(ipi, d) \ - rw_init_flags(&(ipi)->ipi_lock, (d), RW_RECURSE) -#define INP_INFO_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_lock) -#define INP_INFO_RLOCK(ipi) rw_rlock(&(ipi)->ipi_lock) -#define INP_INFO_WLOCK(ipi) rw_wlock(&(ipi)->ipi_lock) -#define INP_INFO_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_lock) -#define INP_INFO_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_lock) -#define INP_INFO_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_lock) -#define INP_INFO_WLOCKED(ipi) rw_wowned(&(ipi)->ipi_lock) -#define INP_INFO_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_lock) -#define INP_INFO_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_lock) -#define INP_INFO_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_LOCKED) -#define INP_INFO_RLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_RLOCKED) -#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED) -#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED) + mtx_init(&(ipi)->ipi_lock, (d), NULL, MTX_DEF| MTX_RECURSE) +#define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_lock) +#define INP_INFO_RLOCK(ipi) NET_EPOCH_ENTER() +#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock) +#define INP_INFO_TRY_WLOCK(ipi) mtx_trylock(&(ipi)->ipi_lock) +#define INP_INFO_TRY_UPGRADE(ipi) mtx_trylock(&(ipi)->ipi_lock) +#define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock) +#define INP_INFO_RUNLOCK(ipi) NET_EPOCH_EXIT() +#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock) +#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch() || mtx_owned(&(ipi)->ipi_lock)) +#define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch()) +#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED) +#define INP_INFO_UNLOCK_ASSERT(ipi) MPASS(!in_epoch() && !mtx_owned(&(ipi)->ipi_lock)) #define INP_LIST_LOCK_INIT(ipi, d) \ rw_init_flags(&(ipi)->ipi_list_lock, (d), 0) @@ -657,17 +663,14 @@ #define INP_LIST_UNLOCK_ASSERT(ipi) \ rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED) -#define INP_HASH_LOCK_INIT(ipi, d) \ - rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0) -#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock) -#define INP_HASH_RLOCK(ipi) rw_rlock(&(ipi)->ipi_hash_lock) -#define INP_HASH_WLOCK(ipi) rw_wlock(&(ipi)->ipi_hash_lock) -#define INP_HASH_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_hash_lock) -#define INP_HASH_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_hash_lock) -#define INP_HASH_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \ - RA_LOCKED) -#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \ - RA_WLOCKED) +#define INP_HASH_LOCK_INIT(ipi, d) mtx_init(&(ipi)->ipi_hash_lock, (d), NULL, MTX_DEF) +#define INP_HASH_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_hash_lock) +#define INP_HASH_RLOCK(ipi) NET_EPOCH_ENTER() +#define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock) +#define INP_HASH_RUNLOCK(ipi) NET_EPOCH_EXIT() +#define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock) +#define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch() || mtx_owned(&(ipi)->ipi_hash_lock)) +#define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, MA_OWNED); #define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \ MTX_DEF | MTX_DUPOK) Index: sys/netinet/in_pcb.c =================================================================== --- sys/netinet/in_pcb.c +++ sys/netinet/in_pcb.c @@ -456,7 +456,7 @@ pcbinfo->ipi_vnet = curvnet; #endif pcbinfo->ipi_listhead = listhead; - LIST_INIT(pcbinfo->ipi_listhead); + CK_LIST_INIT(pcbinfo->ipi_listhead); pcbinfo->ipi_count = 0; pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB, &pcbinfo->ipi_hashmask); @@ -549,7 +549,7 @@ #endif INP_WLOCK(inp); INP_LIST_WLOCK(pcbinfo); - LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list); + CK_LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list); pcbinfo->ipi_count++; so->so_pcb = (caddr_t)inp; #ifdef INET6 @@ -1569,6 +1569,53 @@ free(il, M_TEMP); } +static void +inpcbport_free(epoch_context_t ctx) +{ + struct inpcbport *phd; + + phd = __containerof(ctx, struct inpcbport, phd_epoch_ctx); + free(phd, M_PCB); +} + +static void +in_pcbfree_deferred(epoch_context_t ctx) +{ + struct inpcb *inp; + struct inpcbinfo *pcbinfo; + int released __unused; + + inp = __containerof(ctx, struct inpcb, inp_epoch_ctx); + pcbinfo = inp->inp_pcbinfo; + + INP_WLOCK(inp); +#ifdef INET + inp_freemoptions(inp->inp_moptions); + inp->inp_moptions = NULL; +#endif + /* XXXRW: Do as much as possible here. */ +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (inp->inp_sp != NULL) + ipsec_delete_pcbpolicy(inp); +#endif +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6PROTO) { + ip6_freepcbopts(inp->in6p_outputopts); + ip6_freemoptions(inp->in6p_moptions); + inp->in6p_moptions = NULL; + } +#endif + if (inp->inp_options) + (void)m_free(inp->inp_options); + inp->inp_vflag = 0; + crfree(inp->inp_cred); +#ifdef MAC + mac_inpcb_destroy(inp); +#endif + released = in_pcbrele_wlocked(inp); + MPASS(released); +} + /* * Unconditionally schedule an inpcb to be freed by decrementing its * reference count, which should occur only after the inpcb has been detached @@ -1583,12 +1630,6 @@ { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; -#ifdef INET6 - struct ip6_moptions *im6o = NULL; -#endif -#ifdef INET - struct ip_moptions *imo = NULL; -#endif KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__)); KASSERT((inp->inp_flags2 & INP_FREED) == 0, @@ -1606,45 +1647,14 @@ } #endif INP_WLOCK_ASSERT(inp); - -#ifdef INET - imo = inp->inp_moptions; - inp->inp_moptions = NULL; -#endif - /* XXXRW: Do as much as possible here. */ -#if defined(IPSEC) || defined(IPSEC_SUPPORT) - if (inp->inp_sp != NULL) - ipsec_delete_pcbpolicy(inp); -#endif INP_LIST_WLOCK(pcbinfo); - inp->inp_gencnt = ++pcbinfo->ipi_gencnt; in_pcbremlists(inp); INP_LIST_WUNLOCK(pcbinfo); -#ifdef INET6 - if (inp->inp_vflag & INP_IPV6PROTO) { - ip6_freepcbopts(inp->in6p_outputopts); - im6o = inp->in6p_moptions; - inp->in6p_moptions = NULL; - } -#endif - if (inp->inp_options) - (void)m_free(inp->inp_options); RO_INVALIDATE_CACHE(&inp->inp_route); - inp->inp_vflag = 0; inp->inp_flags2 |= INP_FREED; - crfree(inp->inp_cred); -#ifdef MAC - mac_inpcb_destroy(inp); -#endif -#ifdef INET6 - ip6_freemoptions(im6o); -#endif -#ifdef INET - inp_freemoptions(imo); -#endif - if (!in_pcbrele_wlocked(inp)) - INP_WUNLOCK(inp); + INP_WUNLOCK(inp); + epoch_call(net_epoch_preempt, &inp->inp_epoch_ctx, in_pcbfree_deferred); } /* @@ -1677,11 +1687,11 @@ INP_HASH_WLOCK(inp->inp_pcbinfo); in_pcbremlbgrouphash(inp); - LIST_REMOVE(inp, inp_hash); - LIST_REMOVE(inp, inp_portlist); - if (LIST_FIRST(&phd->phd_pcblist) == NULL) { - LIST_REMOVE(phd, phd_hash); - free(phd, M_PCB); + CK_LIST_REMOVE(inp, inp_hash); + CK_LIST_REMOVE(inp, inp_portlist); + if (CK_LIST_FIRST(&phd->phd_pcblist) == NULL) { + CK_LIST_REMOVE(phd, phd_hash); + epoch_call(net_epoch_preempt, &phd->phd_epoch_ctx, inpcbport_free); } INP_HASH_WUNLOCK(inp->inp_pcbinfo); inp->inp_flags &= ~INP_INHASHLIST; @@ -1755,7 +1765,7 @@ struct inpcb *inp, *inp_temp; INP_INFO_WLOCK(pcbinfo); - LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { + CK_LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { INP_WLOCK(inp); #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) { @@ -1782,7 +1792,7 @@ int i, gap; INP_INFO_WLOCK(pcbinfo); - LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { + CK_LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { INP_WLOCK(inp); imo = inp->inp_moptions; if ((inp->inp_vflag & INP_IPV4) && @@ -1847,7 +1857,7 @@ */ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->ipi_hashmask)]; - LIST_FOREACH(inp, head, inp_hash) { + CK_LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) @@ -1881,7 +1891,7 @@ */ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, pcbinfo->ipi_porthashmask)]; - LIST_FOREACH(phd, porthash, phd_hash) { + CK_LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } @@ -1890,7 +1900,7 @@ * Port is in use by one or more PCBs. Look for best * fit. */ - LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { + CK_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; if (cred != NULL && !prison_equal_ip4(inp->inp_cred->cr_prison, @@ -2249,7 +2259,7 @@ tmpinp = NULL; head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->ipi_hashmask)]; - LIST_FOREACH(inp, head, inp_hash) { + CK_LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) @@ -2306,7 +2316,7 @@ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->ipi_hashmask)]; - LIST_FOREACH(inp, head, inp_hash) { + CK_LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) @@ -2382,7 +2392,14 @@ locked = INP_TRY_RLOCK(inp); else panic("%s: locking bug", __func__); - if (!locked) + if (__predict_false(locked && (inp->inp_flags2 & INP_FREED))) { + if (lookupflags & INPLOOKUP_WLOCKPCB) + INP_WUNLOCK(inp); + else + INP_RUNLOCK(inp); + INP_HASH_RUNLOCK(pcbinfo); + return (NULL); + } else if (!locked) in_pcbref(inp); INP_HASH_RUNLOCK(pcbinfo); if (!locked) { @@ -2539,7 +2556,7 @@ /* * Go through port list and look for a head for this lport. */ - LIST_FOREACH(phd, pcbporthash, phd_hash) { + CK_LIST_FOREACH(phd, pcbporthash, phd_hash) { if (phd->phd_port == inp->inp_lport) break; } @@ -2547,17 +2564,17 @@ * If none exists, malloc one and tack it on. */ if (phd == NULL) { - phd = malloc(sizeof(struct inpcbport), M_PCB, M_NOWAIT); + phd = malloc(sizeof(struct inpcbport), M_PCB, M_NOWAIT|M_ZERO); if (phd == NULL) { return (ENOBUFS); /* XXX */ } phd->phd_port = inp->inp_lport; - LIST_INIT(&phd->phd_pcblist); - LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); + CK_LIST_INIT(&phd->phd_pcblist); + CK_LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); } inp->inp_phd = phd; - LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); - LIST_INSERT_HEAD(pcbhash, inp, inp_hash); + CK_LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); + CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash); inp->inp_flags |= INP_INHASHLIST; #ifdef PCBGROUP if (do_pcbgroup_update) @@ -2620,8 +2637,8 @@ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)]; - LIST_REMOVE(inp, inp_hash); - LIST_INSERT_HEAD(head, inp, inp_hash); + CK_LIST_REMOVE(inp, inp_hash); + CK_LIST_INSERT_HEAD(head, inp, inp_hash); #ifdef PCBGROUP if (m != NULL) @@ -2638,6 +2655,7 @@ in_pcbrehash_mbuf(inp, NULL); } + /* * Remove PCB from various lists. */ @@ -2662,20 +2680,18 @@ struct inpcbport *phd = inp->inp_phd; INP_HASH_WLOCK(pcbinfo); - /* XXX: Only do if SO_REUSEPORT_LB set? */ in_pcbremlbgrouphash(inp); - - LIST_REMOVE(inp, inp_hash); - LIST_REMOVE(inp, inp_portlist); - if (LIST_FIRST(&phd->phd_pcblist) == NULL) { - LIST_REMOVE(phd, phd_hash); - free(phd, M_PCB); + CK_LIST_REMOVE(inp, inp_hash); + CK_LIST_REMOVE(inp, inp_portlist); + if (CK_LIST_FIRST(&phd->phd_pcblist) == NULL) { + CK_LIST_REMOVE(phd, phd_hash); + epoch_call(net_epoch_preempt, &phd->phd_epoch_ctx, inpcbport_free); } INP_HASH_WUNLOCK(pcbinfo); inp->inp_flags &= ~INP_INHASHLIST; } - LIST_REMOVE(inp, inp_list); + CK_LIST_REMOVE(inp, inp_list); pcbinfo->ipi_count--; #ifdef PCBGROUP in_pcbgroup_remove(inp); @@ -2819,7 +2835,7 @@ struct inpcb *inp; INP_INFO_WLOCK(&V_tcbinfo); - LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) { + CK_LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) { INP_WLOCK(inp); func(inp, arg); INP_WUNLOCK(inp); Index: sys/netinet/ip_divert.c =================================================================== --- sys/netinet/ip_divert.c +++ sys/netinet/ip_divert.c @@ -273,7 +273,7 @@ sa = NULL; nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info)); INP_INFO_RLOCK(&V_divcbinfo); - LIST_FOREACH(inp, &V_divcb, inp_list) { + CK_LIST_FOREACH(inp, &V_divcb, inp_list) { /* XXX why does only one socket match? */ if (inp->inp_lport == nport) { INP_RLOCK(inp); @@ -676,8 +676,8 @@ inp_list = il->il_inp_list; INP_INFO_RLOCK(&V_divcbinfo); - for (inp = LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n; - inp = LIST_NEXT(inp, inp_list)) { + for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n; + inp = CK_LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseeinpcb(req->td->td_ucred, inp) == 0) { Index: sys/netinet/raw_ip.c =================================================================== --- sys/netinet/raw_ip.c +++ sys/netinet/raw_ip.c @@ -170,7 +170,7 @@ } else hash = 0; pcbhash = &pcbinfo->ipi_hashbase[hash]; - LIST_INSERT_HEAD(pcbhash, inp, inp_hash); + CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash); } static void @@ -180,7 +180,7 @@ INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_WLOCK_ASSERT(inp); - LIST_REMOVE(inp, inp_hash); + CK_LIST_REMOVE(inp, inp_hash); } #endif /* INET */ @@ -300,7 +300,7 @@ hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr, ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask); INP_INFO_RLOCK(&V_ripcbinfo); - LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) { + CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) { if (inp->inp_ip_p != proto) continue; #ifdef INET6 @@ -332,7 +332,7 @@ INP_RLOCK(inp); last = inp; } - LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) { + CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) { if (inp->inp_ip_p && inp->inp_ip_p != proto) continue; #ifdef INET6 @@ -1060,8 +1060,8 @@ inp_list = il->il_inp_list; INP_INFO_RLOCK(&V_ripcbinfo); - for (inp = LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n; - inp = LIST_NEXT(inp, inp_list)) { + for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n; + inp = CK_LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseeinpcb(req->td->td_ucred, inp) == 0) { Index: sys/netinet/tcp_hpts.c =================================================================== --- sys/netinet/tcp_hpts.c +++ sys/netinet/tcp_hpts.c @@ -514,40 +514,6 @@ static int tcp_hptsi_lock_inpinfo(struct inpcb *inp, struct tcpcb **tpp) { - struct tcp_function_block *tfb; - struct tcpcb *tp; - void *ptr; - - /* Try the easy way. */ - if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) - return (0); - - /* - * OK, let's try the hard way. We'll save the function pointer block - * to make sure that doesn't change while we aren't holding the - * lock. - */ - tp = *tpp; - tfb = tp->t_fb; - ptr = tp->t_fb_ptr; - INP_WUNLOCK(inp); - INP_INFO_RLOCK(&V_tcbinfo); - INP_WLOCK(inp); - /* If the session went away, return an error. */ - if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) || - (inp->inp_flags2 & INP_FREED)) { - *tpp = NULL; - return (1); - } - /* - * If the function block or stack-specific data block changed, - * report an error. - */ - tp = intotcpcb(inp); - if ((tp->t_fb != tfb) && (tp->t_fb_ptr != ptr)) { - *tpp = NULL; - return (1); - } return (0); } Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c +++ sys/netinet/tcp_input.c @@ -960,25 +960,10 @@ * * XXXRW: It may be time to rethink timewait locking. */ -relocked: if (inp->inp_flags & INP_TIMEWAIT) { if (ti_locked == TI_UNLOCKED) { - if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) { - in_pcbref(inp); - INP_WUNLOCK(inp); - INP_INFO_RLOCK(&V_tcbinfo); - ti_locked = TI_RLOCKED; - INP_WLOCK(inp); - if (in_pcbrele_wlocked(inp)) { - inp = NULL; - goto findpcb; - } else if (inp->inp_flags & INP_DROPPED) { - INP_WUNLOCK(inp); - inp = NULL; - goto findpcb; - } - } else - ti_locked = TI_RLOCKED; + INP_INFO_RLOCK(); + ti_locked = TI_RLOCKED; } INP_INFO_RLOCK_ASSERT(&V_tcbinfo); @@ -1026,23 +1011,8 @@ (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) && !IS_FASTOPEN(tp->t_flags)))) { if (ti_locked == TI_UNLOCKED) { - if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) { - in_pcbref(inp); - INP_WUNLOCK(inp); - INP_INFO_RLOCK(&V_tcbinfo); - ti_locked = TI_RLOCKED; - INP_WLOCK(inp); - if (in_pcbrele_wlocked(inp)) { - inp = NULL; - goto findpcb; - } else if (inp->inp_flags & INP_DROPPED) { - INP_WUNLOCK(inp); - inp = NULL; - goto findpcb; - } - goto relocked; - } else - ti_locked = TI_RLOCKED; + INP_INFO_RLOCK(); + ti_locked = TI_RLOCKED; } INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -6818,34 +6818,8 @@ * Initial input (ACK to SYN-ACK etc)lets go ahead and get * it processed */ - if (ti_locked != TI_RLOCKED && INP_INFO_TRY_RLOCK(&V_tcbinfo)) - ti_locked = TI_RLOCKED; - if (ti_locked != TI_RLOCKED) { - inp = tp->t_inpcb; - tfb = tp->t_fb; - in_pcbref(inp); - INP_WUNLOCK(inp); - INP_INFO_RLOCK(&V_tcbinfo); - ti_locked = TI_RLOCKED; - INP_WLOCK(inp); - if (in_pcbrele_wlocked(inp)) - inp = NULL; - if (inp == NULL || (inp->inp_flags2 & INP_FREED) || - (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED))) { - /* The TCPCB went away. Free the packet. */ - INP_INFO_RUNLOCK(&V_tcbinfo); - if (inp) - INP_WUNLOCK(inp); - m_freem(m); - return; - } - /* If the stack changed, call the correct stack. */ - if (tp->t_fb != tfb) { - tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, - drop_hdrlen, tlen, iptos, ti_locked); - return; - } - } + INP_INFO_RLOCK(); + ti_locked = TI_RLOCKED; tcp_get_usecs(&tv); rack_hpts_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked, 0, &tv); Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -947,7 +947,7 @@ VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); INP_INFO_WLOCK(&V_tcbinfo); - LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) { + CK_LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) { INP_WLOCK(inp); if (inp->inp_flags & INP_TIMEWAIT) { INP_WUNLOCK(inp); @@ -1717,7 +1717,7 @@ * therefore don't enter the loop below until the connection * list has stabilised. */ - LIST_FOREACH(inp, &V_tcb, inp_list) { + CK_LIST_FOREACH(inp, &V_tcb, inp_list) { INP_WLOCK(inp); /* Important to skip tcptw structs. */ if (!(inp->inp_flags & INP_TIMEWAIT) && @@ -2018,7 +2018,7 @@ * useful. */ INP_INFO_WLOCK(&V_tcbinfo); - LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) { + CK_LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) { if (inpb->inp_flags & INP_TIMEWAIT) continue; INP_WLOCK(inpb); @@ -2155,8 +2155,8 @@ inp_list = il->il_inp_list; INP_INFO_WLOCK(&V_tcbinfo); - for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0; - inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) { + for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0; + inp != NULL && i < n; inp = CK_LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt) { /* Index: sys/netinet/tcp_timewait.c =================================================================== --- sys/netinet/tcp_timewait.c +++ sys/netinet/tcp_timewait.c @@ -707,54 +707,46 @@ in_pcbref(inp); TW_RUNLOCK(V_tw_lock); - if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) { - - INP_WLOCK(inp); - tw = intotw(inp); - if (in_pcbrele_wlocked(inp)) { - if (__predict_true(tw == NULL)) { - INP_INFO_RUNLOCK(&V_tcbinfo); - continue; - } else { - /* This should not happen as in TIMEWAIT - * state the inp should not be destroyed - * before its tcptw. If INVARIANTS is - * defined panic. - */ + INP_INFO_RLOCK(&V_tcbinfo); + INP_WLOCK(inp); + tw = intotw(inp); + if (in_pcbrele_wlocked(inp)) { + if (__predict_true(tw == NULL)) { + INP_INFO_RUNLOCK(&V_tcbinfo); + continue; + } else { + /* This should not happen as in TIMEWAIT + * state the inp should not be destroyed + * before its tcptw. If INVARIANTS is + * defined panic. + */ #ifdef INVARIANTS - panic("%s: Panic before an infinite " - "loop: INP_TIMEWAIT && (INP_FREED " - "|| inp last reference) && tw != " - "NULL", __func__); + panic("%s: Panic before an infinite " + "loop: INP_TIMEWAIT && (INP_FREED " + "|| inp last reference) && tw != " + "NULL", __func__); #else - log(LOG_ERR, "%s: Avoid an infinite " - "loop: INP_TIMEWAIT && (INP_FREED " - "|| inp last reference) && tw != " - "NULL", __func__); + log(LOG_ERR, "%s: Avoid an infinite " + "loop: INP_TIMEWAIT && (INP_FREED " + "|| inp last reference) && tw != " + "NULL", __func__); #endif - INP_INFO_RUNLOCK(&V_tcbinfo); - break; - } - } - - if (tw == NULL) { - /* tcp_twclose() has already been called */ - INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); - continue; + break; } + } - tcp_twclose(tw, reuse); + if (tw == NULL) { + /* tcp_twclose() has already been called */ + INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); - if (reuse) - return tw; - } else { - /* INP_INFO lock is busy, continue later. */ - INP_WLOCK(inp); - if (!in_pcbrele_wlocked(inp)) - INP_WUNLOCK(inp); - break; + continue; } + + tcp_twclose(tw, reuse); + INP_INFO_RUNLOCK(&V_tcbinfo); + if (reuse) + return tw; } return NULL; Index: sys/netinet/udp_usrreq.c =================================================================== --- sys/netinet/udp_usrreq.c +++ sys/netinet/udp_usrreq.c @@ -532,7 +532,7 @@ INP_INFO_RLOCK(pcbinfo); pcblist = udp_get_pcblist(proto); last = NULL; - LIST_FOREACH(inp, pcblist, inp_list) { + CK_LIST_FOREACH(inp, pcblist, inp_list) { if (inp->inp_lport != uh->uh_dport) continue; #ifdef INET6 @@ -802,14 +802,15 @@ INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { struct udpcb *up; + void *ctx; + udp_tun_icmp_t func; up = intoudpcb(inp); - if (up->u_icmp_func != NULL) { - INP_RUNLOCK(inp); - (*up->u_icmp_func)(cmd, sa, vip, up->u_tun_ctx); - } else { - INP_RUNLOCK(inp); - } + ctx = up->u_tun_ctx; + func = up->u_icmp_func; + INP_RUNLOCK(inp); + if (func != NULL) + (*func)(cmd, sa, vip, ctx); } } } else @@ -878,8 +879,8 @@ inp_list = il->il_inp_list; INP_INFO_RLOCK(&V_udbinfo); - for (inp = LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n; - inp = LIST_NEXT(inp, inp_list)) { + for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n; + inp = CK_LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseeinpcb(req->td->td_ucred, inp) == 0) { Index: sys/netinet6/icmp6.c =================================================================== --- sys/netinet6/icmp6.c +++ sys/netinet6/icmp6.c @@ -1923,7 +1923,7 @@ } INP_INFO_RLOCK(&V_ripcbinfo); - LIST_FOREACH(in6p, &V_ripcb, inp_list) { + CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->inp_ip_p != IPPROTO_ICMPV6) Index: sys/netinet6/in6_pcb.c =================================================================== --- sys/netinet6/in6_pcb.c +++ sys/netinet6/in6_pcb.c @@ -649,7 +649,7 @@ } errno = inet6ctlerrmap[cmd]; INP_INFO_WLOCK(pcbinfo); - LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { + CK_LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { INP_WLOCK(inp); if ((inp->inp_vflag & INP_IPV6) == 0) { INP_WUNLOCK(inp); @@ -726,7 +726,7 @@ head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(&in6addr_any), lport, 0, pcbinfo->ipi_hashmask)]; - LIST_FOREACH(inp, head, inp_hash) { + CK_LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; @@ -756,7 +756,7 @@ */ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, pcbinfo->ipi_porthashmask)]; - LIST_FOREACH(phd, porthash, phd_hash) { + CK_LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } @@ -765,7 +765,7 @@ * Port is in use by one or more PCBs. Look for best * fit. */ - LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { + CK_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; if (cred != NULL && !prison_equal_ip6(cred->cr_prison, @@ -807,7 +807,7 @@ int i, gap; INP_INFO_WLOCK(pcbinfo); - LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { + CK_LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { INP_WLOCK(in6p); im6o = in6p->in6p_moptions; if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) { @@ -1149,7 +1149,7 @@ tmpinp = NULL; head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(faddr), lport, fport, pcbinfo->ipi_hashmask)]; - LIST_FOREACH(inp, head, inp_hash) { + CK_LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; @@ -1201,7 +1201,7 @@ head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(&in6addr_any), lport, 0, pcbinfo->ipi_hashmask)]; - LIST_FOREACH(inp, head, inp_hash) { + CK_LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; Index: sys/netinet6/raw_ip6.c =================================================================== --- sys/netinet6/raw_ip6.c +++ sys/netinet6/raw_ip6.c @@ -173,7 +173,7 @@ ifp = m->m_pkthdr.rcvif; INP_INFO_RLOCK(&V_ripcbinfo); - LIST_FOREACH(in6p, &V_ripcb, inp_list) { + CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) { /* XXX inp locking */ if ((in6p->inp_vflag & INP_IPV6) == 0) continue; Index: sys/netinet6/udp6_usrreq.c =================================================================== --- sys/netinet6/udp6_usrreq.c +++ sys/netinet6/udp6_usrreq.c @@ -318,7 +318,7 @@ */ pcblist = udp_get_pcblist(nxt); last = NULL; - LIST_FOREACH(inp, pcblist, inp_list) { + CK_LIST_FOREACH(inp, pcblist, inp_list) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (inp->inp_lport != uh->uh_dport)