Page MenuHomeFreeBSD

D15686.id43681.diff
No OneTemporary

D15686.id43681.diff

Index: sys/kern/subr_witness.c
===================================================================
--- sys/kern/subr_witness.c
+++ sys/kern/subr_witness.c
@@ -561,14 +561,14 @@
/*
* UDP/IP
*/
- { "udp", &lock_class_rw },
+ { "udp", &lock_class_mtx_sleep },
{ "udpinp", &lock_class_rw },
{ "so_snd", &lock_class_mtx_sleep },
{ NULL, NULL },
/*
* TCP/IP
*/
- { "tcp", &lock_class_rw },
+ { "tcp", &lock_class_mtx_sleep },
{ "tcpinp", &lock_class_rw },
{ "so_snd", &lock_class_mtx_sleep },
{ NULL, NULL },
Index: sys/netinet/in_pcb.h
===================================================================
--- sys/netinet/in_pcb.h
+++ sys/netinet/in_pcb.h
@@ -51,6 +51,8 @@
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <net/vnet.h>
+#include <net/if.h>
+#include <net/if_var.h>
#include <vm/uma.h>
#endif
#include <sys/ck.h>
@@ -157,6 +159,7 @@
* Key:
* (b) - Protected by the hpts lock.
* (c) - Constant after initialization
+ * (e) - Protected by the net_epoch_prempt epoch
* (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
* (p) - Protected by the pcbinfo lock for the inpcb
@@ -231,7 +234,7 @@
struct m_snd_tag;
struct inpcb {
/* Cache line #1 (amd64) */
- CK_LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */
+ CK_LIST_ENTRY(inpcb) inp_hash; /* [w](h/i) [r](e/i) hash list */
CK_LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
struct rwlock inp_lock;
/* Cache line #2 (amd64) */
@@ -324,8 +327,8 @@
struct route_in6 inp_route6;
};
CK_LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
- /* (p[w]) for list iteration */
- /* (p[r]/l) for addition/removal */
+ /* (e[r]) for list iteration */
+ /* (p[w]/l) for addition/removal */
struct epoch_context inp_epoch_ctx;
};
#endif /* _KERNEL */
@@ -436,22 +439,23 @@
* Locking key:
*
* (c) Constant or nearly constant after initialisation
+ * (e) - Protected by the net_epoch_prempt epoch
* (g) Locked by ipi_lock
* (l) Locked by ipi_list_lock
- * (h) Read using either ipi_hash_lock or inpcb lock; write requires both
+ * (h) Read using either net_epoch_preempt or inpcb lock; write requires both ipi_hash_lock and inpcb lock
* (p) Protected by one or more pcbgroup locks
* (x) Synchronisation properties poorly defined
*/
struct inpcbinfo {
/*
- * Global lock protecting full inpcb list traversal
+ * Global lock protecting inpcb modification
*/
- struct rwlock ipi_lock;
+ struct mtx ipi_lock;
/*
* Global list of inpcbs on the protocol.
*/
- struct inpcbhead *ipi_listhead; /* (g/l) */
+ struct inpcbhead *ipi_listhead; /* [r](e) [w](g/l) */
u_int ipi_count; /* (l) */
/*
@@ -482,9 +486,9 @@
u_int ipi_hashfields; /* (c) */
/*
- * Global lock protecting non-pcbgroup hash lookup tables.
+ * Global lock protecting modification non-pcbgroup hash lookup tables.
*/
- struct rwlock ipi_hash_lock;
+ struct mtx ipi_hash_lock;
/*
* Global hash of inpcbs, hashed by local and foreign addresses and
@@ -626,20 +630,18 @@
#endif /* _KERNEL */
#define INP_INFO_LOCK_INIT(ipi, d) \
- rw_init_flags(&(ipi)->ipi_lock, (d), RW_RECURSE)
-#define INP_INFO_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_lock)
-#define INP_INFO_RLOCK(ipi) rw_rlock(&(ipi)->ipi_lock)
-#define INP_INFO_WLOCK(ipi) rw_wlock(&(ipi)->ipi_lock)
-#define INP_INFO_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_lock)
-#define INP_INFO_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_lock)
-#define INP_INFO_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_lock)
-#define INP_INFO_WLOCKED(ipi) rw_wowned(&(ipi)->ipi_lock)
-#define INP_INFO_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_lock)
-#define INP_INFO_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_lock)
-#define INP_INFO_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_LOCKED)
-#define INP_INFO_RLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_RLOCKED)
-#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
-#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
+ mtx_init(&(ipi)->ipi_lock, (d), NULL, MTX_DEF| MTX_RECURSE)
+#define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_lock)
+#define INP_INFO_RLOCK(ipi) NET_EPOCH_ENTER()
+#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock)
+#define INP_INFO_TRY_WLOCK(ipi) mtx_trylock(&(ipi)->ipi_lock)
+#define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock)
+#define INP_INFO_RUNLOCK(ipi) NET_EPOCH_EXIT()
+#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock)
+#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch() || mtx_owned(&(ipi)->ipi_lock))
+#define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch())
+#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED)
+#define INP_INFO_UNLOCK_ASSERT(ipi) MPASS(!in_epoch() && !mtx_owned(&(ipi)->ipi_lock))
#define INP_LIST_LOCK_INIT(ipi, d) \
rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
@@ -660,17 +662,14 @@
#define INP_LIST_UNLOCK_ASSERT(ipi) \
rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED)
-#define INP_HASH_LOCK_INIT(ipi, d) \
- rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
-#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock)
-#define INP_HASH_RLOCK(ipi) rw_rlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_WLOCK(ipi) rw_wlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
- RA_LOCKED)
-#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
- RA_WLOCKED)
+#define INP_HASH_LOCK_INIT(ipi, d) mtx_init(&(ipi)->ipi_hash_lock, (d), NULL, MTX_DEF)
+#define INP_HASH_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RLOCK(ipi) NET_EPOCH_ENTER()
+#define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RUNLOCK(ipi) NET_EPOCH_EXIT()
+#define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch() || mtx_owned(&(ipi)->ipi_hash_lock))
+#define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, MA_OWNED);
#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \
MTX_DEF | MTX_DUPOK)
Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -2209,7 +2209,14 @@
locked = INP_TRY_RLOCK(inp);
else
panic("%s: locking bug", __func__);
- if (!locked)
+ if (__predict_false(locked && (inp->inp_flags2 & INP_FREED))) {
+ if (lookupflags & INPLOOKUP_WLOCKPCB)
+ INP_WUNLOCK(inp);
+ else
+ INP_RUNLOCK(inp);
+ INP_HASH_RUNLOCK(pcbinfo);
+ return (NULL);
+ } else if (!locked)
in_pcbref(inp);
INP_GROUP_UNLOCK(pcbgroup);
if (!locked) {
Index: sys/netinet/tcp_hpts.c
===================================================================
--- sys/netinet/tcp_hpts.c
+++ sys/netinet/tcp_hpts.c
@@ -185,9 +185,6 @@
static struct tcp_hptsi tcp_pace;
-static int
-tcp_hptsi_lock_inpinfo(struct inpcb *inp,
- struct tcpcb **tp);
static void tcp_wakehpts(struct tcp_hpts_entry *p);
static void tcp_wakeinput(struct tcp_hpts_entry *p);
static void tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv);
@@ -499,59 +496,6 @@
0, 0, sysctl_tcp_hpts_log, "A", "tcp hptsi log");
-/*
- * Try to get the INP_INFO lock.
- *
- * This function always succeeds in getting the lock. It will clear
- * *tpp and return (1) if something critical changed while the inpcb
- * was unlocked. Otherwise, it will leave *tpp unchanged and return (0).
- *
- * This function relies on the fact that the hpts always holds a
- * reference on the inpcb while the segment is on the hptsi wheel and
- * in the input queue.
- *
- */
-static int
-tcp_hptsi_lock_inpinfo(struct inpcb *inp, struct tcpcb **tpp)
-{
- struct tcp_function_block *tfb;
- struct tcpcb *tp;
- void *ptr;
-
- /* Try the easy way. */
- if (INP_INFO_TRY_RLOCK(&V_tcbinfo))
- return (0);
-
- /*
- * OK, let's try the hard way. We'll save the function pointer block
- * to make sure that doesn't change while we aren't holding the
- * lock.
- */
- tp = *tpp;
- tfb = tp->t_fb;
- ptr = tp->t_fb_ptr;
- INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
- INP_WLOCK(inp);
- /* If the session went away, return an error. */
- if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) ||
- (inp->inp_flags2 & INP_FREED)) {
- *tpp = NULL;
- return (1);
- }
- /*
- * If the function block or stack-specific data block changed,
- * report an error.
- */
- tp = intotcpcb(inp);
- if ((tp->t_fb != tfb) && (tp->t_fb_ptr != ptr)) {
- *tpp = NULL;
- return (1);
- }
- return (0);
-}
-
-
static void
tcp_wakehpts(struct tcp_hpts_entry *hpts)
{
@@ -1289,10 +1233,7 @@
(m->m_pkthdr.pace_lock == TI_RLOCKED ||
tp->t_state != TCPS_ESTABLISHED)) {
ti_locked = TI_RLOCKED;
- if (tcp_hptsi_lock_inpinfo(inp, &tp)) {
- CURVNET_RESTORE();
- goto out;
- }
+ INP_INFO_RLOCK(&V_tcbinfo);
m = tp->t_in_pkt;
}
if (in_newts_every_tcb) {
@@ -1359,7 +1300,6 @@
*/
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) ||
(inp->inp_flags2 & INP_FREED)) {
- out_free:
while (m) {
m_freem(m);
m = n;
@@ -1376,8 +1316,7 @@
if (ti_locked == TI_UNLOCKED &&
(tp->t_state != TCPS_ESTABLISHED)) {
ti_locked = TI_RLOCKED;
- if (tcp_hptsi_lock_inpinfo(inp, &tp))
- goto out_free;
+ INP_INFO_RLOCK(&V_tcbinfo);
}
} /** end while(m) */
} /** end if ((m != NULL) && (m == tp->t_in_pkt)) */
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -960,25 +960,10 @@
*
* XXXRW: It may be time to rethink timewait locking.
*/
-relocked:
if (inp->inp_flags & INP_TIMEWAIT) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
- ti_locked = TI_RLOCKED;
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp)) {
- inp = NULL;
- goto findpcb;
- } else if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- inp = NULL;
- goto findpcb;
- }
- } else
- ti_locked = TI_RLOCKED;
+ INP_INFO_RLOCK();
+ ti_locked = TI_RLOCKED;
}
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
@@ -1026,23 +1011,8 @@
(tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
!IS_FASTOPEN(tp->t_flags)))) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
- ti_locked = TI_RLOCKED;
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp)) {
- inp = NULL;
- goto findpcb;
- } else if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- inp = NULL;
- goto findpcb;
- }
- goto relocked;
- } else
- ti_locked = TI_RLOCKED;
+ INP_INFO_RLOCK();
+ ti_locked = TI_RLOCKED;
}
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
}
Index: sys/netinet/tcp_stacks/rack.c
===================================================================
--- sys/netinet/tcp_stacks/rack.c
+++ sys/netinet/tcp_stacks/rack.c
@@ -6814,34 +6814,8 @@
* Initial input (ACK to SYN-ACK etc)lets go ahead and get
* it processed
*/
- if (ti_locked != TI_RLOCKED && INP_INFO_TRY_RLOCK(&V_tcbinfo))
- ti_locked = TI_RLOCKED;
- if (ti_locked != TI_RLOCKED) {
- inp = tp->t_inpcb;
- tfb = tp->t_fb;
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
- ti_locked = TI_RLOCKED;
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp))
- inp = NULL;
- if (inp == NULL || (inp->inp_flags2 & INP_FREED) ||
- (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED))) {
- /* The TCPCB went away. Free the packet. */
- INP_INFO_RUNLOCK(&V_tcbinfo);
- if (inp)
- INP_WUNLOCK(inp);
- m_freem(m);
- return;
- }
- /* If the stack changed, call the correct stack. */
- if (tp->t_fb != tfb) {
- tp->t_fb->tfb_tcp_do_segment(m, th, so, tp,
- drop_hdrlen, tlen, iptos, ti_locked);
- return;
- }
- }
+ INP_INFO_RLOCK();
+ ti_locked = TI_RLOCKED;
tcp_get_usecs(&tv);
rack_hpts_do_segment(m, th, so, tp, drop_hdrlen,
tlen, iptos, ti_locked, 0, &tv);
Index: sys/netinet/tcp_timewait.c
===================================================================
--- sys/netinet/tcp_timewait.c
+++ sys/netinet/tcp_timewait.c
@@ -707,54 +707,46 @@
in_pcbref(inp);
TW_RUNLOCK(V_tw_lock);
- if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) {
-
- INP_WLOCK(inp);
- tw = intotw(inp);
- if (in_pcbrele_wlocked(inp)) {
- if (__predict_true(tw == NULL)) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
- continue;
- } else {
- /* This should not happen as in TIMEWAIT
- * state the inp should not be destroyed
- * before its tcptw. If INVARIANTS is
- * defined panic.
- */
+ INP_INFO_RLOCK(&V_tcbinfo);
+ INP_WLOCK(inp);
+ tw = intotw(inp);
+ if (in_pcbrele_wlocked(inp)) {
+ if (__predict_true(tw == NULL)) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ continue;
+ } else {
+ /* This should not happen as in TIMEWAIT
+ * state the inp should not be destroyed
+ * before its tcptw. If INVARIANTS is
+ * defined panic.
+ */
#ifdef INVARIANTS
- panic("%s: Panic before an infinite "
- "loop: INP_TIMEWAIT && (INP_FREED "
- "|| inp last reference) && tw != "
- "NULL", __func__);
+ panic("%s: Panic before an infinite "
+ "loop: INP_TIMEWAIT && (INP_FREED "
+ "|| inp last reference) && tw != "
+ "NULL", __func__);
#else
- log(LOG_ERR, "%s: Avoid an infinite "
- "loop: INP_TIMEWAIT && (INP_FREED "
- "|| inp last reference) && tw != "
- "NULL", __func__);
+ log(LOG_ERR, "%s: Avoid an infinite "
+ "loop: INP_TIMEWAIT && (INP_FREED "
+ "|| inp last reference) && tw != "
+ "NULL", __func__);
#endif
- INP_INFO_RUNLOCK(&V_tcbinfo);
- break;
- }
- }
-
- if (tw == NULL) {
- /* tcp_twclose() has already been called */
- INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
- continue;
+ break;
}
+ }
- tcp_twclose(tw, reuse);
+ if (tw == NULL) {
+ /* tcp_twclose() has already been called */
+ INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
- if (reuse)
- return tw;
- } else {
- /* INP_INFO lock is busy, continue later. */
- INP_WLOCK(inp);
- if (!in_pcbrele_wlocked(inp))
- INP_WUNLOCK(inp);
- break;
+ continue;
}
+
+ tcp_twclose(tw, reuse);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ if (reuse)
+ return tw;
}
return NULL;

File Metadata

Mime Type
text/plain
Expires
Sun, Dec 15, 4:58 PM (9 h, 39 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
9091790
Default Alt Text
D15686.id43681.diff (14 KB)

Event Timeline