Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F81970303
D15686.id43681.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
14 KB
Referenced Files
None
Subscribers
None
D15686.id43681.diff
View Options
Index: sys/kern/subr_witness.c
===================================================================
--- sys/kern/subr_witness.c
+++ sys/kern/subr_witness.c
@@ -561,14 +561,14 @@
/*
* UDP/IP
*/
- { "udp", &lock_class_rw },
+ { "udp", &lock_class_mtx_sleep },
{ "udpinp", &lock_class_rw },
{ "so_snd", &lock_class_mtx_sleep },
{ NULL, NULL },
/*
* TCP/IP
*/
- { "tcp", &lock_class_rw },
+ { "tcp", &lock_class_mtx_sleep },
{ "tcpinp", &lock_class_rw },
{ "so_snd", &lock_class_mtx_sleep },
{ NULL, NULL },
Index: sys/netinet/in_pcb.h
===================================================================
--- sys/netinet/in_pcb.h
+++ sys/netinet/in_pcb.h
@@ -51,6 +51,8 @@
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <net/vnet.h>
+#include <net/if.h>
+#include <net/if_var.h>
#include <vm/uma.h>
#endif
#include <sys/ck.h>
@@ -157,6 +159,7 @@
* Key:
* (b) - Protected by the hpts lock.
* (c) - Constant after initialization
+ * (e) - Protected by the net_epoch_prempt epoch
* (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
* (p) - Protected by the pcbinfo lock for the inpcb
@@ -231,7 +234,7 @@
struct m_snd_tag;
struct inpcb {
/* Cache line #1 (amd64) */
- CK_LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */
+ CK_LIST_ENTRY(inpcb) inp_hash; /* [w](h/i) [r](e/i) hash list */
CK_LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
struct rwlock inp_lock;
/* Cache line #2 (amd64) */
@@ -324,8 +327,8 @@
struct route_in6 inp_route6;
};
CK_LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
- /* (p[w]) for list iteration */
- /* (p[r]/l) for addition/removal */
+ /* (e[r]) for list iteration */
+ /* (p[w]/l) for addition/removal */
struct epoch_context inp_epoch_ctx;
};
#endif /* _KERNEL */
@@ -436,22 +439,23 @@
* Locking key:
*
* (c) Constant or nearly constant after initialisation
+ * (e) - Protected by the net_epoch_prempt epoch
* (g) Locked by ipi_lock
* (l) Locked by ipi_list_lock
- * (h) Read using either ipi_hash_lock or inpcb lock; write requires both
+ * (h) Read using either net_epoch_preempt or inpcb lock; write requires both ipi_hash_lock and inpcb lock
* (p) Protected by one or more pcbgroup locks
* (x) Synchronisation properties poorly defined
*/
struct inpcbinfo {
/*
- * Global lock protecting full inpcb list traversal
+ * Global lock protecting inpcb modification
*/
- struct rwlock ipi_lock;
+ struct mtx ipi_lock;
/*
* Global list of inpcbs on the protocol.
*/
- struct inpcbhead *ipi_listhead; /* (g/l) */
+ struct inpcbhead *ipi_listhead; /* [r](e) [w](g/l) */
u_int ipi_count; /* (l) */
/*
@@ -482,9 +486,9 @@
u_int ipi_hashfields; /* (c) */
/*
- * Global lock protecting non-pcbgroup hash lookup tables.
+ * Global lock protecting modification non-pcbgroup hash lookup tables.
*/
- struct rwlock ipi_hash_lock;
+ struct mtx ipi_hash_lock;
/*
* Global hash of inpcbs, hashed by local and foreign addresses and
@@ -626,20 +630,18 @@
#endif /* _KERNEL */
#define INP_INFO_LOCK_INIT(ipi, d) \
- rw_init_flags(&(ipi)->ipi_lock, (d), RW_RECURSE)
-#define INP_INFO_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_lock)
-#define INP_INFO_RLOCK(ipi) rw_rlock(&(ipi)->ipi_lock)
-#define INP_INFO_WLOCK(ipi) rw_wlock(&(ipi)->ipi_lock)
-#define INP_INFO_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_lock)
-#define INP_INFO_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_lock)
-#define INP_INFO_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_lock)
-#define INP_INFO_WLOCKED(ipi) rw_wowned(&(ipi)->ipi_lock)
-#define INP_INFO_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_lock)
-#define INP_INFO_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_lock)
-#define INP_INFO_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_LOCKED)
-#define INP_INFO_RLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_RLOCKED)
-#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
-#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
+ mtx_init(&(ipi)->ipi_lock, (d), NULL, MTX_DEF| MTX_RECURSE)
+#define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_lock)
+#define INP_INFO_RLOCK(ipi) NET_EPOCH_ENTER()
+#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock)
+#define INP_INFO_TRY_WLOCK(ipi) mtx_trylock(&(ipi)->ipi_lock)
+#define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock)
+#define INP_INFO_RUNLOCK(ipi) NET_EPOCH_EXIT()
+#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock)
+#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch() || mtx_owned(&(ipi)->ipi_lock))
+#define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch())
+#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED)
+#define INP_INFO_UNLOCK_ASSERT(ipi) MPASS(!in_epoch() && !mtx_owned(&(ipi)->ipi_lock))
#define INP_LIST_LOCK_INIT(ipi, d) \
rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
@@ -660,17 +662,14 @@
#define INP_LIST_UNLOCK_ASSERT(ipi) \
rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED)
-#define INP_HASH_LOCK_INIT(ipi, d) \
- rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
-#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock)
-#define INP_HASH_RLOCK(ipi) rw_rlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_WLOCK(ipi) rw_wlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
- RA_LOCKED)
-#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
- RA_WLOCKED)
+#define INP_HASH_LOCK_INIT(ipi, d) mtx_init(&(ipi)->ipi_hash_lock, (d), NULL, MTX_DEF)
+#define INP_HASH_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RLOCK(ipi) NET_EPOCH_ENTER()
+#define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RUNLOCK(ipi) NET_EPOCH_EXIT()
+#define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch() || mtx_owned(&(ipi)->ipi_hash_lock))
+#define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, MA_OWNED);
#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \
MTX_DEF | MTX_DUPOK)
Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -2209,7 +2209,14 @@
locked = INP_TRY_RLOCK(inp);
else
panic("%s: locking bug", __func__);
- if (!locked)
+ if (__predict_false(locked && (inp->inp_flags2 & INP_FREED))) {
+ if (lookupflags & INPLOOKUP_WLOCKPCB)
+ INP_WUNLOCK(inp);
+ else
+ INP_RUNLOCK(inp);
+ INP_HASH_RUNLOCK(pcbinfo);
+ return (NULL);
+ } else if (!locked)
in_pcbref(inp);
INP_GROUP_UNLOCK(pcbgroup);
if (!locked) {
Index: sys/netinet/tcp_hpts.c
===================================================================
--- sys/netinet/tcp_hpts.c
+++ sys/netinet/tcp_hpts.c
@@ -185,9 +185,6 @@
static struct tcp_hptsi tcp_pace;
-static int
-tcp_hptsi_lock_inpinfo(struct inpcb *inp,
- struct tcpcb **tp);
static void tcp_wakehpts(struct tcp_hpts_entry *p);
static void tcp_wakeinput(struct tcp_hpts_entry *p);
static void tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv);
@@ -499,59 +496,6 @@
0, 0, sysctl_tcp_hpts_log, "A", "tcp hptsi log");
-/*
- * Try to get the INP_INFO lock.
- *
- * This function always succeeds in getting the lock. It will clear
- * *tpp and return (1) if something critical changed while the inpcb
- * was unlocked. Otherwise, it will leave *tpp unchanged and return (0).
- *
- * This function relies on the fact that the hpts always holds a
- * reference on the inpcb while the segment is on the hptsi wheel and
- * in the input queue.
- *
- */
-static int
-tcp_hptsi_lock_inpinfo(struct inpcb *inp, struct tcpcb **tpp)
-{
- struct tcp_function_block *tfb;
- struct tcpcb *tp;
- void *ptr;
-
- /* Try the easy way. */
- if (INP_INFO_TRY_RLOCK(&V_tcbinfo))
- return (0);
-
- /*
- * OK, let's try the hard way. We'll save the function pointer block
- * to make sure that doesn't change while we aren't holding the
- * lock.
- */
- tp = *tpp;
- tfb = tp->t_fb;
- ptr = tp->t_fb_ptr;
- INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
- INP_WLOCK(inp);
- /* If the session went away, return an error. */
- if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) ||
- (inp->inp_flags2 & INP_FREED)) {
- *tpp = NULL;
- return (1);
- }
- /*
- * If the function block or stack-specific data block changed,
- * report an error.
- */
- tp = intotcpcb(inp);
- if ((tp->t_fb != tfb) && (tp->t_fb_ptr != ptr)) {
- *tpp = NULL;
- return (1);
- }
- return (0);
-}
-
-
static void
tcp_wakehpts(struct tcp_hpts_entry *hpts)
{
@@ -1289,10 +1233,7 @@
(m->m_pkthdr.pace_lock == TI_RLOCKED ||
tp->t_state != TCPS_ESTABLISHED)) {
ti_locked = TI_RLOCKED;
- if (tcp_hptsi_lock_inpinfo(inp, &tp)) {
- CURVNET_RESTORE();
- goto out;
- }
+ INP_INFO_RLOCK(&V_tcbinfo);
m = tp->t_in_pkt;
}
if (in_newts_every_tcb) {
@@ -1359,7 +1300,6 @@
*/
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) ||
(inp->inp_flags2 & INP_FREED)) {
- out_free:
while (m) {
m_freem(m);
m = n;
@@ -1376,8 +1316,7 @@
if (ti_locked == TI_UNLOCKED &&
(tp->t_state != TCPS_ESTABLISHED)) {
ti_locked = TI_RLOCKED;
- if (tcp_hptsi_lock_inpinfo(inp, &tp))
- goto out_free;
+ INP_INFO_RLOCK(&V_tcbinfo);
}
} /** end while(m) */
} /** end if ((m != NULL) && (m == tp->t_in_pkt)) */
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -960,25 +960,10 @@
*
* XXXRW: It may be time to rethink timewait locking.
*/
-relocked:
if (inp->inp_flags & INP_TIMEWAIT) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
- ti_locked = TI_RLOCKED;
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp)) {
- inp = NULL;
- goto findpcb;
- } else if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- inp = NULL;
- goto findpcb;
- }
- } else
- ti_locked = TI_RLOCKED;
+ INP_INFO_RLOCK();
+ ti_locked = TI_RLOCKED;
}
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
@@ -1026,23 +1011,8 @@
(tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
!IS_FASTOPEN(tp->t_flags)))) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
- ti_locked = TI_RLOCKED;
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp)) {
- inp = NULL;
- goto findpcb;
- } else if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- inp = NULL;
- goto findpcb;
- }
- goto relocked;
- } else
- ti_locked = TI_RLOCKED;
+ INP_INFO_RLOCK();
+ ti_locked = TI_RLOCKED;
}
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
}
Index: sys/netinet/tcp_stacks/rack.c
===================================================================
--- sys/netinet/tcp_stacks/rack.c
+++ sys/netinet/tcp_stacks/rack.c
@@ -6814,34 +6814,8 @@
* Initial input (ACK to SYN-ACK etc)lets go ahead and get
* it processed
*/
- if (ti_locked != TI_RLOCKED && INP_INFO_TRY_RLOCK(&V_tcbinfo))
- ti_locked = TI_RLOCKED;
- if (ti_locked != TI_RLOCKED) {
- inp = tp->t_inpcb;
- tfb = tp->t_fb;
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
- ti_locked = TI_RLOCKED;
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp))
- inp = NULL;
- if (inp == NULL || (inp->inp_flags2 & INP_FREED) ||
- (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED))) {
- /* The TCPCB went away. Free the packet. */
- INP_INFO_RUNLOCK(&V_tcbinfo);
- if (inp)
- INP_WUNLOCK(inp);
- m_freem(m);
- return;
- }
- /* If the stack changed, call the correct stack. */
- if (tp->t_fb != tfb) {
- tp->t_fb->tfb_tcp_do_segment(m, th, so, tp,
- drop_hdrlen, tlen, iptos, ti_locked);
- return;
- }
- }
+ INP_INFO_RLOCK();
+ ti_locked = TI_RLOCKED;
tcp_get_usecs(&tv);
rack_hpts_do_segment(m, th, so, tp, drop_hdrlen,
tlen, iptos, ti_locked, 0, &tv);
Index: sys/netinet/tcp_timewait.c
===================================================================
--- sys/netinet/tcp_timewait.c
+++ sys/netinet/tcp_timewait.c
@@ -707,54 +707,46 @@
in_pcbref(inp);
TW_RUNLOCK(V_tw_lock);
- if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) {
-
- INP_WLOCK(inp);
- tw = intotw(inp);
- if (in_pcbrele_wlocked(inp)) {
- if (__predict_true(tw == NULL)) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
- continue;
- } else {
- /* This should not happen as in TIMEWAIT
- * state the inp should not be destroyed
- * before its tcptw. If INVARIANTS is
- * defined panic.
- */
+ INP_INFO_RLOCK(&V_tcbinfo);
+ INP_WLOCK(inp);
+ tw = intotw(inp);
+ if (in_pcbrele_wlocked(inp)) {
+ if (__predict_true(tw == NULL)) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ continue;
+ } else {
+ /* This should not happen as in TIMEWAIT
+ * state the inp should not be destroyed
+ * before its tcptw. If INVARIANTS is
+ * defined panic.
+ */
#ifdef INVARIANTS
- panic("%s: Panic before an infinite "
- "loop: INP_TIMEWAIT && (INP_FREED "
- "|| inp last reference) && tw != "
- "NULL", __func__);
+ panic("%s: Panic before an infinite "
+ "loop: INP_TIMEWAIT && (INP_FREED "
+ "|| inp last reference) && tw != "
+ "NULL", __func__);
#else
- log(LOG_ERR, "%s: Avoid an infinite "
- "loop: INP_TIMEWAIT && (INP_FREED "
- "|| inp last reference) && tw != "
- "NULL", __func__);
+ log(LOG_ERR, "%s: Avoid an infinite "
+ "loop: INP_TIMEWAIT && (INP_FREED "
+ "|| inp last reference) && tw != "
+ "NULL", __func__);
#endif
- INP_INFO_RUNLOCK(&V_tcbinfo);
- break;
- }
- }
-
- if (tw == NULL) {
- /* tcp_twclose() has already been called */
- INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
- continue;
+ break;
}
+ }
- tcp_twclose(tw, reuse);
+ if (tw == NULL) {
+ /* tcp_twclose() has already been called */
+ INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
- if (reuse)
- return tw;
- } else {
- /* INP_INFO lock is busy, continue later. */
- INP_WLOCK(inp);
- if (!in_pcbrele_wlocked(inp))
- INP_WUNLOCK(inp);
- break;
+ continue;
}
+
+ tcp_twclose(tw, reuse);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ if (reuse)
+ return tw;
}
return NULL;
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Dec 15, 4:58 PM (9 h, 39 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
9091790
Default Alt Text
D15686.id43681.diff (14 KB)
Attached To
Mode
D15686: convert inpcbhash rlock to epoch
Attached
Detach File
Event Timeline
Log In to Comment