Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F81969015
D2079.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
14 KB
Referenced Files
None
Subscribers
None
D2079.diff
View Options
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -230,6 +230,7 @@
static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int);
static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
void *ip4hdr, const void *ip6hdr);
+static void tcp_timer_discard(struct tcpcb *, uint32_t);
/*
* Target size of TCP PCB hash tables. Must be a power of two.
@@ -801,7 +802,13 @@
if (V_tcp_do_sack)
tp->t_flags |= TF_SACK_PERMIT;
TAILQ_INIT(&tp->snd_holes);
- tp->t_inpcb = inp; /* XXX */
+ /*
+ * The tcpcb will hold a reference on its inpcb until tcp_discardcb()
+ * is called.
+ */
+ in_pcbref(inp); /* Reference for tcpcb */
+ tp->t_inpcb = inp;
+
/*
* Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
* rtt estimate. Set rttvar so that srtt + 4 * rttvar gives
@@ -920,6 +927,7 @@
#ifdef INET6
int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
#endif /* INET6 */
+ int released;
INP_WLOCK_ASSERT(inp);
@@ -927,22 +935,15 @@
* Make sure that all of our timers are stopped before we delete the
* PCB.
*
- * XXXRW: Really, we would like to use callout_drain() here in order
- * to avoid races experienced in tcp_timer.c where a timer is already
- * executing at this point. However, we can't, both because we're
- * running in a context where we can't sleep, and also because we
- * hold locks required by the timers. What we instead need to do is
- * test to see if callout_drain() is required, and if so, defer some
- * portion of the remainder of tcp_discardcb() to an asynchronous
- * context that can callout_drain() and then continue. Some care
- * will be required to ensure that no further processing takes place
- * on the tcpcb, even though it hasn't been freed (a flag?).
+ * If stopping a timer fails, we schedule a discard function in same
+ * callout, and the last discard function called will take care of
+ * deleting the tcpcb.
*/
- callout_stop(&tp->t_timers->tt_rexmt);
- callout_stop(&tp->t_timers->tt_persist);
- callout_stop(&tp->t_timers->tt_keep);
- callout_stop(&tp->t_timers->tt_2msl);
- callout_stop(&tp->t_timers->tt_delack);
+ tcp_timer_stop(tp, TT_REXMT);
+ tcp_timer_stop(tp, TT_PERSIST);
+ tcp_timer_stop(tp, TT_KEEP);
+ tcp_timer_stop(tp, TT_2MSL);
+ tcp_timer_stop(tp, TT_DELACK);
/*
* If we got enough samples through the srtt filter,
@@ -1019,8 +1020,80 @@
CC_ALGO(tp) = NULL;
inp->inp_ppcb = NULL;
- tp->t_inpcb = NULL;
- uma_zfree(V_tcpcb_zone, tp);
+ if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
+ /* We own the last reference on tcpcb, let's free it. */
+ tp->t_inpcb = NULL;
+ uma_zfree(V_tcpcb_zone, tp);
+ released = in_pcbrele_wlocked(inp);
+ KASSERT(!released, ("%s: inp %p should not have been released "
+ "here", __func__, inp));
+ }
+}
+
+void
+tcp_timer_2msl_discard(void *xtp)
+{
+
+ tcp_timer_discard((struct tcpcb *)xtp, TT_2MSL);
+}
+
+void
+tcp_timer_keep_discard(void *xtp)
+{
+
+ tcp_timer_discard((struct tcpcb *)xtp, TT_KEEP);
+}
+
+void
+tcp_timer_persist_discard(void *xtp)
+{
+
+ tcp_timer_discard((struct tcpcb *)xtp, TT_PERSIST);
+}
+
+void
+tcp_timer_rexmt_discard(void *xtp)
+{
+
+ tcp_timer_discard((struct tcpcb *)xtp, TT_REXMT);
+}
+
+void
+tcp_timer_delack_discard(void *xtp)
+{
+
+ tcp_timer_discard((struct tcpcb *)xtp, TT_DELACK);
+}
+
+void
+tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type)
+{
+ struct inpcb *inp;
+
+ CURVNET_SET(tp->t_vnet);
+ INP_INFO_WLOCK(&V_tcbinfo);
+ inp = tp->t_inpcb;
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
+ __func__, tp));
+ INP_WLOCK(inp);
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0,
+ ("%s: tcpcb has to be stopped here", __func__));
+ KASSERT((tp->t_timers->tt_flags & timer_type) != 0,
+ ("%s: discard callout should be running", __func__));
+ tp->t_timers->tt_flags &= ~timer_type;
+ if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
+ /* We own the last reference on this tcpcb, let's free it. */
+ tp->t_inpcb = NULL;
+ uma_zfree(V_tcpcb_zone, tp);
+ if (in_pcbrele_wlocked(inp)) {
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ return;
+ }
+ }
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
}
/*
Index: sys/netinet/tcp_timer.h
===================================================================
--- sys/netinet/tcp_timer.h
+++ sys/netinet/tcp_timer.h
@@ -146,12 +146,21 @@
struct callout tt_keep; /* keepalive */
struct callout tt_2msl; /* 2*msl TIME_WAIT timer */
struct callout tt_delack; /* delayed ACK timer */
+ uint32_t tt_flags; /* Timers flags */
+ uint32_t tt_spare; /* TDB */
};
-#define TT_DELACK 0x01
-#define TT_REXMT 0x02
-#define TT_PERSIST 0x04
-#define TT_KEEP 0x08
-#define TT_2MSL 0x10
+
+/*
+ * Flags for the tt_flags field.
+ */
+#define TT_DELACK 0x0001
+#define TT_REXMT 0x0002
+#define TT_PERSIST 0x0004
+#define TT_KEEP 0x0008
+#define TT_2MSL 0x0010
+#define TT_MASK (TT_DELACK|TT_REXMT|TT_PERSIST|TT_KEEP|TT_2MSL)
+
+#define TT_STOPPED 0x00010000
#define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit)
#define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle)
@@ -183,6 +192,11 @@
void tcp_timer_persist(void *xtp);
void tcp_timer_rexmt(void *xtp);
void tcp_timer_delack(void *xtp);
+void tcp_timer_2msl_discard(void *xtp);
+void tcp_timer_keep_discard(void *xtp);
+void tcp_timer_persist_discard(void *xtp);
+void tcp_timer_rexmt_discard(void *xtp);
+void tcp_timer_delack_discard(void *xtp);
void tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
struct xtcp_timer *xtimer);
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -258,10 +258,6 @@
static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */
-static int tcp_timer_race;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
- 0, "Count of t_inpcb races on tcp_discardcb");
-
/*
* TCP timer processing.
*/
@@ -274,18 +270,7 @@
CURVNET_SET(tp->t_vnet);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
if (callout_pending(&tp->t_timers->tt_delack) ||
!callout_active(&tp->t_timers->tt_delack)) {
@@ -299,6 +284,10 @@
CURVNET_RESTORE();
return;
}
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+ ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+ KASSERT((tp->t_timers->tt_flags & TT_DELACK) != 0,
+ ("%s: tp %p delack callout should be running", __func__, tp));
tp->t_flags |= TF_ACKNOW;
TCPSTAT_INC(tcps_delack);
@@ -318,24 +307,9 @@
ostate = tp->t_state;
#endif
- /*
- * XXXRW: Does this actually happen?
- */
INP_INFO_WLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- INP_INFO_WUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
tcp_free_sackholes(tp);
if (callout_pending(&tp->t_timers->tt_2msl) ||
@@ -352,6 +326,10 @@
CURVNET_RESTORE();
return;
}
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+ ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+ KASSERT((tp->t_timers->tt_flags & TT_2MSL) != 0,
+ ("%s: tp %p 2msl callout should be running", __func__, tp));
/*
* 2 MSL timeout in shutdown went off. If we're closed but
* still waiting for peer to close and connection has been idle
@@ -402,19 +380,7 @@
#endif
INP_INFO_WLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- INP_INFO_WUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
if (callout_pending(&tp->t_timers->tt_keep) ||
!callout_active(&tp->t_timers->tt_keep)) {
@@ -430,6 +396,10 @@
CURVNET_RESTORE();
return;
}
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+ ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+ KASSERT((tp->t_timers->tt_flags & TT_KEEP) != 0,
+ ("%s: tp %p keep callout should be running", __func__, tp));
/*
* Keep-alive timer went off; send something
* or drop connection if idle for too long.
@@ -505,19 +475,7 @@
#endif
INP_INFO_WLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- INP_INFO_WUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
if (callout_pending(&tp->t_timers->tt_persist) ||
!callout_active(&tp->t_timers->tt_persist)) {
@@ -533,6 +491,10 @@
CURVNET_RESTORE();
return;
}
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+ ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+ KASSERT((tp->t_timers->tt_flags & TT_PERSIST) != 0,
+ ("%s: tp %p persist callout should be running", __func__, tp));
/*
* Persistance timer into zero window.
* Force a byte to be output, if possible.
@@ -594,19 +556,7 @@
INP_INFO_RLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- INP_INFO_RUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
if (callout_pending(&tp->t_timers->tt_rexmt) ||
!callout_active(&tp->t_timers->tt_rexmt)) {
@@ -622,6 +572,10 @@
CURVNET_RESTORE();
return;
}
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+ ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+ KASSERT((tp->t_timers->tt_flags & TT_REXMT) != 0,
+ ("%s: tp %p rexmt callout should be running", __func__, tp));
tcp_free_sackholes(tp);
/*
* Retransmission timer went off. Message has not
@@ -850,7 +804,7 @@
}
void
-tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
+tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
{
struct callout *t_callout;
timeout_t *f_callout;
@@ -862,6 +816,9 @@
return;
#endif
+ if (tp->t_timers->tt_flags & TT_STOPPED)
+ return;
+
switch (timer_type) {
case TT_DELACK:
t_callout = &tp->t_timers->tt_delack;
@@ -887,14 +844,23 @@
panic("tp %p bad timer_type %#x", tp, timer_type);
}
if (delta == 0) {
- callout_stop(t_callout);
+ if ((tp->t_timers->tt_flags & timer_type) &&
+ callout_stop(t_callout)) {
+ tp->t_timers->tt_flags &= ~timer_type;
+ }
} else {
- callout_reset_on(t_callout, delta, f_callout, tp, cpu);
+ if ((tp->t_timers->tt_flags & timer_type) == 0) {
+ tp->t_timers->tt_flags |= timer_type;
+ callout_reset_on(t_callout, delta, f_callout, tp, cpu);
+ } else {
+ /* Reset already running callout on the same CPU. */
+ callout_reset(t_callout, delta, f_callout, tp);
+ }
}
}
int
-tcp_timer_active(struct tcpcb *tp, int timer_type)
+tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
{
struct callout *t_callout;
@@ -920,6 +886,58 @@
return callout_active(t_callout);
}
+void
+tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
+{
+ struct callout *t_callout;
+ timeout_t *f_callout;
+
+ tp->t_timers->tt_flags |= TT_STOPPED;
+
+ switch (timer_type) {
+ case TT_DELACK:
+ t_callout = &tp->t_timers->tt_delack;
+ f_callout = tcp_timer_delack_discard;
+ break;
+ case TT_REXMT:
+ t_callout = &tp->t_timers->tt_rexmt;
+ f_callout = tcp_timer_rexmt_discard;
+ break;
+ case TT_PERSIST:
+ t_callout = &tp->t_timers->tt_persist;
+ f_callout = tcp_timer_persist_discard;
+ break;
+ case TT_KEEP:
+ t_callout = &tp->t_timers->tt_keep;
+ f_callout = tcp_timer_keep_discard;
+ break;
+ case TT_2MSL:
+ t_callout = &tp->t_timers->tt_2msl;
+ f_callout = tcp_timer_2msl_discard;
+ break;
+ default:
+ panic("tp %p bad timer_type %#x", tp, timer_type);
+ }
+
+ if (tp->t_timers->tt_flags & timer_type) {
+ if (callout_stop(t_callout)) {
+ tp->t_timers->tt_flags &= ~timer_type;
+ } else {
+ /*
+ * Can't stop the callout, defer tcpcb actual deletion
+ * to the last tcp timer discard callout.
+ * The TT_STOPPED flag will ensure that no tcp timer
+ * callouts can be restarted on our behalf, and
+ * past this point currently running callouts waiting
+ * on inp lock will return right away after the
+ * classical check for callout reset/stop events:
+ * callout_pending() || !callout_active()
+ */
+ callout_reset(t_callout, 1, f_callout, tp);
+ }
+ }
+}
+
#define ticks_to_msecs(t) (1000*(t) / hz)
void
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -708,8 +708,9 @@
struct tcptemp *
tcpip_maketemplate(struct inpcb *);
void tcpip_fillheaders(struct inpcb *, void *, void *);
-void tcp_timer_activate(struct tcpcb *, int, u_int);
-int tcp_timer_active(struct tcpcb *, int);
+void tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
+int tcp_timer_active(struct tcpcb *, uint32_t);
+void tcp_timer_stop(struct tcpcb *, uint32_t);
void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
/*
* All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Dec 15, 1:47 AM (9 h, 41 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
9091325
Default Alt Text
D2079.diff (14 KB)
Attached To
Mode
D2079: Fix TCP timers use-after-free old race conditions
Attached
Detach File
Event Timeline
Log In to Comment