Page MenuHomeFreeBSD

D43166.1778443455.diff
No OneTemporary

Size
11 KB
Referenced Files
None
Subscribers
None

D43166.1778443455.diff

diff --git a/sys/netinet/tcp_ecn.h b/sys/netinet/tcp_ecn.h
--- a/sys/netinet/tcp_ecn.h
+++ b/sys/netinet/tcp_ecn.h
@@ -38,6 +38,8 @@
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
+extern u_int tcp_ecn_tso_cwr_split;	/* defined once in tcp_ecn.c */
+
void tcp_ecn_input_syn_sent(struct tcpcb *, uint16_t, int);
void tcp_ecn_input_parallel_syn(struct tcpcb *, uint16_t, int);
int tcp_ecn_input_segment(struct tcpcb *, uint16_t, int, int, int);
diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c
--- a/sys/netinet/tcp_ecn.c
+++ b/sys/netinet/tcp_ecn.c
@@ -110,6 +110,15 @@
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_ecn_maxretries), 0,
"Max retries before giving up on ECN");
+/*
+ * Single shared instance of the knob.  It must not be a "static" in
+ * tcp_ecn.h: that would give every file including the header a private
+ * copy, and the sysctl below would not affect the copy tcp_output.c reads.
+ */
+u_int tcp_ecn_tso_cwr_split = 1;
+SYSCTL_UINT(_net_inet_tcp_ecn, OID_AUTO, tso_cwr_split,
+    CTLFLAG_RWTUN, &tcp_ecn_tso_cwr_split, 1,
+    "TSO has RFC3168 ECN support masking the CWR flag");
+
/*
* Process incoming SYN,ACK packet
*/
@@ -367,18 +371,12 @@
return thflags;
if (V_tcp_do_ecn == 1) {
/* Send a RFC3168 ECN setup <SYN> packet */
- if (tp->t_rxtshift >= 1) {
- if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
- thflags = TH_ECE|TH_CWR;
- } else
+ if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
thflags = TH_ECE|TH_CWR;
} else
if (V_tcp_do_ecn == 3) {
/* Send an Accurate ECN setup <SYN> packet */
- if (tp->t_rxtshift >= 1) {
- if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
- thflags = TH_ECE|TH_CWR|TH_AE;
- } else
+ if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
thflags = TH_ECE|TH_CWR|TH_AE;
}
@@ -401,12 +399,12 @@
* Ignore pure control packets, retransmissions
* and window probes.
*/
- newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
- !rxmit &&
+ newdata = (!rxmit && len > 0 &&
+ SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
!((tp->t_flags & TF_FORCEDATA) && len == 1));
/* RFC3168 ECN marking, only new data segments */
- if (newdata) {
- if (tp->t_flags2 & TF2_ECN_USE_ECT1) {
+ if (__predict_true(newdata)) {
+ if (__predict_false(tp->t_flags2 & TF2_ECN_USE_ECT1)) {
ipecn = IPTOS_ECN_ECT1;
TCPSTAT_INC(tcps_ecn_sndect1);
} else {
@@ -443,8 +441,11 @@
*thflags |= TH_CWR;
tp->t_flags2 &= ~TF2_ECN_SND_CWR;
}
- if (tp->t_flags2 & TF2_ECN_SND_ECE)
+ if (tp->t_flags2 & TF2_ECN_SND_ECE) {
*thflags |= TH_ECE;
+ if (tp->t_state == TCPS_SYN_RECEIVED)
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ }
}
return ipecn;
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -208,6 +208,7 @@
int sack_rxmit, sack_bytes_rxmt;
struct sackhole *p;
int tso, mtu;
+ int tcp_ect = IPTOS_ECN_NOTECT;
struct tcpopt to;
struct udphdr *udp = NULL;
struct tcp_log_buffer *lgb;
@@ -562,11 +563,12 @@
tso = 1;
if (sack_rxmit) {
- if (SEQ_LT(p->rxmit + len, tp->snd_una + sbused(&so->so_snd)))
+ if (SEQ_LT(p->rxmit + len,
+ tp->snd_una + sbused(&so->so_snd)))
flags &= ~TH_FIN;
} else {
- if (SEQ_LT(tp->snd_nxt + len, tp->snd_una +
- sbused(&so->so_snd)))
+ if (SEQ_LT(tp->snd_nxt + len,
+ tp->snd_una + sbused(&so->so_snd)))
flags &= ~TH_FIN;
}
@@ -895,6 +897,50 @@
}
hdrlen += sizeof(struct udphdr);
}
+ /*
+ * If we are starting a connection, send ECN setup
+ * SYN packet. If we are on a retransmit, we may
+ * resend those bits a number of times as per
+ * RFC 3168.
+ */
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+ flags |= tcp_ecn_output_syn_sent(tp);
+ }
+ /* Also handle parallel SYN for ECN */
+ if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
+ tcp_ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
+ }
+	/*
+	 * TSO support for ECN is inconsistent and frequently broken (CWR set
+	 * on all packets, TSO mbuf dropped entirely, or CWR cleared when it
+	 * may be inappropriate), so disallow TSO for segments carrying CWR.
+	 * t_flags2 is masked before comparing: == binds tighter than & in C.
+	 */
+	if (__predict_false(tso && (flags & TH_CWR))) {
+		if (__predict_true(tcp_ecn_tso_cwr_split)) {
+			if (__predict_false((tp->t_flags2 &
+			    (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) ==
+			    (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
+				/*
+				 * AccECN case - split CWR when TSO supports
+				 * RFC3168 CWR handling (clearing the flag on
+				 * all but the first segment).
+				 */
+				tso = 0;
+			}
+		} else {
+			if (__predict_false((tp->t_flags2 &
+			    (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) ==
+			    TF2_ECN_PERMIT)) {
+				/*
+				 * RFC3168 case - split CWR when TSO hw/dev does not
+				 * clear CWR when doing TSO processing.
+				 */
+				tso = 0;
+			}
+		}
+	}
/*
* Adjust data length if insertion of options will
* bump the packet length beyond the t_maxseg length.
@@ -1182,32 +1228,17 @@
if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
tp->snd_nxt == tp->snd_max)
tp->snd_nxt--;
- /*
- * If we are starting a connection, send ECN setup
- * SYN packet. If we are on a retransmit, we may
- * resend those bits a number of times as per
- * RFC 3168.
- */
- if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
- flags |= tcp_ecn_output_syn_sent(tp);
- }
- /* Also handle parallel SYN for ECN */
- if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
- (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
- int ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
- if ((tp->t_state == TCPS_SYN_RECEIVED) &&
- (tp->t_flags2 & TF2_ECN_SND_ECE))
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ if (tcp_ect) {
#ifdef INET6
if (isipv6) {
ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << IPV6_FLOWLABEL_LEN);
- ip6->ip6_flow |= htonl(ect << IPV6_FLOWLABEL_LEN);
+ ip6->ip6_flow |= htonl(tcp_ect << IPV6_FLOWLABEL_LEN);
}
else
#endif
{
ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= ect;
+ ip->ip_tos |= tcp_ect;
}
}
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -18703,6 +18703,7 @@
u_char opt[TCP_MAXOLEN];
uint32_t hdrlen, optlen;
int32_t slot, segsiz, max_val, tso = 0, error = 0, ulen = 0;
+ int32_t tcp_ect = IPTOS_ECN_NOTECT;
uint16_t flags;
uint32_t if_hw_tsomaxsegcount = 0, startseq;
uint32_t if_hw_tsomaxsegsize;
@@ -18818,6 +18819,28 @@
} else {
tso = 0;
}
+ /*
+ * If we are starting a connection, send ECN setup
+ * SYN packet. If we are on a retransmit, we may
+ * resend those bits a number of times as per
+ * RFC 3168.
+ */
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+ flags |= tcp_ecn_output_syn_sent(tp);
+ }
+ /* Also handle parallel SYN for ECN */
+ if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
+ tcp_ect = tcp_ecn_output_established(tp, &flags, len, true);
+ }
+ /*
+ * Disallow use of TSO while sending segments with the CWR flag set,
+ * as the support for TSO with ECN is inconsistent and frequently
+ * broken - either setting CWR on all packets, dropping the TSO
+ * mbuf entirely, or clearing the CWR bit when it may be inappropriate.
+ */
+ if (flags & TH_CWR)
+ tso = 0;
if ((tso == 0) && (len > segsiz))
len = segsiz;
(void)tcp_get_usecs(tv);
@@ -18888,22 +18911,17 @@
udp->uh_ulen = htons(ulen);
}
m->m_pkthdr.rcvif = (struct ifnet *)0;
- if (TCPS_HAVERCVDSYN(tp->t_state) &&
- (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
- int ect = tcp_ecn_output_established(tp, &flags, len, true);
- if ((tp->t_state == TCPS_SYN_RECEIVED) &&
- (tp->t_flags2 & TF2_ECN_SND_ECE))
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ if (tcp_ect) {
#ifdef INET6
if (rack->r_is_v6) {
ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
- ip6->ip6_flow |= htonl(ect << 20);
+ ip6->ip6_flow |= htonl(tcp_ect << 20);
}
else
#endif
{
ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= ect;
+ ip->ip_tos |= tcp_ect;
}
}
if (rack->r_ctl.crte != NULL) {
@@ -19306,6 +19324,7 @@
int cnt_thru = 1;
#endif
int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0;
+ int32_t tcp_ect = IPTOS_ECN_NOTECT;
uint16_t flags;
uint32_t s_soff;
uint32_t if_hw_tsomaxsegcount = 0, startseq;
@@ -19409,6 +19428,28 @@
} else {
tso = 0;
}
+ /*
+ * If we are starting a connection, send ECN setup
+ * SYN packet. If we are on a retransmit, we may
+ * resend those bits a number of times as per
+ * RFC 3168.
+ */
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+ flags |= tcp_ecn_output_syn_sent(tp);
+ }
+ /* Also handle parallel SYN for ECN */
+ if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
+ tcp_ect = tcp_ecn_output_established(tp, &flags, len, false);
+ }
+ /*
+ * Disallow use of TSO while sending segments with the CWR flag set,
+ * as the support for TSO with ECN is inconsistent and frequently
+ * broken - either setting CWR on all packets, dropping the TSO
+ * mbuf entirely, or clearing the CWR bit when it may be inappropriate.
+ */
+ if (flags & TH_CWR)
+ tso = 0;
if ((tso == 0) && (len > segsiz))
len = segsiz;
(void)tcp_get_usecs(tv);
@@ -19463,23 +19504,18 @@
udp->uh_ulen = htons(ulen);
}
m->m_pkthdr.rcvif = (struct ifnet *)0;
- if (TCPS_HAVERCVDSYN(tp->t_state) &&
- (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
- int ect = tcp_ecn_output_established(tp, &flags, len, false);
- if ((tp->t_state == TCPS_SYN_RECEIVED) &&
- (tp->t_flags2 & TF2_ECN_SND_ECE))
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ if (tcp_ect) {
#ifdef INET6
if (rack->r_is_v6) {
ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
- ip6->ip6_flow |= htonl(ect << 20);
+ ip6->ip6_flow |= htonl(tcp_ect << 20);
}
else
#endif
{
#ifdef INET
ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= ect;
+ ip->ip_tos |= tcp_ect;
#endif
}
}
@@ -19933,6 +19969,7 @@
volatile int32_t sack_rxmit;
struct rack_sendmap *rsm = NULL;
int32_t tso, mtu;
+ int32_t tcp_ect = IPTOS_ECN_NOTECT;
struct tcpopt to;
int32_t slot = 0;
int32_t sup_rack = 0;
@@ -21433,6 +21470,28 @@
ipoptlen += ipsec_optlen;
#endif
+ /*
+ * If we are starting a connection, send ECN setup
+ * SYN packet. If we are on a retransmit, we may
+ * resend those bits a number of times as per
+ * RFC 3168.
+ */
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+ flags |= tcp_ecn_output_syn_sent(tp);
+ }
+ /* Also handle parallel SYN for ECN */
+ if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
+ tcp_ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
+ }
+ /*
+ * Disallow use of TSO while sending segments with the CWR flag set,
+ * as the support for TSO with ECN is inconsistent and frequently
+ * broken - either setting CWR on all packets, dropping the TSO
+ * mbuf entirely, or clearing the CWR bit when it may be inappropriate.
+ */
+ if (flags & TH_CWR)
+ tso = 0;
/*
* Adjust data length if insertion of options will bump the packet
* length beyond the t_maxseg length. Clear the FIN bit because we
@@ -21755,32 +21814,18 @@
if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
tp->snd_nxt == tp->snd_max)
tp->snd_nxt--;
- /*
- * If we are starting a connection, send ECN setup SYN packet. If we
- * are on a retransmit, we may resend those bits a number of times
- * as per RFC 3168.
- */
- if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
- flags |= tcp_ecn_output_syn_sent(tp);
- }
- /* Also handle parallel SYN for ECN */
- if (TCPS_HAVERCVDSYN(tp->t_state) &&
- (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
- int ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
- if ((tp->t_state == TCPS_SYN_RECEIVED) &&
- (tp->t_flags2 & TF2_ECN_SND_ECE))
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ if (tcp_ect) {
#ifdef INET6
if (isipv6) {
ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
- ip6->ip6_flow |= htonl(ect << 20);
+ ip6->ip6_flow |= htonl(tcp_ect << 20);
}
else
#endif
{
#ifdef INET
ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= ect;
+ ip->ip_tos |= tcp_ect;
#endif
}
}

File Metadata

Mime Type
text/plain
Expires
Sun, May 10, 8:04 PM (2 h, 11 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28646238
Default Alt Text
D43166.1778443455.diff (11 KB)

Event Timeline