Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F145694071
D43166.1778443455.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
11 KB
Referenced Files
None
Subscribers
None
D43166.1778443455.diff
View Options
diff --git a/sys/netinet/tcp_ecn.h b/sys/netinet/tcp_ecn.h
--- a/sys/netinet/tcp_ecn.h
+++ b/sys/netinet/tcp_ecn.h
@@ -38,6 +38,8 @@
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
+static u_int tcp_ecn_tso_cwr_split = 1;
+
void tcp_ecn_input_syn_sent(struct tcpcb *, uint16_t, int);
void tcp_ecn_input_parallel_syn(struct tcpcb *, uint16_t, int);
int tcp_ecn_input_segment(struct tcpcb *, uint16_t, int, int, int);
diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c
--- a/sys/netinet/tcp_ecn.c
+++ b/sys/netinet/tcp_ecn.c
@@ -110,6 +110,10 @@
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_ecn_maxretries), 0,
"Max retries before giving up on ECN");
+SYSCTL_UINT(_net_inet_tcp_ecn, OID_AUTO, tso_cwr_split,
+ CTLFLAG_RWTUN, &tcp_ecn_tso_cwr_split, 1,
+ "TSO has RFC3168 ECN support masking the CWR flag");
+
/*
* Process incoming SYN,ACK packet
*/
@@ -367,18 +371,12 @@
return thflags;
if (V_tcp_do_ecn == 1) {
/* Send a RFC3168 ECN setup <SYN> packet */
- if (tp->t_rxtshift >= 1) {
- if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
- thflags = TH_ECE|TH_CWR;
- } else
+ if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
thflags = TH_ECE|TH_CWR;
} else
if (V_tcp_do_ecn == 3) {
/* Send an Accurate ECN setup <SYN> packet */
- if (tp->t_rxtshift >= 1) {
- if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
- thflags = TH_ECE|TH_CWR|TH_AE;
- } else
+ if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
thflags = TH_ECE|TH_CWR|TH_AE;
}
@@ -401,12 +399,12 @@
* Ignore pure control packets, retransmissions
* and window probes.
*/
- newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
- !rxmit &&
+ newdata = (!rxmit && len > 0 &&
+ SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
!((tp->t_flags & TF_FORCEDATA) && len == 1));
/* RFC3168 ECN marking, only new data segments */
- if (newdata) {
- if (tp->t_flags2 & TF2_ECN_USE_ECT1) {
+ if (__predict_true(newdata)) {
+ if (__predict_false(tp->t_flags2 & TF2_ECN_USE_ECT1)) {
ipecn = IPTOS_ECN_ECT1;
TCPSTAT_INC(tcps_ecn_sndect1);
} else {
@@ -443,8 +441,11 @@
*thflags |= TH_CWR;
tp->t_flags2 &= ~TF2_ECN_SND_CWR;
}
- if (tp->t_flags2 & TF2_ECN_SND_ECE)
+ if (tp->t_flags2 & TF2_ECN_SND_ECE) {
*thflags |= TH_ECE;
+ if (tp->t_state == TCPS_SYN_RECEIVED)
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ }
}
return ipecn;
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -208,6 +208,7 @@
int sack_rxmit, sack_bytes_rxmt;
struct sackhole *p;
int tso, mtu;
+ int tcp_ect = IPTOS_ECN_NOTECT;
struct tcpopt to;
struct udphdr *udp = NULL;
struct tcp_log_buffer *lgb;
@@ -562,11 +563,12 @@
tso = 1;
if (sack_rxmit) {
- if (SEQ_LT(p->rxmit + len, tp->snd_una + sbused(&so->so_snd)))
+ if (SEQ_LT(p->rxmit + len,
+ tp->snd_una + sbused(&so->so_snd)))
flags &= ~TH_FIN;
} else {
- if (SEQ_LT(tp->snd_nxt + len, tp->snd_una +
- sbused(&so->so_snd)))
+ if (SEQ_LT(tp->snd_nxt + len,
+ tp->snd_una + sbused(&so->so_snd)))
flags &= ~TH_FIN;
}
@@ -895,6 +897,50 @@
}
hdrlen += sizeof(struct udphdr);
}
+ /*
+ * If we are starting a connection, send ECN setup
+ * SYN packet. If we are on a retransmit, we may
+ * resend those bits a number of times as per
+ * RFC 3168.
+ */
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+ flags |= tcp_ecn_output_syn_sent(tp);
+ }
+ /* Also handle parallel SYN for ECN */
+ if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
+ tcp_ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
+ }
+ /*
+ * Selectively bypass TSO for segments with the CWR flag set, as
+ * TSO support for ECN is inconsistent and frequently broken - either
+ * setting CWR on all packets, dropping the TSO mbuf entirely, or
+ * clearing the CWR bit when it may be inappropriate.
+ */
+ if (__predict_false((tso && flags & TH_CWR)) {
+ if (__predict_true(tcp_ecn_tso_cwr_split)) {
+ if (__predict_false((tp->t_flags2 &
+ (TF2_ECN_PERMIT | TF2_ACE_PERMIT) ==
+ (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
+ /*
+ * AccECN case - send the CWR segment without TSO when
+ * the TSO implementation does RFC3168 CWR handling
+ * (clearing the flag on all but the first segment).
+ */
+ tso = 0;
+ }
+ } else {
+ if (__predict_false(tp->t_flags2 &
+ (TF2_ECN_PERMIT | TF2_ACE_PERMIT) ==
+ TF2_ECN_PERMIT)) {
+ /*
+ * RFC3168 case - send the CWR segment without TSO when the
+ * hardware/driver does not clear CWR during TSO processing.
+ */
+ tso = 0;
+ }
+ }
+ }
/*
* Adjust data length if insertion of options will
* bump the packet length beyond the t_maxseg length.
@@ -1182,32 +1228,17 @@
if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
tp->snd_nxt == tp->snd_max)
tp->snd_nxt--;
- /*
- * If we are starting a connection, send ECN setup
- * SYN packet. If we are on a retransmit, we may
- * resend those bits a number of times as per
- * RFC 3168.
- */
- if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
- flags |= tcp_ecn_output_syn_sent(tp);
- }
- /* Also handle parallel SYN for ECN */
- if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
- (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
- int ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
- if ((tp->t_state == TCPS_SYN_RECEIVED) &&
- (tp->t_flags2 & TF2_ECN_SND_ECE))
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ if (tcp_ect) {
#ifdef INET6
if (isipv6) {
ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << IPV6_FLOWLABEL_LEN);
- ip6->ip6_flow |= htonl(ect << IPV6_FLOWLABEL_LEN);
+ ip6->ip6_flow |= htonl(tcp_ect << IPV6_FLOWLABEL_LEN);
}
else
#endif
{
ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= ect;
+ ip->ip_tos |= tcp_ect;
}
}
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -18703,6 +18703,7 @@
u_char opt[TCP_MAXOLEN];
uint32_t hdrlen, optlen;
int32_t slot, segsiz, max_val, tso = 0, error = 0, ulen = 0;
+ int32_t tcp_ect = IPTOS_ECN_NOTECT;
uint16_t flags;
uint32_t if_hw_tsomaxsegcount = 0, startseq;
uint32_t if_hw_tsomaxsegsize;
@@ -18818,6 +18819,28 @@
} else {
tso = 0;
}
+ /*
+ * If we are starting a connection, send ECN setup
+ * SYN packet. If we are on a retransmit, we may
+ * resend those bits a number of times as per
+ * RFC 3168.
+ */
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+ flags |= tcp_ecn_output_syn_sent(tp);
+ }
+ /* Also handle parallel SYN for ECN */
+ if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
+ tcp_ect = tcp_ecn_output_established(tp, &flags, len, true);
+ }
+ /*
+ * Disallow use of TSO while sending segments with the CWR flag set,
+ * as the support for TSO with ECN is inconsistent and frequently
+ * broken - either setting CWR on all packets, dropping the TSO
+ * mbuf entirely, or clearing the CWR bit when it may be inappropriate.
+ */
+ if (flags & TH_CWR)
+ tso = 0;
if ((tso == 0) && (len > segsiz))
len = segsiz;
(void)tcp_get_usecs(tv);
@@ -18888,22 +18911,17 @@
udp->uh_ulen = htons(ulen);
}
m->m_pkthdr.rcvif = (struct ifnet *)0;
- if (TCPS_HAVERCVDSYN(tp->t_state) &&
- (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
- int ect = tcp_ecn_output_established(tp, &flags, len, true);
- if ((tp->t_state == TCPS_SYN_RECEIVED) &&
- (tp->t_flags2 & TF2_ECN_SND_ECE))
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ if (tcp_ect) {
#ifdef INET6
if (rack->r_is_v6) {
ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
- ip6->ip6_flow |= htonl(ect << 20);
+ ip6->ip6_flow |= htonl(tcp_ect << 20);
}
else
#endif
{
ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= ect;
+ ip->ip_tos |= tcp_ect;
}
}
if (rack->r_ctl.crte != NULL) {
@@ -19306,6 +19324,7 @@
int cnt_thru = 1;
#endif
int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0;
+ int32_t tcp_ect = IPTOS_ECN_NOTECT;
uint16_t flags;
uint32_t s_soff;
uint32_t if_hw_tsomaxsegcount = 0, startseq;
@@ -19409,6 +19428,28 @@
} else {
tso = 0;
}
+ /*
+ * If we are starting a connection, send ECN setup
+ * SYN packet. If we are on a retransmit, we may
+ * resend those bits a number of times as per
+ * RFC 3168.
+ */
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+ flags |= tcp_ecn_output_syn_sent(tp);
+ }
+ /* Also handle parallel SYN for ECN */
+ if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
+ tcp_ect = tcp_ecn_output_established(tp, &flags, len, false);
+ }
+ /*
+ * Disallow use of TSO while sending segments with the CWR flag set,
+ * as the support for TSO with ECN is inconsistent and frequently
+ * broken - either setting CWR on all packets, dropping the TSO
+ * mbuf entirely, or clearing the CWR bit when it may be inappropriate.
+ */
+ if (flags & TH_CWR)
+ tso = 0;
if ((tso == 0) && (len > segsiz))
len = segsiz;
(void)tcp_get_usecs(tv);
@@ -19463,23 +19504,18 @@
udp->uh_ulen = htons(ulen);
}
m->m_pkthdr.rcvif = (struct ifnet *)0;
- if (TCPS_HAVERCVDSYN(tp->t_state) &&
- (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
- int ect = tcp_ecn_output_established(tp, &flags, len, false);
- if ((tp->t_state == TCPS_SYN_RECEIVED) &&
- (tp->t_flags2 & TF2_ECN_SND_ECE))
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ if (tcp_ect) {
#ifdef INET6
if (rack->r_is_v6) {
ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
- ip6->ip6_flow |= htonl(ect << 20);
+ ip6->ip6_flow |= htonl(tcp_ect << 20);
}
else
#endif
{
#ifdef INET
ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= ect;
+ ip->ip_tos |= tcp_ect;
#endif
}
}
@@ -19933,6 +19969,7 @@
volatile int32_t sack_rxmit;
struct rack_sendmap *rsm = NULL;
int32_t tso, mtu;
+ int32_t tcp_ect = IPTOS_ECN_NOTECT;
struct tcpopt to;
int32_t slot = 0;
int32_t sup_rack = 0;
@@ -21433,6 +21470,28 @@
ipoptlen += ipsec_optlen;
#endif
+ /*
+ * If we are starting a connection, send ECN setup
+ * SYN packet. If we are on a retransmit, we may
+ * resend those bits a number of times as per
+ * RFC 3168.
+ */
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+ flags |= tcp_ecn_output_syn_sent(tp);
+ }
+ /* Also handle parallel SYN for ECN */
+ if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
+ tcp_ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
+ }
+ /*
+ * Disallow use of TSO while sending segments with the CWR flag set,
+ * as the support for TSO with ECN is inconsistent and frequently
+ * broken - either setting CWR on all packets, dropping the TSO
+ * mbuf entirely, or clearing the CWR bit when it may be inappropriate.
+ */
+ if (flags & TH_CWR)
+ tso = 0;
/*
* Adjust data length if insertion of options will bump the packet
* length beyond the t_maxseg length. Clear the FIN bit because we
@@ -21755,32 +21814,18 @@
if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
tp->snd_nxt == tp->snd_max)
tp->snd_nxt--;
- /*
- * If we are starting a connection, send ECN setup SYN packet. If we
- * are on a retransmit, we may resend those bits a number of times
- * as per RFC 3168.
- */
- if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
- flags |= tcp_ecn_output_syn_sent(tp);
- }
- /* Also handle parallel SYN for ECN */
- if (TCPS_HAVERCVDSYN(tp->t_state) &&
- (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
- int ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
- if ((tp->t_state == TCPS_SYN_RECEIVED) &&
- (tp->t_flags2 & TF2_ECN_SND_ECE))
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ if (tcp_ect) {
#ifdef INET6
if (isipv6) {
ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
- ip6->ip6_flow |= htonl(ect << 20);
+ ip6->ip6_flow |= htonl(tcp_ect << 20);
}
else
#endif
{
#ifdef INET
ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= ect;
+ ip->ip_tos |= tcp_ect;
#endif
}
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, May 10, 8:04 PM (2 h, 11 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28646238
Default Alt Text
D43166.1778443455.diff (11 KB)
Attached To
Mode
D43166: tcp: bypass TSO when CWR bit is to be sent
Attached
Detach File
Event Timeline
Log In to Comment