diff --git a/sys/conf/config.mk b/sys/conf/config.mk --- a/sys/conf/config.mk +++ b/sys/conf/config.mk @@ -32,6 +32,8 @@ opt_ipsec.h: @echo "#define IPSEC_SUPPORT 1" > ${.TARGET} .endif +opt_accecn.h: + @echo "#define TCP_ACCECNOPT 1" > ${.TARGET} .if ${MK_RATELIMIT} != "no" opt_ratelimit.h: @echo "#define RATELIMIT 1" > ${.TARGET} diff --git a/sys/conf/options b/sys/conf/options --- a/sys/conf/options +++ b/sys/conf/options @@ -475,6 +475,7 @@ TCP_RFC7413_MAX_KEYS opt_inet.h TCP_RFC7413_MAX_PSKS opt_inet.h TCP_SIGNATURE opt_ipsec.h +TCP_ACCECNOPT opt_accecn.h VLAN_ARRAY opt_vlan.h XDR XBONEHACK diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h --- a/sys/netinet/tcp.h +++ b/sys/netinet/tcp.h @@ -104,6 +104,12 @@ #define TCPOLEN_SIGNATURE 18 #define TCPOPT_FAST_OPEN 34 #define TCPOLEN_FAST_OPEN_EMPTY 2 +#if defined(TCP_ACCECNOPT) +#define TCPOPT_ACCECN_0 0xAC +#define TCPOPT_ACCECN_1 0XAE +#define TCPOLEN_ACCECN_EMPTY 2 /* 3 bytes exp opt */ +#define TCPOLEN_ACCECN_COUNTER 3 +#endif /* TCP_ACCECNOPT */ #define MAX_TCPOPTLEN 40 /* Absolute maximum TCP options len */ diff --git a/sys/netinet/tcp_ecn.h b/sys/netinet/tcp_ecn.h --- a/sys/netinet/tcp_ecn.h +++ b/sys/netinet/tcp_ecn.h @@ -43,7 +43,11 @@ void tcp_ecn_input_syn_sent(struct tcpcb *, uint16_t, int); void tcp_ecn_input_parallel_syn(struct tcpcb *, uint16_t, int); +#if defined(TCP_ACCECNOPT) +int tcp_ecn_input_segment(struct tcpcb *, uint16_t, int, int); +#else int tcp_ecn_input_segment(struct tcpcb *, uint16_t, int); +#endif /* TCP_ACCECNOPT */ uint16_t tcp_ecn_output_syn_sent(struct tcpcb *); int tcp_ecn_output_established(struct tcpcb *, uint16_t *, int, bool); void tcp_ecn_syncache_socket(struct tcpcb *, struct syncache *); diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c --- a/sys/netinet/tcp_ecn.c +++ b/sys/netinet/tcp_ecn.c @@ -62,6 +62,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_tcpdebug.h" +#include "opt_accecn.h" #include #include @@ -109,7 +110,6 @@ void tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos) { - if (V_tcp_do_ecn == 0) return; if ((V_tcp_do_ecn == 1) || @@ -261,19 +261,35 @@ * TCP ECN processing. */ int +#if defined(TCP_ACCECNOPT) +tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int tlen, int iptos) +#else tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos) +#endif /* TCP_ACCECNOPT */ { int delta_ace = 0; if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) { switch (iptos & IPTOS_ECN_MASK) { case IPTOS_ECN_CE: +#if defined(TCP_ACCECNOPT) + tp->t_flags2 |= TF2_ACO_CE; + tp->t_rceb += tlen; +#endif /* TCP_ACCECNOPT */ TCPSTAT_INC(tcps_ecn_ce); break; case IPTOS_ECN_ECT0: +#if defined(TCP_ACCECNOPT) + tp->t_flags2 |= TF2_ACO_E0; + tp->t_re0b += tlen; +#endif /* TCP_ACCECNOPT */ TCPSTAT_INC(tcps_ecn_ect0); break; case IPTOS_ECN_ECT1: +#if defined(TCP_ACCECNOPT) + tp->t_flags2 |= TF2_ACO_E1; + tp->t_re1b += tlen; +#endif /* TCP_ACCECNOPT */ TCPSTAT_INC(tcps_ecn_ect1); break; } diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -56,6 +56,7 @@ #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" +#include "opt_accecn.h" #include #include @@ -1627,7 +1628,11 @@ /* * TCP ECN processing. */ +#if defined(TCP_ACCECNOPT) + if (tcp_ecn_input_segment(tp, thflags, tlen, iptos)) +#else if (tcp_ecn_input_segment(tp, thflags, iptos)) +#endif /* TCP_ACCECNOPT */ cc_cong_signal(tp, th, CC_ECN); /* diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -39,6 +39,7 @@ #include "opt_ipsec.h" #include "opt_kern_tls.h" #include "opt_tcpdebug.h" +#include "opt_accecn.h" #include #include @@ -889,9 +890,40 @@ if (tp->t_flags & TF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif /* TCP_SIGNATURE */ - +#if defined(TCP_ACCECNOPT) + /* + * AccECN option + * Don't send on , only on or + * when doing an AccECN session + */ + if (((V_tcp_do_ecn == 3) || (V_tcp_do_ecn == 4)) && + ((tp->t_flags2 & TF2_ACE_PERMIT) || + ((flags & TH_SYN) && (flags & TH_ACK)))) { + to.to_flags |= TOF_ACCECNOPT; + to.to_ee0b = tp->t_re0b; + to.to_ee1b = tp->t_re1b; + to.to_eceb = tp->t_rceb; + to.to_acceflags = ((tp->t_flags2 & TF2_ACO_E0) ? TOF_ACCE_E0 : 0) | + ((tp->t_flags2 & TF2_ACO_E1) ? TOF_ACCE_E1 : 0) | + ((tp->t_flags2 & TF2_ACO_CE) ? TOF_ACCE_CE : 0); + if (flags & TH_SYN) + to.to_acceflags |= TOF_ACCE_SYN; + if (tp->t_flags & TF_ACKNOW) + to.to_acceflags |= TOF_ACCE_ACKNOW; + } +#endif /* TCP_ACCECNOPT */ /* Processing the options. */ hdrlen += optlen = tcp_addoptions(&to, opt); +#if defined(TCP_ACCECNOPT) + if (to.to_flags & TOF_ACCECNOPT) { + if (to.to_acceflags & TOF_ACCE_E0) + tp->t_flags2 &= ~TF2_ACO_E0; + if (to.to_acceflags & TOF_ACCE_E1) + tp->t_flags2 &= ~TF2_ACO_E1; + if (to.to_acceflags & TOF_ACCE_CE) + tp->t_flags2 &= ~TF2_ACO_CE; + } +#endif /* TCP_ACCECNOPT */ /* * If we wanted a TFO option to be added, but it was unable * to fit, ensure no data is sent. @@ -1934,6 +1966,139 @@ optlen += total_len; break; } +#if defined(TCP_ACCECNOPT) + case TOF_ACCECNOPT: + { + log(2, "aco - flags %b\n", to->to_acceflags, "\20\1SYN\2CE\3E0\4E1\5NOW"); + int max_len = TCP_MAXOLEN - optlen; + if (max_len < TCPOLEN_ACCECN_EMPTY) { + log(2, "aco: not enough space\n"); + to->to_flags &= ~TOF_ACCECNOPT; + continue; + } + if (max_len < (TCPOLEN_ACCECN_EMPTY + + 1 * TCPOLEN_ACCECN_COUNTER)) { + log(2, "aco: empty option only\n"); + if (to->to_acceflags & TOF_ACCE_SYN) { + *optp++ = TCPOPT_ACCECN_0; + optlen += TCPOLEN_ACCECN_EMPTY; + *optp++ = TCPOLEN_ACCECN_EMPTY; + continue; + } else { + to->to_flags &= ~TOF_ACCECNOPT; + continue; + } + } + if (max_len < (TCPOLEN_ACCECN_EMPTY + + 2 * TCPOLEN_ACCECN_COUNTER)) { + log(2, "aco: 1 couter option\n"); + if (to->to_acceflags & TOF_ACCE_E1) { + *optp++ = TCPOPT_ACCECN_1; + *optp++ = TCPOLEN_ACCECN_EMPTY + + TCPOLEN_ACCECN_COUNTER; + optlen += TCPOLEN_ACCECN_EMPTY + + TCPOLEN_ACCECN_COUNTER; + *optp++ = (char)(to->to_ee1b >> 16); + *optp++ = (char)(to->to_ee1b >> 8); + *optp++ = (char)(to->to_ee1b); + to->to_acceflags &= ~TOF_ACCE_E1; + continue; + } + *optp++ = TCPOPT_ACCECN_0; + *optp++ = TCPOLEN_ACCECN_EMPTY + + TCPOLEN_ACCECN_COUNTER; + optlen += TCPOLEN_ACCECN_EMPTY + + TCPOLEN_ACCECN_COUNTER; + *optp++ = (char)(to->to_ee0b >> 16); + *optp++ = (char)(to->to_ee0b >> 8); + *optp++ = (char)(to->to_ee0b); + to->to_acceflags &= ~TOF_ACCE_E0; + continue; + } + if (max_len < (TCPOLEN_ACCECN_EMPTY + + 3 * TCPOLEN_ACCECN_COUNTER)) { + log(2, "aco: 2 couters option\n"); + if (to->to_acceflags & TOF_ACCE_E1) { + *optp++ = TCPOPT_ACCECN_1; + *optp++ = TCPOLEN_ACCECN_EMPTY + + 2 * TCPOLEN_ACCECN_COUNTER; + optlen += TCPOLEN_ACCECN_EMPTY + + 2 * TCPOLEN_ACCECN_COUNTER; + *optp++ = (char)(to->to_ee1b >> 16); + *optp++ = (char)(to->to_ee1b >> 8); + *optp++ = (char)(to->to_ee1b); + to->to_acceflags &= ~TOF_ACCE_E1; + *optp++ = (char)(to->to_eceb >> 16); + *optp++ = (char)(to->to_eceb >> 8); + *optp++ = (char)(to->to_eceb); + to->to_acceflags &= ~TOF_ACCE_CE; + continue; + } + *optp++ = TCPOPT_ACCECN_0; + *optp++ = TCPOLEN_ACCECN_EMPTY + + 2 * TCPOLEN_ACCECN_COUNTER; + optlen += TCPOLEN_ACCECN_EMPTY + + 2 * TCPOLEN_ACCECN_COUNTER; + *optp++ = (char)(to->to_ee0b >> 16); + *optp++ = (char)(to->to_ee0b >> 8); + *optp++ = (char)(to->to_ee0b); + to->to_acceflags &= ~TOF_ACCE_E0; + *optp++ = (char)(to->to_eceb >> 16); + *optp++ = (char)(to->to_eceb >> 8); + *optp++ = (char)(to->to_eceb); + to->to_acceflags &= ~TOF_ACCE_CE; + continue; + } + /* + * TCP option sufficient to hold full AccECN option + * but only send changed counters normally, + * full counters on ACKNOW + */ + log(2, "aco: full option\n"); + if (to->to_acceflags & TOF_ACCE_E1) { + log(2, "aco: e1 set\n"); + *optp++ = TCPOPT_ACCECN_1; + *optp++ = TCPOLEN_ACCECN_EMPTY + + 3 * TCPOLEN_ACCECN_COUNTER; + optlen += TCPOLEN_ACCECN_EMPTY + + 3 * TCPOLEN_ACCECN_COUNTER; + *optp++ = (char)(to->to_ee1b >> 16); + *optp++ = (char)(to->to_ee1b >> 8); + *optp++ = (char)(to->to_ee1b); + to->to_acceflags &= ~TOF_ACCE_E1; + *optp++ = (char)(to->to_eceb >> 16); + *optp++ = (char)(to->to_eceb >> 8); + *optp++ = (char)(to->to_eceb); + to->to_acceflags &= ~TOF_ACCE_CE; + *optp++ = (char)(to->to_ee0b >> 16); + *optp++ = (char)(to->to_ee0b >> 8); + *optp++ = (char)(to->to_ee0b); + to->to_acceflags &= ~TOF_ACCE_E0; + continue; + } else { + log(2, "aco: e1 not set\n"); + *optp++ = TCPOPT_ACCECN_0; + *optp++ = TCPOLEN_ACCECN_EMPTY + + 3 * TCPOLEN_ACCECN_COUNTER; + optlen += TCPOLEN_ACCECN_EMPTY + + 3 * TCPOLEN_ACCECN_COUNTER; + *optp++ = (char)(to->to_ee0b >> 16); + *optp++ = (char)(to->to_ee0b >> 8); + *optp++ = (char)(to->to_ee0b); + to->to_acceflags &= ~TOF_ACCE_E0; + *optp++ = (char)(to->to_eceb >> 16); + *optp++ = (char)(to->to_eceb >> 8); + *optp++ = (char)(to->to_eceb); + to->to_acceflags &= ~TOF_ACCE_CE; + *optp++ = (char)(to->to_ee1b >> 16); + *optp++ = (char)(to->to_ee1b >> 8); + *optp++ = (char)(to->to_ee1b); + to->to_acceflags &= ~TOF_ACCE_E1; + continue; + } + } +#else +#endif /* TCP_ACCECNOPT */ default: panic("%s: unknown TCP option type", __func__); break; diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -39,6 +39,7 @@ #include "opt_ipsec.h" #include "opt_kern_tls.h" #include "opt_tcpdebug.h" +#include "opt_accecn.h" #include #include @@ -2310,6 +2311,13 @@ tcp_log_tcpcbinit(tp); #endif tp->t_pacing_rate = -1; + if (V_tcp_do_lrd) + tp->t_flags |= TF_LRD; +#ifdef TCP_ACCECNOPT + tp->t_re0b = 1; + tp->t_re1b = 1; + tp->t_rceb = 0; +#endif if (tp->t_fb->tfb_tcp_fb_init) { if ((*tp->t_fb->tfb_tcp_fb_init)(tp)) { refcount_release(&tp->t_fb->tfb_refcnt); @@ -2322,8 +2330,6 @@ if (V_tcp_perconn_stats_enable == 1) tp->t_stats = stats_blob_alloc(V_tcp_perconn_stats_dflt_tpl, 0); #endif - if (V_tcp_do_lrd) - tp->t_flags |= TF_LRD; return (tp); /* XXX */ } diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -38,6 +38,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_accecn.h" #include #include @@ -1956,7 +1957,34 @@ /* don't send cookie again when retransmitting response */ sc->sc_tfo_cookie = NULL; } +#if defined(TCP_ACCECNOPT) + if ((sc->sc_flags & SCF_ECN_MASK) && + ((sc->sc_flags & SCF_ECN_MASK) != SCF_ECN)) { + to.to_flags |= TOF_ACCECNOPT; + to.to_acceflags |= TOF_ACCE_E0 | + TOF_ACCE_E1 | + TOF_ACCE_CE; + to.to_ee0b = 1; + to.to_ee1b = 1; + to.to_eceb = 0; + to.to_acceflags |= TOF_ACCE_SYN; + } +#endif } +#if defined(TCP_ACCECNOPT) + else { + if ((sc->sc_flags & SCF_ECN_MASK) && + ((sc->sc_flags & SCF_ECN_MASK) != SCF_ECN)) { + to.to_flags |= TOF_ACCECNOPT; + to.to_acceflags |= TOF_ACCE_E0 | + TOF_ACCE_E1 | + TOF_ACCE_CE; + to.to_ee0b = 1; + to.to_ee1b = 1; + to.to_eceb = 0; + } + } +#endif if (sc->sc_flags & SCF_TIMESTAMP) { to.to_tsval = sc->sc_tsoff + tcp_ts_getticks(); to.to_tsecr = sc->sc_tsreflect; diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -249,8 +249,11 @@ int t_dupacks; /* consecutive dup acks recd */ int t_lognum; /* Number of log entries */ int t_loglimit; /* Maximum number of log entries */ - uint32_t t_rcep; /* Number of received CE marked packets */ - uint32_t t_scep; /* Synced number of delivered CE packets */ + uint32_t t_rcep; /* Number of received CE marked packets */ + uint32_t t_scep; /* Synced number of delivered CE packets */ + uint32_t t_re0b; /* Number of ECT0 marked data bytes */ + uint32_t t_re1b; /* Number of ECT1 marked data bytes */ + uint32_t t_rceb; /* Number of CE marked data bytes */ int64_t t_pacing_rate; /* bytes / sec, -1 => unlimited */ struct tcp_log_stailq t_logs; /* Log buffer */ struct tcp_log_id_node *t_lin; @@ -569,7 +572,12 @@ #define TF2_ECN_SND_CWR 0x00000040 /* ECN CWR in queue */ #define TF2_ECN_SND_ECE 0x00000080 /* ECN ECE in queue */ #define TF2_ACE_PERMIT 0x00000100 /* Accurate ECN mode */ -#define TF2_FBYTES_COMPLETE 0x00000400 /* We have first bytes in and out */ +#if defined(TCP_ACCECNOPT) +#define TF2_ACO_E0 0x00000200 /* EE0 counter changed */ +#define TF2_ACO_E1 0x00000400 /* EE1 counter changed */ +#define TF2_ACO_CE 0x00000800 /* ECE counter changed */ +#endif /* TCP_ACCECNOPT */ +#define TF2_FBYTES_COMPLETE 0x00001000 /* We have first bytes in and out */ /* * Structure to hold TCP options that are only used during segment * processing (in tcp_input), but not held in the tcpcb. @@ -587,7 +595,12 @@ #define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */ #define TOF_SACK 0x0080 /* Peer sent SACK option */ #define TOF_FASTOPEN 0x0100 /* TCP Fast Open (TFO) cookie */ +#if defined(TCP_ACCECNOPT) +#define TOF_ACCECNOPT 0x0200 /* AccECN Option */ +#define TOF_MAXOPT 0x0400 +#else #define TOF_MAXOPT 0x0200 +#endif /* TCP_ACCECNOPT */ u_int32_t to_tsval; /* new timestamp */ u_int32_t to_tsecr; /* reflected timestamp */ u_char *to_sacks; /* pointer to the first SACK blocks */ @@ -597,7 +610,19 @@ u_int8_t to_wscale; /* window scaling */ u_int8_t to_nsacks; /* number of SACK blocks */ u_int8_t to_tfo_len; /* TFO cookie length */ - u_int32_t to_spare; /* UTO */ +#if defined(TCP_ACCECNOPT) + u_int8_t to_acceflags; /* AccECN option flags */ +#define TOF_ACCE_SYN 0x01 /* send empty option */ +#define TOF_ACCE_CE 0x02 /* CE counter changed */ +#define TOF_ACCE_E0 0x04 /* E0 counter changed */ +#define TOF_ACCE_E1 0x08 /* E1 counter changed */ +#define TOF_ACCE_ACKNOW 0x10 /* send full option */ + u_int32_t to_ee0b; /* AccECN E0 marked bytes */ + u_int32_t to_ee1b; /* AccECN E1 marked bytes */ + u_int32_t to_eceb; /* AccECN CE marked bytes */ +#else + u_int32_t to_spare; /* UTO */ +#endif /* TCP_ACCECNOPT */ }; /*