Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F81969708
D4350.id10999.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
39 KB
Referenced Files
None
Subscribers
None
D4350.id10999.diff
View Options
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -3681,6 +3681,7 @@
netinet/sctputil.c optional inet sctp | inet6 sctp
netinet/siftr.c optional inet siftr alq | inet6 siftr alq
netinet/tcp_debug.c optional tcpdebug
+netinet/tcp_fastopen.c optional inet tcp_rfc7413 | inet6 tcp_rfc7413
netinet/tcp_hostcache.c optional inet | inet6
netinet/tcp_input.c optional inet | inet6
netinet/tcp_lro.c optional inet | inet6
Index: sys/conf/options
===================================================================
--- sys/conf/options
+++ sys/conf/options
@@ -439,6 +439,8 @@
TCPPCAP opt_global.h
SIFTR
TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading
+TCP_RFC7413 opt_inet.h
+TCP_RFC7413_MAX_KEYS opt_inet.h
TCP_SIGNATURE opt_inet.h
VLAN_ARRAY opt_vlan.h
XBONEHACK
Index: sys/netinet/tcp.h
===================================================================
--- sys/netinet/tcp.h
+++ sys/netinet/tcp.h
@@ -97,6 +97,10 @@
#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */
#define TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */
#define TCPOLEN_SIGNATURE 18
+#define TCPOPT_FAST_OPEN 34
+#define TCPOLEN_FAST_OPEN_EMPTY 2
+#define TCPOLEN_FAST_OPEN_MIN 6
+#define TCPOLEN_FAST_OPEN_MAX 18
/* Miscellaneous constants */
#define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */
@@ -165,6 +169,7 @@
#define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */
#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */
#define TCP_KEEPCNT 1024 /* L,N number of keepalives before close */
+#define TCP_FASTOPEN 1025 /* enable TFO / was created via TFO */
#define TCP_PCAP_OUT 2048 /* number of output packets to keep */
#define TCP_PCAP_IN 4096 /* number of input packets to keep */
Index: sys/netinet/tcp_fastopen.h
===================================================================
--- sys/netinet/tcp_fastopen.h
+++ sys/netinet/tcp_fastopen.h
@@ -0,0 +1,47 @@
+/*-
+ * Copyright (c) 2015 Patrick Kelsey
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _TCP_FASTOPEN_H_
+#define _TCP_FASTOPEN_H_
+
+#ifdef _KERNEL
+
+#define TCP_FASTOPEN_COOKIE_LEN 8 /* tied to SipHash24 64-bit output */
+
+VNET_DECLARE(unsigned int, tcp_fastopen_enabled);
+#define V_tcp_fastopen_enabled VNET(tcp_fastopen_enabled)
+
+void tcp_fastopen_init(void);
+void tcp_fastopen_destroy(void);
+unsigned int *tcp_fastopen_alloc_counter(void);
+void tcp_fastopen_decrement_counter(unsigned int *counter);
+int tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
+ unsigned int len, uint64_t *latest_cookie);
+#endif /* _KERNEL */
+
+#endif /* _TCP_FASTOPEN_H_ */
Index: sys/netinet/tcp_fastopen.c
===================================================================
--- sys/netinet/tcp_fastopen.c
+++ sys/netinet/tcp_fastopen.c
@@ -0,0 +1,442 @@
+/*-
+ * Copyright (c) 2015 Patrick Kelsey
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This is a server-side implementation of TCP Fast Open (TFO) [RFC7413].
+ *
+ * This implementation is currently considered to be experimental and is not
+ * included in kernel builds by default. To include this code, add the
+ * following line to your kernel config:
+ *
+ * options TCP_RFC7413
+ *
+ * The generated TFO cookies are the 64-bit output of
+ * SipHash24(<16-byte-key><client-ip>). Multiple concurrent valid keys are
+ * supported so that time-based rolling cookie invalidation policies can be
+ * implemented in the system. The default number of concurrent keys is 2.
+ * This can be adjusted in the kernel config as follows:
+ *
+ * options TCP_RFC7413_MAX_KEYS=<num-keys>
+ *
+ *
+ * The following TFO-specific sysctls are defined:
+ *
+ * net.inet.tcp.fastopen.acceptany (RW, default 0)
+ * When non-zero, all client-supplied TFO cookies will be considered to
+ * be valid.
+ *
+ * net.inet.tcp.fastopen.autokey (RW, default 120)
+ * When this and net.inet.tcp.fastopen.enabled are non-zero, a new key
+ * will be automatically generated after this many seconds.
+ *
+ * net.inet.tcp.fastopen.enabled (RW, default 0)
+ * When zero, no new TFO connections can be created. On the transition
+ * from enabled to disabled, all installed keys are removed. On the
+ * transition from disabled to enabled, if net.inet.tcp.fastopen.autokey
+ * is non-zero and there are no keys installed, a new key will be
+ * generated immediately. The transition from enabled to disabled does
+ * not affect any TFO connections in progress; it only prevents new ones
+ * from being made.
+ *
+ * net.inet.tcp.fastopen.keylen (RO)
+ * The key length in bytes.
+ *
+ * net.inet.tcp.fastopen.maxkeys (RO)
+ * The maximum number of keys supported.
+ *
+ * net.inet.tcp.fastopen.numkeys (RO)
+ * The current number of keys installed.
+ *
+ * net.inet.tcp.fastopen.setkey (WO)
+ * Install a new key by writing net.inet.tcp.fastopen.keylen bytes to this
+ * sysctl.
+ *
+ *
+ * In order for TFO connections to be created via a listen socket, that
+ * socket must have the TCP_FASTOPEN socket option set on it. This option
+ * can be set on the socket either before or after the listen() is invoked.
+ * Clearing this option on a listen socket after it has been set has no
+ * effect on existing TFO connections or TFO connections in progress; it
+ * only prevents new TFO connections from being made.
+ *
+ * For passively-created sockets, the TCP_FASTOPEN socket option can be
+ * queried to determine whether the connection was established using TFO.
+ * Note that connections that are established via a TFO SYN, but that fall
+ * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option
+ * set.
+ *
+ * Per the RFC, this implementation limits the number of TFO connections
+ * that can be in the SYN_RECEIVED state on a per listen-socket basis.
+ * Whenever this limit is exceeded, requests for new TFO connections are
+ * serviced as non-TFO requests. Without such a limit, given a valid TFO
+ * cookie, an attacker could keep the listen queue in an overflow condition
+ * using a TFO SYN flood. This implementation sets the limit at half the
+ * configured listen backlog.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <crypto/siphash/siphash.h>
+
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp_fastopen.h>
+#include <netinet/tcp_var.h>
+
+
+#define TCP_FASTOPEN_KEY_LEN SIPHASH_KEY_LENGTH
+
+#if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1)
+#define TCP_FASTOPEN_MAX_KEYS 2
+#else
+#define TCP_FASTOPEN_MAX_KEYS TCP_RFC7413_MAX_KEYS
+#endif
+
+struct tcp_fastopen_keylist {
+ unsigned int newest;
+ uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN];
+};
+
+struct tcp_fastopen_callout {
+ struct callout c;
+ struct vnet *v;
+};
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW, 0, "TCP Fast Open");
+
+static VNET_DEFINE(int, tcp_fastopen_acceptany) = 0;
+#define V_tcp_fastopen_acceptany VNET(tcp_fastopen_acceptany)
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0,
+ "Accept any non-empty cookie");
+
+static VNET_DEFINE(unsigned int, tcp_fastopen_autokey) = 120;
+#define V_tcp_fastopen_autokey VNET(tcp_fastopen_autokey)
+static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_autokey, "IU",
+ "Number of seconds between auto-generation of a new key; zero disables");
+
+VNET_DEFINE(unsigned int, tcp_fastopen_enabled) = 0;
+static int sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, enabled,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_enabled, "IU",
+ "Enable/disable TCP Fast Open processing");
+
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen,
+ CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN,
+ "Key length in bytes");
+
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys,
+ CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS,
+ "Maximum number of keys supported");
+
+static VNET_DEFINE(unsigned int, tcp_fastopen_numkeys) = 0;
+#define V_tcp_fastopen_numkeys VNET(tcp_fastopen_numkeys)
+SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys,
+ CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0,
+ "Number of keys installed");
+
+static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey,
+ CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_setkey, "",
+ "Install a new key");
+
+static VNET_DEFINE(struct rmlock, tcp_fastopen_keylock);
+#define V_tcp_fastopen_keylock VNET(tcp_fastopen_keylock)
+
+#define TCP_FASTOPEN_KEYS_RLOCK(t) rm_rlock(&V_tcp_fastopen_keylock, (t))
+#define TCP_FASTOPEN_KEYS_RUNLOCK(t) rm_runlock(&V_tcp_fastopen_keylock, (t))
+#define TCP_FASTOPEN_KEYS_WLOCK() rm_wlock(&V_tcp_fastopen_keylock)
+#define TCP_FASTOPEN_KEYS_WUNLOCK() rm_wunlock(&V_tcp_fastopen_keylock)
+
+static VNET_DEFINE(struct tcp_fastopen_keylist, tcp_fastopen_keys);
+#define V_tcp_fastopen_keys VNET(tcp_fastopen_keys)
+
+static VNET_DEFINE(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx);
+#define V_tcp_fastopen_autokey_ctx VNET(tcp_fastopen_autokey_ctx)
+
+static VNET_DEFINE(uma_zone_t, counter_zone);
+#define V_counter_zone VNET(counter_zone)
+
+void
+tcp_fastopen_init(void)
+{
+ V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ rm_init(&V_tcp_fastopen_keylock, "tfo_keylock");
+ callout_init_rm(&V_tcp_fastopen_autokey_ctx.c,
+ &V_tcp_fastopen_keylock, 0);
+ V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+}
+
+void
+tcp_fastopen_destroy(void)
+{
+ callout_drain(&V_tcp_fastopen_autokey_ctx.c);
+ rm_destroy(&V_tcp_fastopen_keylock);
+ uma_zdestroy(V_counter_zone);
+}
+
+unsigned int *
+tcp_fastopen_alloc_counter(void)
+{
+ unsigned int *counter;
+ counter = uma_zalloc(V_counter_zone, M_NOWAIT);
+ if (counter)
+ *counter = 1;
+ return (counter);
+}
+
+void
+tcp_fastopen_decrement_counter(unsigned int *counter)
+{
+ if (*counter == 1)
+ uma_zfree(V_counter_zone, counter);
+ else
+ atomic_subtract_int(counter, 1);
+}
+
+static void
+tcp_fastopen_addkey_locked(uint8_t *key)
+{
+
+ V_tcp_fastopen_keys.newest++;
+ if (V_tcp_fastopen_keys.newest == TCP_FASTOPEN_MAX_KEYS)
+ V_tcp_fastopen_keys.newest = 0;
+ memcpy(V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], key,
+ TCP_FASTOPEN_KEY_LEN);
+ if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS)
+ V_tcp_fastopen_numkeys++;
+}
+
+static void
+tcp_fastopen_autokey_locked(void)
+{
+ uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
+
+ arc4rand(newkey, TCP_FASTOPEN_KEY_LEN, 0);
+ tcp_fastopen_addkey_locked(newkey);
+}
+
+static void
+tcp_fastopen_autokey_callout(void *arg)
+{
+ struct tcp_fastopen_callout *ctx = arg;
+
+ CURVNET_SET(ctx->v);
+ tcp_fastopen_autokey_locked();
+ callout_reset(&ctx->c, V_tcp_fastopen_autokey * hz,
+ tcp_fastopen_autokey_callout, ctx);
+ CURVNET_RESTORE();
+}
+
+
+static uint64_t
+tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH], struct in_conninfo *inc)
+{
+ SIPHASH_CTX ctx;
+ uint64_t siphash;
+
+ SipHash24_Init(&ctx);
+ SipHash_SetKey(&ctx, key);
+ switch (inc->inc_flags & INC_ISIPV6) {
+#ifdef INET
+ case 0:
+ SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
+ break;
+#endif
+#ifdef INET6
+ case INC_ISIPV6:
+ SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
+ break;
+#endif
+ }
+ SipHash_Final((u_int8_t *)&siphash, &ctx);
+
+ return (siphash);
+}
+
+
+/*
+ * Return values:
+ * -1 the cookie is invalid and no valid cookie is available
+ * 0 the cookie is invalid and the latest cookie has been returned
+ * 1 the cookie is valid and the latest cookie has been returned
+ */
+int
+tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
+ unsigned int len, uint64_t *latest_cookie)
+{
+ struct rm_priotracker tracker;
+ unsigned int i, key_index;
+ uint64_t cur_cookie;
+
+ if (V_tcp_fastopen_acceptany) {
+ *latest_cookie = 0;
+ return (1);
+ }
+
+ if (len != TCP_FASTOPEN_COOKIE_LEN) {
+ if (V_tcp_fastopen_numkeys > 0) {
+ *latest_cookie =
+ tcp_fastopen_make_cookie(
+ V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest],
+ inc);
+ return (0);
+ }
+ return (-1);
+ }
+
+ /*
+ * Check against each available key, from newest to oldest.
+ */
+ TCP_FASTOPEN_KEYS_RLOCK(&tracker);
+ key_index = V_tcp_fastopen_keys.newest;
+ for (i = 0; i < V_tcp_fastopen_numkeys; i++) {
+ cur_cookie =
+ tcp_fastopen_make_cookie(V_tcp_fastopen_keys.key[key_index],
+ inc);
+ if (i == 0)
+ *latest_cookie = cur_cookie;
+ if (memcmp(cookie, &cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0) {
+ TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
+ return (1);
+ }
+ if (key_index == 0)
+ key_index = TCP_FASTOPEN_MAX_KEYS - 1;
+ else
+ key_index--;
+ }
+ TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
+
+ return (0);
+}
+
+static int
+sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ unsigned int new;
+
+ new = V_tcp_fastopen_autokey;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ if (new > (INT_MAX / hz))
+ return (EINVAL);
+
+ TCP_FASTOPEN_KEYS_WLOCK();
+ if (V_tcp_fastopen_enabled) {
+ if (V_tcp_fastopen_autokey && !new)
+ callout_stop(&V_tcp_fastopen_autokey_ctx.c);
+ else if (new)
+ callout_reset(&V_tcp_fastopen_autokey_ctx.c,
+ new * hz, tcp_fastopen_autokey_callout,
+ &V_tcp_fastopen_autokey_ctx);
+ }
+ V_tcp_fastopen_autokey = new;
+ TCP_FASTOPEN_KEYS_WUNLOCK();
+ }
+
+ return (error);
+}
+
+static int
+sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ unsigned int new;
+
+ new = V_tcp_fastopen_enabled;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ if (V_tcp_fastopen_enabled && !new) {
+ /* enabled -> disabled */
+ TCP_FASTOPEN_KEYS_WLOCK();
+ V_tcp_fastopen_numkeys = 0;
+ V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+ if (V_tcp_fastopen_autokey)
+ callout_stop(&V_tcp_fastopen_autokey_ctx.c);
+ V_tcp_fastopen_enabled = 0;
+ TCP_FASTOPEN_KEYS_WUNLOCK();
+ } else if (!V_tcp_fastopen_enabled && new) {
+ /* disabled -> enabled */
+ TCP_FASTOPEN_KEYS_WLOCK();
+ if (V_tcp_fastopen_autokey &&
+ (V_tcp_fastopen_numkeys == 0)) {
+ tcp_fastopen_autokey_locked();
+ callout_reset(&V_tcp_fastopen_autokey_ctx.c,
+ V_tcp_fastopen_autokey * hz,
+ tcp_fastopen_autokey_callout,
+ &V_tcp_fastopen_autokey_ctx);
+ }
+ V_tcp_fastopen_enabled = 1;
+ TCP_FASTOPEN_KEYS_WUNLOCK();
+ }
+ }
+ return (error);
+}
+
+static int
+sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
+
+ if (req->oldptr != NULL || req->oldlen != 0)
+ return (EINVAL);
+ if (req->newptr == NULL)
+ return (EPERM);
+ if (req->newlen != sizeof(newkey))
+ return (EINVAL);
+ error = SYSCTL_IN(req, newkey, sizeof(newkey));
+ if (error)
+ return (error);
+
+ TCP_FASTOPEN_KEYS_WLOCK();
+ tcp_fastopen_addkey_locked(newkey);
+ TCP_FASTOPEN_KEYS_WUNLOCK();
+
+ return (0);
+}
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -98,6 +98,9 @@
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -1019,7 +1022,8 @@
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
#endif
if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
- (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) {
+ (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
+ !(tp->t_flags & TF_FASTOPEN)))) {
if (ti_locked == TI_UNLOCKED) {
if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
@@ -1111,6 +1115,9 @@
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
+#ifdef TCP_RFC7413
+new_tfo_socket:
+#endif
if (so == NULL) {
/*
* We completed the 3-way handshake
@@ -1373,7 +1380,12 @@
#endif
TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
tcp_dooptions(&to, optp, optlen, TO_SYN);
+#ifdef TCP_RFC7413
+ if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL))
+ goto new_tfo_socket;
+#else
syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
+#endif
/*
* Entry added to syncache and mbuf consumed.
* Only the listen socket is unlocked by syncache_add().
@@ -1488,7 +1500,8 @@
struct in_conninfo *inc;
struct mbuf *mfree;
struct tcpopt to;
-
+ int tfo_syn;
+
#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
@@ -1940,6 +1953,28 @@
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN) {
+ /*
+ * When a TFO connection is in SYN_RECEIVED, the
+ * only valid packets are the initial SYN, a
+ * retransmit/copy of the initial SYN (possibly with
+ * a subset of the original data), a valid ACK, a
+ * FIN, or a RST.
+ */
+ if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) {
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ } else if (thflags & TH_SYN) {
+ /* non-initial SYN is ignored */
+ if ((tcp_timer_active(tp, TT_DELACK) ||
+ tcp_timer_active(tp, TT_REXMT)))
+ goto drop;
+ } else if (!(thflags & (TH_ACK|TH_FIN|TH_RST))) {
+ goto drop;
+ }
+ }
+#endif
break;
/*
@@ -2155,7 +2190,8 @@
* RFC5961 Section 4.2
* Send challenge ACK for any SYN in synchronized state.
*/
- if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) {
+ if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
+ tp->t_state != TCPS_SYN_RECEIVED) {
KASSERT(ti_locked == TI_RLOCKED,
("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
@@ -2349,9 +2385,16 @@
*/
if ((thflags & TH_ACK) == 0) {
if (tp->t_state == TCPS_SYN_RECEIVED ||
- (tp->t_flags & TF_NEEDSYN))
+ (tp->t_flags & TF_NEEDSYN)) {
+#ifdef TCP_RFC7413
+ if (tp->t_state == TCPS_SYN_RECEIVED &&
+ tp->t_flags & TF_FASTOPEN) {
+ tp->snd_wnd = tiwin;
+ cc_conn_init(tp);
+ }
+#endif
goto step6;
- else if (tp->t_flags & TF_ACKNOW)
+ } else if (tp->t_flags & TF_ACKNOW)
goto dropafterack;
else
goto drop;
@@ -2368,7 +2411,6 @@
* The ACK was checked above.
*/
case TCPS_SYN_RECEIVED:
-
TCPSTAT_INC(tcps_connects);
soisconnected(so);
/* Do window scaling? */
@@ -2390,7 +2432,27 @@
tcp_state_change(tp, TCPS_ESTABLISHED);
TCP_PROBE5(accept__established, NULL, tp,
mtod(m, const char *), tp, th);
- cc_conn_init(tp);
+#ifdef TCP_RFC7413
+ if (tp->t_tfo_pending) {
+ tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+ tp->t_tfo_pending = NULL;
+
+ /*
+ * Account for the ACK of our SYN prior to
+ * regular ACK processing below.
+ */
+ tp->snd_una++;
+ }
+ /*
+ * TFO connections call cc_conn_init() during SYN
+ * processing. Calling it again here for such
+ * connections is not harmless as it would undo the
+ * snd_cwnd reduction that occurs when a TFO SYN|ACK
+ * is retransmitted.
+ */
+ if (!(tp->t_flags & TF_FASTOPEN))
+#endif
+ cc_conn_init(tp);
tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
}
/*
@@ -2656,6 +2718,7 @@
INP_WLOCK_ASSERT(tp->t_inpcb);
acked = BYTES_THIS_ACK(tp, th);
+
TCPSTAT_INC(tcps_rcvackpack);
TCPSTAT_ADD(tcps_rcvackbyte, acked);
@@ -2914,9 +2977,12 @@
* case PRU_RCVD). If a FIN has already been received on this
* connection then we just ignore the text.
*/
- if ((tlen || (thflags & TH_FIN)) &&
+ tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_flags & TF_FASTOPEN));
+ if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
+
m_adj(m, drop_hdrlen); /* delayed header drop */
/*
* Insert segment which includes th into TCP reassembly queue
@@ -2932,8 +2998,9 @@
*/
if (th->th_seq == tp->rcv_nxt &&
LIST_EMPTY(&tp->t_segq) &&
- TCPS_HAVEESTABLISHED(tp->t_state)) {
- if (DELAY_ACK(tp, tlen))
+ (TCPS_HAVEESTABLISHED(tp->t_state) ||
+ tfo_syn)) {
+ if (DELAY_ACK(tp, tlen) || tfo_syn)
tp->t_flags |= TF_DELACK;
else
tp->t_flags |= TF_ACKNOW;
@@ -3288,6 +3355,21 @@
to->to_sacks = cp + 2;
TCPSTAT_INC(tcps_sack_rcv_blocks);
break;
+#ifdef TCP_RFC7413
+ case TCPOPT_FAST_OPEN:
+ if ((optlen != TCPOLEN_FAST_OPEN_EMPTY) &&
+ (optlen < TCPOLEN_FAST_OPEN_MIN) &&
+ (optlen > TCPOLEN_FAST_OPEN_MAX))
+ continue;
+ if (!(flags & TO_SYN))
+ continue;
+ if (!V_tcp_fastopen_enabled)
+ continue;
+ to->to_flags |= TOF_FASTOPEN;
+ to->to_tfo_len = optlen - 2;
+ to->to_tfo_cookie = to->to_tfo_len ? cp + 2 : NULL;
+ break;
+#endif
default:
continue;
}
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -68,6 +68,9 @@
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#define TCPOUTFLAGS
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
@@ -204,6 +207,17 @@
return (tcp_offload_output(tp));
#endif
+#ifdef TCP_RFC7413
+ /*
+ * For TFO connections in SYN_RECEIVED, only allow the initial
+ * SYN|ACK and those sent by the retransmit timer.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED) &&
+ SEQ_GT(tp->snd_max, tp->snd_una) && /* inital SYN|ACK sent */
+ (tp->snd_nxt != tp->snd_una)) /* not a retransmit */
+ return (0);
+#endif
/*
* Determine length of data that should be transmitted,
* and flags that will be used.
@@ -390,6 +404,15 @@
if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
if (tp->t_state != TCPS_SYN_RECEIVED)
flags &= ~TH_SYN;
+#ifdef TCP_RFC7413
+ /*
+ * When sending additional segments following a TFO SYN|ACK,
+ * do not include the SYN bit.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED))
+ flags &= ~TH_SYN;
+#endif
off--, len++;
}
@@ -403,6 +426,17 @@
flags &= ~TH_FIN;
}
+#ifdef TCP_RFC7413
+ /*
+ * When retransmitting SYN|ACK on a passively-created TFO socket,
+ * don't include data, as the presence of data may have caused the
+ * original SYN|ACK to have been dropped by a middlebox.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)) ||
+ (flags & TH_RST)))
+ len = 0;
+#endif
if (len <= 0) {
/*
* If FIN has been sent but not acked,
@@ -725,6 +759,22 @@
tp->snd_nxt = tp->iss;
to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
to.to_flags |= TOF_MSS;
+#ifdef TCP_RFC7413
+ /*
+ * Only include the TFO option on the first
+ * transmission of the SYN|ACK on a
+ * passively-created TFO socket, as the presence of
+ * the TFO option may have caused the original
+ * SYN|ACK to have been dropped by a middlebox.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_rxtshift == 0)) {
+ to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
+ to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie;
+ to.to_flags |= TOF_FASTOPEN;
+ }
+#endif
}
/* Window scaling. */
if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
@@ -1004,7 +1054,7 @@
* give data to the user when a buffer fills or
* a PUSH comes in.)
*/
- if (off + len == sbused(&so->so_snd))
+ if ((off + len == sbused(&so->so_snd)) && !(flags & TH_SYN))
flags |= TH_PUSH;
SOCKBUF_UNLOCK(&so->so_snd);
} else {
@@ -1711,6 +1761,25 @@
TCPSTAT_INC(tcps_sack_send_blocks);
break;
}
+#ifdef TCP_RFC7413
+ case TOF_FASTOPEN:
+ {
+ int total_len;
+
+ /* XXX is there any point to aligning this option? */
+ total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len;
+ if (TCP_MAXOLEN - optlen < total_len)
+ continue;
+ *optp++ = TCPOPT_FAST_OPEN;
+ *optp++ = total_len;
+ if (to->to_tfo_len > 0) {
+ bcopy(to->to_tfo_cookie, optp, to->to_tfo_len);
+ optp += to->to_tfo_len;
+ }
+ optlen += total_len;
+ break;
+ }
+#endif
default:
panic("%s: unknown TCP option type", __func__);
break;
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -83,6 +83,9 @@
#include <netinet6/nd6.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -433,6 +436,10 @@
#ifdef TCPPCAP
tcp_pcap_init();
#endif
+
+#ifdef TCP_RFC7413
+ tcp_fastopen_init();
+#endif
}
#ifdef VIMAGE
@@ -441,6 +448,9 @@
{
int error;
+#ifdef TCP_RFC7413
+ tcp_fastopen_destroy();
+#endif
tcp_hc_destroy();
syncache_destroy();
tcp_tw_destroy();
@@ -1135,6 +1145,17 @@
if (tp->t_state == TCPS_LISTEN)
tcp_offload_listen_stop(tp);
#endif
+#ifdef TCP_RFC7413
+ /*
+ * This releases the TFO pending counter resource for TFO listen
+ * sockets as well as passively-created TFO sockets that transition
+ * from SYN_RECEIVED to CLOSED.
+ */
+ if (tp->t_tfo_pending) {
+ tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+ tp->t_tfo_pending = NULL;
+ }
+#endif
in_pcbdrop(inp);
TCPSTAT_INC(tcps_closed);
KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
Index: sys/netinet/tcp_syncache.h
===================================================================
--- sys/netinet/tcp_syncache.h
+++ sys/netinet/tcp_syncache.h
@@ -41,7 +41,7 @@
void syncache_unreach(struct in_conninfo *, struct tcphdr *);
int syncache_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
-void syncache_add(struct in_conninfo *, struct tcpopt *,
+int syncache_add(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *,
void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
@@ -74,7 +74,9 @@
#endif
struct label *sc_label; /* MAC label reference */
struct ucred *sc_cred; /* cred cache for jail checks */
-
+#ifdef TCP_RFC7413
+ void *sc_tfo_cookie; /* for TCP Fast Open response */
+#endif
void *sc_pspare; /* TCP_SIGNATURE */
u_int32_t sc_spare[2]; /* UTO */
};
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -80,6 +80,9 @@
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/tcp.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -1061,6 +1064,39 @@
return (0);
}
+#ifdef TCP_RFC7413
+static void
+syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m,
+ uint64_t response_cookie)
+{
+ struct inpcb *inp;
+ struct tcpcb *tp;
+ unsigned int *pending_counter;
+
+ /*
+ * Global TCP locks are held because we manipulate the PCB lists
+ * and create a new socket.
+ */
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+ pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
+ *lsop = syncache_socket(sc, *lsop, m);
+ if (*lsop == NULL) {
+ TCPSTAT_INC(tcps_sc_aborted);
+ atomic_subtract_int(pending_counter, 1);
+ } else {
+ inp = sotoinpcb(*lsop);
+ tp = intotcpcb(inp);
+ tp->t_flags |= TF_FASTOPEN;
+ tp->t_tfo_cookie = response_cookie;
+ tp->snd_max = tp->iss;
+ tp->snd_nxt = tp->iss;
+ tp->t_tfo_pending = pending_counter;
+ TCPSTAT_INC(tcps_sc_completed);
+ }
+}
+#endif /* TCP_RFC7413 */
+
/*
* Given a LISTEN socket and an inbound SYN request, add
* this to the syn cache, and send back a segment:
@@ -1073,8 +1109,15 @@
* DoS attack, an attacker could send data which would eventually
* consume all available buffer space if it were ACKed. By not ACKing
* the data, we avoid this DoS scenario.
+ *
+ * The exception to the above is when a SYN with a valid TCP Fast Open (TFO)
+ * cookie is processed, V_tcp_fastopen_enabled set to true, and the
+ * TCP_FASTOPEN socket option is set. In this case, a new socket is created
+ * and returned via lsop, the mbuf is not freed so that tcp_input() can
+ * queue its data to the socket, and 1 is returned to indicate the
+ * TFO-socket-creation path was taken.
*/
-void
+int
syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
void *todctx)
@@ -1087,6 +1130,7 @@
u_int ltflags;
int win, sb_hiwat, ip_ttl, ip_tos;
char *s;
+ int rv = 0;
#ifdef INET6
int autoflowlabel = 0;
#endif
@@ -1095,6 +1139,11 @@
#endif
struct syncache scs;
struct ucred *cred;
+#ifdef TCP_RFC7413
+ uint64_t tfo_response_cookie;
+ int tfo_cookie_valid = 0;
+ int tfo_response_cookie_valid = 0;
+#endif
INP_WLOCK_ASSERT(inp); /* listen socket */
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
@@ -1119,6 +1168,29 @@
sb_hiwat = so->so_rcv.sb_hiwat;
ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
+#ifdef TCP_RFC7413
+ if (V_tcp_fastopen_enabled && (tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_tfo_pending != NULL) && (to->to_flags & TOF_FASTOPEN)) {
+ /*
+ * Limit the number of pending TFO connections to
+ * approximately half of the queue limit. This prevents TFO
+ * SYN floods from starving the service by filling the
+ * listen queue with bogus TFO connections.
+ */
+ if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=
+ (so->so_qlimit / 2)) {
+ int result;
+
+ result = tcp_fastopen_check_cookie(inc,
+ to->to_tfo_cookie, to->to_tfo_len,
+ &tfo_response_cookie);
+ tfo_cookie_valid = (result > 0);
+ tfo_response_cookie_valid = (result >= 0);
+ } else
+ atomic_subtract_int(tp->t_tfo_pending, 1);
+ }
+#endif
+
/* By the time we drop the lock these should no longer be used. */
so = NULL;
tp = NULL;
@@ -1130,7 +1202,10 @@
} else
mac_syncache_create(maclabel, inp);
#endif
- INP_WUNLOCK(inp);
+#ifdef TCP_RFC7413
+ if (!tfo_cookie_valid)
+#endif
+ INP_WUNLOCK(inp);
/*
* Remember the IP options, if any.
@@ -1159,6 +1234,10 @@
sc = syncache_lookup(inc, &sch); /* returns locked entry */
SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid)
+ INP_WUNLOCK(inp);
+#endif
TCPSTAT_INC(tcps_sc_dupsyn);
if (ipopts) {
/*
@@ -1201,6 +1280,14 @@
goto done;
}
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid) {
+ bzero(&scs, sizeof(scs));
+ sc = &scs;
+ goto skip_alloc;
+ }
+#endif
+
sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
if (sc == NULL) {
/*
@@ -1224,7 +1311,13 @@
}
}
}
-
+
+#ifdef TCP_RFC7413
+skip_alloc:
+ if (!tfo_cookie_valid && tfo_response_cookie_valid)
+ sc->sc_tfo_cookie = &tfo_response_cookie;
+#endif
+
/*
* Fill in the syncache values.
*/
@@ -1332,6 +1425,15 @@
#endif
SCH_UNLOCK(sch);
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid) {
+ syncache_tfo_expand(sc, lsop, m, tfo_response_cookie);
+ /* INP_WUNLOCK(inp) will be performed by the called */
+ rv = 1;
+ goto tfo_done;
+ }
+#endif
+
/*
* Do a standard 3-way handshake.
*/
@@ -1349,17 +1451,20 @@
}
done:
+ if (m) {
+ *lsop = NULL;
+ m_freem(m);
+ }
+#ifdef TCP_RFC7413
+tfo_done:
+#endif
if (cred != NULL)
crfree(cred);
#ifdef MAC
if (sc == &scs)
mac_syncache_destroy(&maclabel);
#endif
- if (m) {
-
- *lsop = NULL;
- m_freem(m);
- }
+ return (rv);
}
static int
@@ -1511,6 +1616,16 @@
}
}
#endif
+
+#ifdef TCP_RFC7413
+ if (sc->sc_tfo_cookie) {
+ to.to_flags |= TOF_FASTOPEN;
+ to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
+ to.to_tfo_cookie = sc->sc_tfo_cookie;
+ /* don't send cookie again when retransmitting response */
+ sc->sc_tfo_cookie = NULL;
+ }
+#endif
optlen = tcp_addoptions(&to, (u_char *)(th + 1));
/* Adjust headers by option size. */
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -643,7 +643,8 @@
} else
tp->t_flags &= ~TF_PREVVALID;
TCPSTAT_INC(tcps_rexmttimeo);
- if (tp->t_state == TCPS_SYN_SENT)
+ if ((tp->t_state == TCPS_SYN_SENT) ||
+ (tp->t_state == TCPS_SYN_RECEIVED))
rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
else
rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -81,6 +81,9 @@
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -404,6 +407,10 @@
}
SOCK_UNLOCK(so);
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN)
+ tp->t_tfo_pending = tcp_fastopen_alloc_counter();
+#endif
out:
TCPDEBUG2(PRU_LISTEN);
TCP_PROBE2(debug__user, tp, PRU_LISTEN);
@@ -450,6 +457,10 @@
}
SOCK_UNLOCK(so);
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN)
+ tp->t_tfo_pending = tcp_fastopen_alloc_counter();
+#endif
out:
TCPDEBUG2(PRU_LISTEN);
TCP_PROBE2(debug__user, tp, PRU_LISTEN);
@@ -804,6 +815,18 @@
}
tp = intotcpcb(inp);
TCPDEBUG1();
+#ifdef TCP_RFC7413
+ /*
+ * For passively-created TFO connections, don't attempt a window
+ * update while still in SYN_RECEIVED as this may trigger an early
+ * SYN|ACK. It is preferable to have the SYN|ACK be sent along with
+ * application response data, or failing that, when the DELACK timer
+ * expires.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED))
+ goto out;
+#endif
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE)
tcp_offload_rcvd(tp);
@@ -1599,6 +1622,29 @@
goto unlock_and_done;
#endif
+#ifdef TCP_RFC7413
+ case TCP_FASTOPEN:
+ INP_WUNLOCK(inp);
+ if (!V_tcp_fastopen_enabled)
+ return (EPERM);
+
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ return (error);
+
+ INP_WLOCK_RECHECK(inp);
+ if (optval) {
+ tp->t_flags |= TF_FASTOPEN;
+ if ((tp->t_state == TCPS_LISTEN) &&
+ (tp->t_tfo_pending == NULL))
+ tp->t_tfo_pending =
+ tcp_fastopen_alloc_counter();
+ } else
+ tp->t_flags &= ~TF_FASTOPEN;
+ goto unlock_and_done;
+#endif
+
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -1678,6 +1724,14 @@
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
#endif
+
+#ifdef TCP_RFC7413
+ case TCP_FASTOPEN:
+ optval = tp->t_flags & TF_FASTOPEN;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
+#endif
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -2001,6 +2055,10 @@
db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
comma = 1;
}
+ if (t_flags & TF_FASTOPEN) {
+ db_printf("%sTF_FASTOPEN", comma ? ", " : "");
+ comma = 1;
+ }
}
static void
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -207,8 +207,18 @@
u_int t_pmtud_saved_maxopd; /* pre-blackhole MSS */
u_int t_flags2; /* More tcpcb flags storage */
+#if defined(_KERNEL) && defined(TCP_RFC7413)
+ uint32_t t_ispare[6]; /* 5 UTO, 1 TBD */
+ uint64_t t_tfo_cookie; /* TCP Fast Open cookie */
+#else
uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */
+#endif
+#if defined(_KERNEL) && defined(TCP_RFC7413)
+ void *t_pspare2[3]; /* 1 TCP_SIGNATURE, 2 TBD */
+ unsigned int *t_tfo_pending; /* TCP Fast Open pending counter */
+#else
void *t_pspare2[4]; /* 1 TCP_SIGNATURE, 3 TBD */
+#endif
#if defined(_KERNEL) && defined(TCPPCAP)
struct mbufq t_inpkts; /* List of saved input packets. */
struct mbufq t_outpkts; /* List of saved output packets. */
@@ -254,6 +264,7 @@
#define TF_ECN_SND_ECE 0x10000000 /* ECN ECE in queue */
#define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */
#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */
+#define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */
#define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY)
#define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY
@@ -313,14 +324,17 @@
#define TOF_TS 0x0010 /* timestamp */
#define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */
#define TOF_SACK 0x0080 /* Peer sent SACK option */
-#define TOF_MAXOPT 0x0100
+#define TOF_FASTOPEN 0x0100 /* TCP Fast Open (TFO) cookie */
+#define TOF_MAXOPT 0x0200
u_int32_t to_tsval; /* new timestamp */
u_int32_t to_tsecr; /* reflected timestamp */
u_char *to_sacks; /* pointer to the first SACK blocks */
u_char *to_signature; /* pointer to the TCP-MD5 signature */
+ u_char *to_tfo_cookie; /* pointer to the TFO cookie */
u_int16_t to_mss; /* maximum segment size */
u_int8_t to_wscale; /* window scaling */
u_int8_t to_nsacks; /* number of SACK blocks */
+ u_int8_t to_tfo_len; /* TFO cookie length */
u_int32_t to_spare; /* UTO */
};
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Dec 15, 9:33 AM (12 h, 22 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
9091611
Default Alt Text
D4350.id10999.diff (39 KB)
Attached To
Mode
D4350: TCP Fast Open (TFO) [RFC7413] Server-side Implementation
Attached
Detach File
Event Timeline
Log In to Comment