Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F145563042
D3687.1778202330.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
17 KB
Referenced Files
None
Subscribers
None
D3687.1778202330.diff
View Options
Index: lib/libc/sys/getsockopt.2
===================================================================
--- lib/libc/sys/getsockopt.2
+++ lib/libc/sys/getsockopt.2
@@ -28,7 +28,7 @@
.\" @(#)getsockopt.2 8.4 (Berkeley) 5/2/95
.\" $FreeBSD$
.\"
-.Dd April 5, 2013
+.Dd August 4, 2016
.Dt GETSOCKOPT 2
.Os
.Sh NAME
@@ -187,6 +187,7 @@
.It Dv SO_LISTENQLEN Ta "get complete queue length of the socket (get only)"
.It Dv SO_LISTENINCQLEN Ta "get incomplete queue length of the socket (get only)"
.It Dv SO_USER_COOKIE Ta "set the 'so_user_cookie' value for the socket (uint32_t, set only)"
+.It Dv SO_MAX_PACING_RATE Ta "set the maximum transmit rate in bytes per second for the socket"
.El
.Pp
.Dv SO_DEBUG
@@ -496,6 +497,11 @@
returns the number of unaccepted complete connections.
.Dv SO_LISTENINCQLEN
returns the number of unaccepted incomplete connections.
+.Pp
+.Dv SO_MAX_PACING_RATE
+instructs the socket and underlying network adapter layers that the
+transmit rate should be limited to the given unsigned 32-bit value in
+bytes per second.
.Sh RETURN VALUES
.Rv -std
.Sh ERRORS
Index: sbin/ifconfig/ifconfig.8
===================================================================
--- sbin/ifconfig/ifconfig.8
+++ sbin/ifconfig/ifconfig.8
@@ -28,7 +28,7 @@
.\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94
.\" $FreeBSD$
.\"
-.Dd June 8, 2016
+.Dd August 2, 2016
.Dt IFCONFIG 8
.Os
.Sh NAME
@@ -454,6 +454,8 @@
and 802.11g
.Pq Cm 11g
operating modes.
+.It Cm txrtlmt
+If the driver supports TX rate limiting, enable it on the interface.
.It Cm inst Ar minst , Cm instance Ar minst
Set the media instance to
.Ar minst .
Index: sbin/ifconfig/ifconfig.c
===================================================================
--- sbin/ifconfig/ifconfig.c
+++ sbin/ifconfig/ifconfig.c
@@ -1145,7 +1145,7 @@
"\020\1RXCSUM\2TXCSUM\3NETCONS\4VLAN_MTU\5VLAN_HWTAGGING\6JUMBO_MTU\7POLLING" \
"\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \
"\17TOE4\20TOE6\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \
-"\26RXCSUM_IPV6\27TXCSUM_IPV6"
+"\26RXCSUM_IPV6\27TXCSUM_IPV6\31TXRTLMT"
/*
* Print the status of the interface. If an address family was
@@ -1453,6 +1453,8 @@
DEF_CMD("-wol_mcast", -IFCAP_WOL_MCAST, setifcap),
DEF_CMD("wol_magic", IFCAP_WOL_MAGIC, setifcap),
DEF_CMD("-wol_magic", -IFCAP_WOL_MAGIC, setifcap),
+ DEF_CMD("txrtlmt", IFCAP_TXRTLMT, setifcap),
+ DEF_CMD("-txrtlmt", -IFCAP_TXRTLMT, setifcap),
DEF_CMD("normal", -IFF_LINK0, setifflags),
DEF_CMD("compress", IFF_LINK0, setifflags),
DEF_CMD("noicmp", IFF_LINK1, setifflags),
Index: sys/conf/NOTES
===================================================================
--- sys/conf/NOTES
+++ sys/conf/NOTES
@@ -616,6 +616,8 @@
options INET #Internet communications protocols
options INET6 #IPv6 communications protocols
+options RATELIMIT # TX rate limiting support
+
options ROUTETABLES=2 # allocated fibs up to 65536. default is 1.
# but that would be a bad idea as they are large.
Index: sys/conf/config.mk
===================================================================
--- sys/conf/config.mk
+++ sys/conf/config.mk
@@ -19,6 +19,10 @@
opt_inet6.h:
@echo "#define INET6 1" > ${.TARGET}
.endif
+.if ${MK_RATELIMIT} != "no"
+opt_ratelimit.h:
+ @echo "#define RATELIMIT 1" > ${.TARGET}
+.endif
.if ${MK_EISA} != "no"
opt_eisa.h:
@echo "#define DEV_EISA 1" > ${.TARGET}
Index: sys/conf/kern.opts.mk
===================================================================
--- sys/conf/kern.opts.mk
+++ sys/conf/kern.opts.mk
@@ -47,7 +47,8 @@
EISA \
EXTRA_TCP_STACKS \
NAND \
- OFED
+ OFED \
+ RATELIMIT
# Some options are totally broken on some architectures. We disable
# them. If you need to enable them on an experimental basis, you
Index: sys/conf/options
===================================================================
--- sys/conf/options
+++ sys/conf/options
@@ -408,6 +408,7 @@
BOOTP_WIRED_TO opt_bootp.h
DEVICE_POLLING
DUMMYNET opt_ipdn.h
+RATELIMIT opt_ratelimit.h
INET opt_inet.h
INET6 opt_inet6.h
IPDIVERT
Index: sys/kern/uipc_socket.c
===================================================================
--- sys/kern/uipc_socket.c
+++ sys/kern/uipc_socket.c
@@ -105,6 +105,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_ratelimit.h"
#include "opt_compat.h"
#include <sys/param.h>
@@ -2678,6 +2679,18 @@
#endif
break;
+ case SO_MAX_PACING_RATE:
+#ifdef RATELIMIT
+ error = sooptcopyin(sopt, &val32, sizeof(val32),
+ sizeof(val32));
+ if (error)
+ goto bad;
+ so->so_max_pacing_rate = val32;
+#else
+ error = EOPNOTSUPP;
+#endif
+ break;
+
default:
if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
error = hhook_run_socket(so, sopt,
@@ -2733,6 +2746,9 @@
#ifdef MAC
struct mac extmac;
#endif
+#ifdef RATELIMIT
+ uint32_t val32;
+#endif
CURVNET_SET(so->so_vnet);
error = 0;
@@ -2865,6 +2881,15 @@
optval = so->so_incqlen;
goto integer;
+ case SO_MAX_PACING_RATE:
+#ifdef RATELIMIT
+ val32 = so->so_max_pacing_rate;
+ error = sooptcopyout(sopt, &val32, sizeof(val32));
+#else
+ error = EOPNOTSUPP;
+#endif
+ break;
+
default:
if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
error = hhook_run_socket(so, sopt,
Index: sys/net/if.h
===================================================================
--- sys/net/if.h
+++ sys/net/if.h
@@ -239,6 +239,7 @@
#define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */
#define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */
#define IFCAP_HWSTATS 0x800000 /* manages counters internally */
+#define IFCAP_TXRTLMT 0x1000000 /* hardware supports TX rate limiting */
#define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6)
@@ -371,6 +372,16 @@
};
/*
+ * Interface to create/delete/modify/get TX rate limiting.
+ */
+struct ifreq_txrtlmt {
+ uint32_t txringid_max_rate;
+ uint32_t txringid;
+ uint32_t txringid_flowid;
+ uint32_t txringid_flowtype;
+};
+
+/*
* Interface request structure used for socket
* ioctl's. All interface ioctl's must have parameter
* definitions which begin with ifr_name. The
Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -2767,6 +2767,17 @@
ifr = (struct ifreq *)data;
switch (cmd) {
+ /*
+ * The TX rate limiting IOCTLs should only be used
+ * within the kernel. Prevent user-space from using
+ * them:
+ */
+ case SIOCARATECTL:
+ case SIOCSRATECTL:
+ case SIOCDRATECTL:
+ CURVNET_RESTORE();
+ return (EOPNOTSUPP);
+
#ifdef VIMAGE
case SIOCSIFRVNET:
error = priv_check(td, PRIV_NET_SETIFVNET);
Index: sys/netinet/in_pcb.h
===================================================================
--- sys/netinet/in_pcb.h
+++ sys/netinet/in_pcb.h
@@ -202,10 +202,13 @@
u_char inp_ip_minttl; /* (i) minimum TTL or drop */
uint32_t inp_flowid; /* (x) flow id / queue id */
u_int inp_refcount; /* (i) refcount */
- void *inp_pspare[5]; /* (x) packet pacing / general use */
+ struct ifnet *inp_txringid_ifp; /* (i) ifp of TX ring ID */
+ void *inp_pspare[4]; /* (x) packet pacing / general use */
uint32_t inp_flowtype; /* (x) M_HASHTYPE value */
uint32_t inp_rss_listen_bucket; /* (x) overridden RSS listen bucket */
- u_int inp_ispare[4]; /* (x) packet pacing / user cookie /
+ uint32_t inp_txringid_max_rate; /* (i) driver TX ring ID rate */
+ uint32_t inp_txringid; /* (i) driver TX ring ID */
+ u_int inp_ispare[2]; /* (x) packet pacing / user cookie /
* general use */
/* Local and foreign ports, local and foreign addr. */
@@ -736,6 +739,9 @@
struct sockaddr *
in_sockaddr(in_port_t port, struct in_addr *addr);
void in_pcbsosetlabel(struct socket *so);
+#ifdef RATELIMIT
+void in_pcboutput_txrtlmt(struct inpcb *, struct ifnet *, struct mbuf *);
+#endif
#endif /* _KERNEL */
#endif /* !_NETINET_IN_PCB_H_ */
Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -42,6 +42,7 @@
#include "opt_ipsec.h"
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_ratelimit.h"
#include "opt_pcbgroup.h"
#include "opt_rss.h"
@@ -57,6 +58,7 @@
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <sys/sockio.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
@@ -136,6 +138,9 @@
#define V_ipport_tcplastcount VNET(ipport_tcplastcount)
static void in_pcbremlists(struct inpcb *inp);
+#ifdef RATELIMIT
+static void in_pcbdetach_txrtlmt(struct inpcb *inp);
+#endif
#ifdef INET
static struct inpcb *in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo,
struct in_addr faddr, u_int fport_arg,
@@ -1140,6 +1145,10 @@
KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));
+#ifdef RATELIMIT
+ if (inp->inp_txringid_ifp != NULL)
+ in_pcbdetach_txrtlmt(inp);
+#endif
inp->inp_socket->so_pcb = NULL;
inp->inp_socket = NULL;
}
@@ -2683,3 +2692,193 @@
db_print_inpcb(inp, "inpcb", 0);
}
#endif /* DDB */
+
+#ifdef RATELIMIT
+/*
+ * Modify existing TX rate limit on inp_txringid_ifp and update
+ * inpcb info:
+ */
+static int
+in_pcbmodify_txrtlmt(struct inpcb *inp, struct ifnet *ifp,
+ uint32_t max_pacing_rate)
+{
+ struct ifreq_txrtlmt req;
+ int error;
+
+ INP_WLOCK_ASSERT(inp);
+
+ req.txringid_max_rate = max_pacing_rate;
+ req.txringid = inp->inp_txringid;
+ req.txringid_flowid = inp->inp_flowid;
+ req.txringid_flowtype = inp->inp_flowtype;
+
+ error = ifp->if_ioctl(ifp, SIOCSRATECTL, (caddr_t)&req);
+
+ if (error)
+ return (error);
+
+ inp->inp_txringid_max_rate = max_pacing_rate;
+ return (0);
+}
+
+/*
+ * Create a TX rate limit on ifp and attach it to inpcb:
+ */
+static int
+in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp,
+ uint32_t max_pacing_rate)
+{
+ struct ifreq_txrtlmt req;
+ int error;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(inp->inp_txringid_ifp == NULL,
+ ("%s: inp_txringid_ifp != NULL", __func__));
+
+ req.txringid_max_rate = max_pacing_rate;
+ req.txringid_flowid = inp->inp_flowid;
+ req.txringid_flowtype = inp->inp_flowtype;
+
+ if_ref(ifp);
+ error = ifp->if_ioctl(ifp, SIOCARATECTL, (caddr_t)&req);
+
+ if (error) {
+ if_rele(ifp);
+ return (error);
+ }
+
+ inp->inp_txringid_ifp = ifp;
+ inp->inp_txringid_max_rate = max_pacing_rate;
+ inp->inp_txringid = req.txringid;
+ return (0);
+}
+
+/*
+ * Remove TX rate limit from inp_txringid_ifp and detach it from
+ * the inpcb:
+ */
+static void
+in_pcbdetach_txrtlmt(struct inpcb *inp)
+{
+ struct ifreq_txrtlmt req;
+ struct ifnet *ifp;
+
+ INP_WLOCK_ASSERT(inp);
+
+ KASSERT(inp->inp_txringid_ifp != NULL,
+ ("%s: inp->inp_txringid_ifp == NULL", __func__));
+
+ ifp = inp->inp_txringid_ifp;
+ req.txringid = inp->inp_txringid;
+ req.txringid_flowid = inp->inp_flowid;
+ req.txringid_flowtype = inp->inp_flowtype;
+
+ inp->inp_txringid_ifp = NULL;
+ inp->inp_txringid = 0;
+ inp->inp_txringid_max_rate = 0;
+
+ /*
+ * If the device was detached while we still had reference on
+ * ifp, we assume if_dead() was called and replaced callbacks
+ * with stubs.
+ */
+ ifp->if_ioctl(ifp, SIOCDRATECTL, (caddr_t)&req);
+ if_rele(ifp);
+}
+
+/*
+ * Track route changes and modify the TX rate limit hint in the given
+ * mbuf to match what the network driver expects.
+ */
+void
+in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb)
+{
+ struct socket *socket;
+ uint32_t max_pacing_rate;
+ int error;
+
+ if (inp == NULL)
+ return;
+
+ socket = inp->inp_socket;
+ if (socket == NULL)
+ return;
+
+ /*
+ * NOTE: The so_max_pacing_rate value is read unlocked,
+ * because atomic updates are not required since the variable
+ * is checked at every mbuf we send. It is assumed that the
+ * variable read itself will be atomic.
+ */
+ max_pacing_rate = socket->so_max_pacing_rate;
+
+ if (max_pacing_rate == 0 && inp->inp_txringid_ifp == NULL)
+ return;
+
+ /*
+ * In order to utilize packet pacing with RSS, we need to wait
+ * until there is a valid RSS hash before we can proceed:
+ */
+ if (inp->inp_flowtype == M_HASHTYPE_NONE)
+ return;
+
+ /*
+ * NOTE: If we have a reference on the ifp, a new ifp can't be
+ * created at the same memory address of the old ifp. This
+ * lets us ensure that if we transmit on one interface and its
+ * module is unloaded and then loaded, we won't try to
+ * transmit on an invalid ring on the new ifp, but first we
+ * delete the ring on the old ifp, and then will create a new
+ * one on the new ifp.
+ */
+ if (ifp != inp->inp_txringid_ifp) {
+ bool wlocked = INP_WLOCKED(inp);
+
+ if (!wlocked) {
+ /*
+ * NOTE: If the write locking fails, we need
+ * to bail out and use the non-ratelimited
+ * ring for the transmit until there is a new
+ * chance to write lock.
+ */
+ if (!INP_TRY_UPGRADE(inp))
+ return;
+ }
+
+ if (inp->inp_txringid_ifp != NULL)
+ in_pcbdetach_txrtlmt(inp);
+
+ error = in_pcbattach_txrtlmt(inp, ifp, max_pacing_rate);
+
+ if (!wlocked)
+ INP_DOWNGRADE(inp);
+ if (error)
+ return;
+
+ } else if (inp->inp_txringid_max_rate != max_pacing_rate) {
+ bool wlocked = INP_WLOCKED(inp);
+
+ if (!wlocked) {
+ /*
+ * NOTE: If the write locking fails, use the
+ * current pacing rate until there is a new
+ * chance to write lock:
+ */
+ if (!INP_TRY_UPGRADE(inp))
+ goto done;
+ }
+
+ error = in_pcbmodify_txrtlmt(inp, ifp, max_pacing_rate);
+ if (!wlocked)
+ INP_DOWNGRADE(inp);
+ if (error)
+ goto done; /* use old rate */
+ }
+done:
+ /*
+ * Update the flow ID and RSS hash for the transmitted mbuf.
+ */
+ mb->m_pkthdr.flowid = inp->inp_txringid;
+ M_HASHTYPE_SET(mb, M_HASHTYPE_TXRTLMT);
+}
+#endif /* RATELIMIT */
Index: sys/netinet/ip_output.c
===================================================================
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -33,6 +33,7 @@
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
+#include "opt_ratelimit.h"
#include "opt_ipsec.h"
#include "opt_mbuf_stress_test.h"
#include "opt_mpath.h"
@@ -657,7 +658,12 @@
* to avoid confusing lower layers.
*/
m_clrprotoflags(m);
+
IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
+#ifdef RATELIMIT
+ if (ifp->if_capabilities & IFCAP_TXRTLMT)
+ in_pcboutput_txrtlmt(inp, ifp, m);
+#endif
error = (*ifp->if_output)(ifp, m,
(const struct sockaddr *)gw, ro);
goto done;
@@ -694,6 +700,10 @@
m_clrprotoflags(m);
IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
+#ifdef RATELIMIT
+ if (ifp->if_capabilities & IFCAP_TXRTLMT)
+ in_pcboutput_txrtlmt(inp, ifp, m);
+#endif
error = (*ifp->if_output)(ifp, m,
(const struct sockaddr *)gw, ro);
} else
Index: sys/netinet6/ip6_output.c
===================================================================
--- sys/netinet6/ip6_output.c
+++ sys/netinet6/ip6_output.c
@@ -65,6 +65,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_ratelimit.h"
#include "opt_ipsec.h"
#include "opt_sctp.h"
#include "opt_route.h"
@@ -944,6 +945,10 @@
m->m_pkthdr.len);
ifa_free(&ia6->ia_ifa);
}
+#ifdef RATELIMIT
+ if (ifp->if_capabilities & IFCAP_TXRTLMT)
+ in_pcboutput_txrtlmt(inp, ifp, m);
+#endif
error = nd6_output_ifp(ifp, origifp, m, dst,
(struct route *)ro);
goto done;
@@ -1044,6 +1049,10 @@
counter_u64_add(ia->ia_ifa.ifa_obytes,
m->m_pkthdr.len);
}
+#ifdef RATELIMIT
+ if (ifp->if_capabilities & IFCAP_TXRTLMT)
+ in_pcboutput_txrtlmt(inp, ifp, m);
+#endif
error = nd6_output_ifp(ifp, origifp, m, dst,
(struct route *)ro);
} else
Index: sys/sys/mbuf.h
===================================================================
--- sys/sys/mbuf.h
+++ sys/sys/mbuf.h
@@ -344,6 +344,7 @@
#define M_HASHTYPE_RSS_UDP_IPV6_EX M_HASHTYPE_HASH(10)/* IPv6 UDP 4-tuple +
* ext hdrs */
+#define M_HASHTYPE_TXRTLMT 62 /* rate limited TX traffic */
#define M_HASHTYPE_OPAQUE 63 /* ordering, not affinity */
#define M_HASHTYPE_OPAQUE_HASH M_HASHTYPE_HASH(M_HASHTYPE_OPAQUE)
/* ordering+hash, not affinity*/
Index: sys/sys/socket.h
===================================================================
--- sys/sys/socket.h
+++ sys/sys/socket.h
@@ -158,6 +158,7 @@
#define SO_USER_COOKIE 0x1015 /* user cookie (dummynet etc.) */
#define SO_PROTOCOL 0x1016 /* get socket protocol (Linux name) */
#define SO_PROTOTYPE SO_PROTOCOL /* alias for SO_PROTOCOL (SunOS name) */
+#define SO_MAX_PACING_RATE 0x1017 /* set max TX pacing rate per socket */
#endif
/*
Index: sys/sys/socketvar.h
===================================================================
--- sys/sys/socketvar.h
+++ sys/sys/socketvar.h
@@ -79,6 +79,7 @@
void *so_pcb; /* protocol control block */
struct vnet *so_vnet; /* (a) network stack instance */
struct protosw *so_proto; /* (a) protocol handle */
+ uint32_t so_max_pacing_rate; /* (f) TX pacing rate info */
/*
* Variables for connection queuing.
* Socket where accepts occur is so_head in all subsidiary sockets.
Index: sys/sys/sockio.h
===================================================================
--- sys/sys/sockio.h
+++ sys/sys/sockio.h
@@ -133,4 +133,8 @@
#define SIOCGIFGMEMB _IOWR('i', 138, struct ifgroupreq) /* get members */
#define SIOCGIFXMEDIA _IOWR('i', 139, struct ifmediareq) /* get net xmedia */
+#define SIOCARATECTL _IOWR('i', 140, struct ifreq_txrtlmt) /* add tx rate limit */
+#define SIOCSRATECTL _IOWR('i', 141, struct ifreq_txrtlmt) /* set tx rate limit */
+#define SIOCDRATECTL _IOW('i', 142, struct ifreq_txrtlmt) /* del tx rate limit */
+
#endif /* !_SYS_SOCKIO_H_ */
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, May 8, 1:05 AM (18 h, 6 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28584075
Default Alt Text
D3687.1778202330.diff (17 KB)
Attached To
Mode
D3687: Implement kernel support for hardware rate limited sockets
Attached
Detach File
Event Timeline
Log In to Comment