Page MenuHomeFreeBSD

D3687.1775567295.diff
No OneTemporary

Size
29 KB
Referenced Files
None
Subscribers
None

D3687.1775567295.diff

Index: lib/libc/sys/getsockopt.2
===================================================================
--- lib/libc/sys/getsockopt.2
+++ lib/libc/sys/getsockopt.2
@@ -28,7 +28,7 @@
.\" @(#)getsockopt.2 8.4 (Berkeley) 5/2/95
.\" $FreeBSD$
.\"
-.Dd April 5, 2013
+.Dd October 11, 2016
.Dt GETSOCKOPT 2
.Os
.Sh NAME
@@ -187,6 +187,7 @@
.It Dv SO_LISTENQLEN Ta "get complete queue length of the socket (get only)"
.It Dv SO_LISTENINCQLEN Ta "get incomplete queue length of the socket (get only)"
.It Dv SO_USER_COOKIE Ta "set the 'so_user_cookie' value for the socket (uint32_t, set only)"
+.It Dv SO_MAX_PACING_RATE Ta "set the maximum transmit rate in bytes per second for the socket"
.El
.Pp
.Dv SO_DEBUG
@@ -496,6 +497,11 @@
returns the number of unaccepted complete connections.
.Dv SO_LISTENINCQLEN
returns the number of unaccepted incomplete connections.
+.Pp
+.Dv SO_MAX_PACING_RATE
+instructs the socket and underlying network adapter layers that the
+transmit rate should be limited to the given unsigned 32-bit value in
+bytes per second.
.Sh RETURN VALUES
.Rv -std
.Sh ERRORS
Index: sbin/ifconfig/ifconfig.8
===================================================================
--- sbin/ifconfig/ifconfig.8
+++ sbin/ifconfig/ifconfig.8
@@ -28,7 +28,7 @@
.\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94
.\" $FreeBSD$
.\"
-.Dd September 17, 2016
+.Dd October 11, 2016
.Dt IFCONFIG 8
.Os
.Sh NAME
@@ -460,6 +460,8 @@
and 802.11g
.Pq Cm 11g
operating modes.
+.It Cm txrtlmt
+Set if the driver supports TX rate limiting.
.It Cm inst Ar minst , Cm instance Ar minst
Set the media instance to
.Ar minst .
Index: sbin/ifconfig/ifconfig.c
===================================================================
--- sbin/ifconfig/ifconfig.c
+++ sbin/ifconfig/ifconfig.c
@@ -1145,7 +1145,7 @@
"\020\1RXCSUM\2TXCSUM\3NETCONS\4VLAN_MTU\5VLAN_HWTAGGING\6JUMBO_MTU\7POLLING" \
"\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \
"\17TOE4\20TOE6\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \
-"\26RXCSUM_IPV6\27TXCSUM_IPV6"
+"\26RXCSUM_IPV6\27TXCSUM_IPV6\31TXRTLMT"
/*
* Print the status of the interface. If an address family was
@@ -1453,6 +1453,8 @@
DEF_CMD("-wol_mcast", -IFCAP_WOL_MCAST, setifcap),
DEF_CMD("wol_magic", IFCAP_WOL_MAGIC, setifcap),
DEF_CMD("-wol_magic", -IFCAP_WOL_MAGIC, setifcap),
+ DEF_CMD("txrtlmt", IFCAP_TXRTLMT, setifcap),
+ DEF_CMD("-txrtlmt", -IFCAP_TXRTLMT, setifcap),
DEF_CMD("normal", -IFF_LINK0, setifflags),
DEF_CMD("compress", IFF_LINK0, setifflags),
DEF_CMD("noicmp", IFF_LINK1, setifflags),
Index: sys/conf/NOTES
===================================================================
--- sys/conf/NOTES
+++ sys/conf/NOTES
@@ -616,6 +616,8 @@
options INET #Internet communications protocols
options INET6 #IPv6 communications protocols
+options RATELIMIT # TX rate limiting support
+
options ROUTETABLES=2 # allocated fibs up to 65536. default is 1.
# but that would be a bad idea as they are large.
Index: sys/conf/config.mk
===================================================================
--- sys/conf/config.mk
+++ sys/conf/config.mk
@@ -19,6 +19,10 @@
opt_inet6.h:
@echo "#define INET6 1" > ${.TARGET}
.endif
+.if ${MK_RATELIMIT} != "no"
+opt_ratelimit.h:
+ @echo "#define RATELIMIT 1" > ${.TARGET}
+.endif
.if ${MK_EISA} != "no"
opt_eisa.h:
@echo "#define DEV_EISA 1" > ${.TARGET}
Index: sys/conf/kern.opts.mk
===================================================================
--- sys/conf/kern.opts.mk
+++ sys/conf/kern.opts.mk
@@ -47,7 +47,8 @@
EISA \
EXTRA_TCP_STACKS \
NAND \
- OFED
+ OFED \
+ RATELIMIT
# Some options are totally broken on some architectures. We disable
# them. If you need to enable them on an experimental basis, you
Index: sys/conf/options
===================================================================
--- sys/conf/options
+++ sys/conf/options
@@ -410,6 +410,7 @@
BOOTP_WIRED_TO opt_bootp.h
DEVICE_POLLING
DUMMYNET opt_ipdn.h
+RATELIMIT opt_ratelimit.h
INET opt_inet.h
INET6 opt_inet6.h
IPDIVERT
Index: sys/kern/uipc_socket.c
===================================================================
--- sys/kern/uipc_socket.c
+++ sys/kern/uipc_socket.c
@@ -105,6 +105,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_ratelimit.h"
#include "opt_compat.h"
#include <sys/param.h>
@@ -2679,6 +2680,18 @@
#endif
break;
+ case SO_MAX_PACING_RATE:
+#ifdef RATELIMIT
+ error = sooptcopyin(sopt, &val32, sizeof(val32),
+ sizeof(val32));
+ if (error)
+ goto bad;
+ so->so_max_pacing_rate = val32;
+#else
+ error = EOPNOTSUPP;
+#endif
+ break;
+
default:
if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
error = hhook_run_socket(so, sopt,
@@ -2734,6 +2747,9 @@
#ifdef MAC
struct mac extmac;
#endif
+#ifdef RATELIMIT
+ uint32_t val32;
+#endif
CURVNET_SET(so->so_vnet);
error = 0;
@@ -2866,6 +2882,15 @@
optval = so->so_incqlen;
goto integer;
+ case SO_MAX_PACING_RATE:
+#ifdef RATELIMIT
+ val32 = so->so_max_pacing_rate;
+ error = sooptcopyout(sopt, &val32, sizeof(val32));
+#else
+ error = EOPNOTSUPP;
+#endif
+ break;
+
default:
if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
error = hhook_run_socket(so, sopt,
Index: sys/modules/if_lagg/Makefile
===================================================================
--- sys/modules/if_lagg/Makefile
+++ sys/modules/if_lagg/Makefile
@@ -2,6 +2,6 @@
.PATH: ${.CURDIR}/../../net
KMOD= if_lagg
-SRCS= if_lagg.c ieee8023ad_lacp.c opt_inet.h opt_inet6.h
+SRCS= if_lagg.c ieee8023ad_lacp.c opt_inet.h opt_inet6.h opt_ratelimit.h
.include <bsd.kmod.mk>
Index: sys/modules/if_vlan/Makefile
===================================================================
--- sys/modules/if_vlan/Makefile
+++ sys/modules/if_vlan/Makefile
@@ -4,6 +4,6 @@
KMOD= if_vlan
SRCS= if_vlan.c
-SRCS+= opt_inet.h opt_vlan.h
+SRCS+= opt_inet.h opt_vlan.h opt_ratelimit.h
.include <bsd.kmod.mk>
Index: sys/net/ieee8023ad_lacp.h
===================================================================
--- sys/net/ieee8023ad_lacp.h
+++ sys/net/ieee8023ad_lacp.h
@@ -284,6 +284,9 @@
struct mbuf *lacp_input(struct lagg_port *, struct mbuf *);
struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *);
+#ifdef RATELIMIT
+struct lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t);
+#endif
void lacp_attach(struct lagg_softc *);
void lacp_detach(void *);
void lacp_init(struct lagg_softc *);
Index: sys/net/ieee8023ad_lacp.c
===================================================================
--- sys/net/ieee8023ad_lacp.c
+++ sys/net/ieee8023ad_lacp.c
@@ -30,6 +30,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_ratelimit.h"
+
#include <sys/param.h>
#include <sys/callout.h>
#include <sys/eventhandler.h>
@@ -853,6 +855,35 @@
return (lp->lp_lagg);
}
+
+#ifdef RATELIMIT
+struct lagg_port *
+lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t flowid)
+{
+ struct lacp_softc *lsc = LACP_SOFTC(sc);
+ struct lacp_portmap *pm;
+ struct lacp_port *lp;
+ uint32_t hash;
+
+ if (__predict_false(lsc->lsc_suppress_distributing)) {
+ LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
+ return (NULL);
+ }
+
+ pm = &lsc->lsc_pmap[lsc->lsc_activemap];
+ if (pm->pm_count == 0) {
+ LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
+ return (NULL);
+ }
+
+ hash = flowid >> sc->flowid_shift;
+ hash %= pm->pm_count;
+ lp = pm->pm_map[hash];
+
+ return (lp->lp_lagg);
+}
+#endif
+
/*
* lacp_suppress_distributing: drop transmit packets for a while
* to preserve packet ordering.
Index: sys/net/if.h
===================================================================
--- sys/net/if.h
+++ sys/net/if.h
@@ -239,6 +239,7 @@
#define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */
#define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */
#define IFCAP_HWSTATS 0x800000 /* manages counters internally */
+#define IFCAP_TXRTLMT 0x1000000 /* hardware supports TX rate limiting */
#define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6)
Index: sys/net/if_dead.c
===================================================================
--- sys/net/if_dead.c
+++ sys/net/if_dead.c
@@ -100,6 +100,30 @@
return (0);
}
+static int
+ifdead_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
+ struct m_snd_tag **ppmt)
+{
+ return (EOPNOTSUPP);
+}
+
+static int
+ifdead_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params)
+{
+ return (EOPNOTSUPP);
+}
+
+static int
+ifdead_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
+{
+ return (EOPNOTSUPP);
+}
+
+static void
+ifdead_snd_tag_free(struct m_snd_tag *pmt)
+{
+}
+
void
if_dead(struct ifnet *ifp)
{
@@ -112,4 +136,8 @@
ifp->if_qflush = ifdead_qflush;
ifp->if_transmit = ifdead_transmit;
ifp->if_get_counter = ifdead_get_counter;
+ ifp->if_snd_tag_alloc = ifdead_snd_tag_alloc;
+ ifp->if_snd_tag_modify = ifdead_snd_tag_modify;
+ ifp->if_snd_tag_query = ifdead_snd_tag_query;
+ ifp->if_snd_tag_free = ifdead_snd_tag_free;
}
Index: sys/net/if_lagg.c
===================================================================
--- sys/net/if_lagg.c
+++ sys/net/if_lagg.c
@@ -23,6 +23,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_ratelimit.h"
#include <sys/param.h>
#include <sys/kernel.h>
@@ -118,6 +119,11 @@
static void lagg_init(void *);
static void lagg_stop(struct lagg_softc *);
static int lagg_ioctl(struct ifnet *, u_long, caddr_t);
+#ifdef RATELIMIT
+static int lagg_snd_tag_alloc(struct ifnet *,
+ union if_snd_tag_alloc_params *,
+ struct m_snd_tag **);
+#endif
static int lagg_ether_setmulti(struct lagg_softc *);
static int lagg_ether_cmdmulti(struct lagg_port *, int);
static int lagg_setflag(struct lagg_port *, int, int,
@@ -503,7 +509,12 @@
ifp->if_ioctl = lagg_ioctl;
ifp->if_get_counter = lagg_get_counter;
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
+#ifdef RATELIMIT
+ ifp->if_snd_tag_alloc = lagg_snd_tag_alloc;
+ ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS | IFCAP_TXRTLMT;
+#else
ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
+#endif
/*
* Attach as an ordinary ethernet device, children will be attached
@@ -1549,6 +1560,52 @@
return (error);
}
+#ifdef RATELIMIT
+/* Forward a send tag allocation to the member port chosen for the flow. */
+static int
+lagg_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
+    struct m_snd_tag **ppmt)
+{
+	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+	struct lagg_port *lp;
+	struct lagg_lb *lb;
+	uint32_t p;
+
+	switch (sc->sc_proto) {
+	case LAGG_PROTO_FAILOVER:
+		lp = lagg_link_active(sc, sc->sc_primary);
+		break;
+	case LAGG_PROTO_LOADBALANCE:
+		/* sc_count is the modulus below; an empty lagg must bail out */
+		if (sc->sc_count == 0 ||
+		    (sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
+		    params->hdr.flowtype == M_HASHTYPE_NONE)
+			return (EOPNOTSUPP);
+		p = (params->hdr.flowid >> sc->flowid_shift) % sc->sc_count;
+		lb = (struct lagg_lb *)sc->sc_psc;
+		lp = lb->lb_ports[p];
+		lp = lagg_link_active(sc, lp);
+		break;
+	case LAGG_PROTO_LACP:
+		if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
+		    params->hdr.flowtype == M_HASHTYPE_NONE)
+			return (EOPNOTSUPP);
+		lp = lacp_select_tx_port_by_hash(sc, params->hdr.flowid);
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+	if (lp == NULL)
+		return (EOPNOTSUPP);
+	ifp = lp->lp_ifp;
+	if (ifp == NULL || ifp->if_snd_tag_alloc == NULL ||
+	    (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
+		return (EOPNOTSUPP);
+	/* forward allocation request to the selected member port */
+	return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
+}
+#endif
+
static int
lagg_ether_setmulti(struct lagg_softc *sc)
{
Index: sys/net/if_var.h
===================================================================
--- sys/net/if_var.h
+++ sys/net/if_var.h
@@ -175,6 +175,49 @@
#define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */
+/*
+ * Network interface send tag support. The storage of "struct
+ * m_snd_tag" comes from the network driver and it is free to allocate
+ * as much additional space as it wants for its own use.
+ */
+struct m_snd_tag;
+
+#define IF_SND_TAG_TYPE_RATE_LIMIT 0
+#define IF_SND_TAG_TYPE_MAX 1
+
+struct if_snd_tag_alloc_header {
+ uint32_t type; /* send tag type, see IF_SND_TAG_XXX */
+ uint32_t flowid; /* mbuf hash value */
+ uint32_t flowtype; /* mbuf hash type */
+};
+
+struct if_snd_tag_alloc_rate_limit {
+ struct if_snd_tag_alloc_header hdr;
+ uint64_t max_rate; /* in bytes/s */
+};
+
+struct if_snd_tag_rate_limit_params {
+ uint64_t max_rate; /* in bytes/s */
+};
+
+union if_snd_tag_alloc_params {
+ struct if_snd_tag_alloc_header hdr;
+ struct if_snd_tag_alloc_rate_limit rate_limit;
+};
+
+union if_snd_tag_modify_params {
+ struct if_snd_tag_rate_limit_params rate_limit;
+};
+
+union if_snd_tag_query_params {
+ struct if_snd_tag_rate_limit_params rate_limit;
+};
+
+typedef int (if_snd_tag_alloc_t)(struct ifnet *, union if_snd_tag_alloc_params *,
+ struct m_snd_tag **);
+typedef int (if_snd_tag_modify_t)(struct m_snd_tag *, union if_snd_tag_modify_params *);
+typedef int (if_snd_tag_query_t)(struct m_snd_tag *, union if_snd_tag_query_params *);
+typedef void (if_snd_tag_free_t)(struct m_snd_tag *);
/*
* Structure defining a network interface.
@@ -304,12 +347,19 @@
u_int if_hw_tsomaxsegsize; /* TSO maximum segment size in bytes */
/*
+ * Network adapter send tag support:
+ */
+ if_snd_tag_alloc_t *if_snd_tag_alloc;
+ if_snd_tag_modify_t *if_snd_tag_modify;
+ if_snd_tag_query_t *if_snd_tag_query;
+ if_snd_tag_free_t *if_snd_tag_free;
+
+ /*
* Spare fields to be added before branching a stable branch, so
* that structure can be enhanced without changing the kernel
* binary interface.
*/
- void *if_pspare[4]; /* packet pacing / general use */
- int if_ispare[4]; /* packet pacing / general use */
+ int if_ispare[4]; /* general use */
};
/* for compatibility with other BSDs */
Index: sys/net/if_vlan.c
===================================================================
--- sys/net/if_vlan.c
+++ sys/net/if_vlan.c
@@ -46,6 +46,7 @@
#include "opt_inet.h"
#include "opt_vlan.h"
+#include "opt_ratelimit.h"
#include <sys/param.h>
#include <sys/eventhandler.h>
@@ -212,6 +213,10 @@
static void vlan_init(void *foo);
static void vlan_input(struct ifnet *ifp, struct mbuf *m);
static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
+#ifdef RATELIMIT
+static int vlan_snd_tag_alloc(struct ifnet *,
+ union if_snd_tag_alloc_params *, struct m_snd_tag **);
+#endif
static void vlan_qflush(struct ifnet *ifp);
static int vlan_setflag(struct ifnet *ifp, int flag, int status,
int (*func)(struct ifnet *, int));
@@ -971,6 +976,9 @@
ifp->if_transmit = vlan_transmit;
ifp->if_qflush = vlan_qflush;
ifp->if_ioctl = vlan_ioctl;
+#ifdef RATELIMIT
+ ifp->if_snd_tag_alloc = vlan_snd_tag_alloc;
+#endif
ifp->if_flags = VLAN_IFFLAGS;
ether_ifattach(ifp, eaddr);
/* Now undo some of the damage... */
@@ -1591,6 +1599,15 @@
TOEDEV(ifp) = TOEDEV(p);
ifp->if_capenable |= p->if_capenable & IFCAP_TOE;
}
+
+#ifdef RATELIMIT
+ /*
+ * If the parent interface supports ratelimiting, so does the
+ * VLAN interface.
+ */
+ ifp->if_capabilities |= (p->if_capabilities & IFCAP_TXRTLMT);
+ ifp->if_capenable |= (p->if_capenable & IFCAP_TXRTLMT);
+#endif
}
static void
@@ -1801,3 +1818,19 @@
return (error);
}
+
+#ifdef RATELIMIT
+/* Forward a send tag allocation to the trunk device, if it can take one. */
+static int
+vlan_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
+    struct m_snd_tag **ppmt)
+{
+	/* get trunk device; it needs both the capability and the callback */
+	ifp = vlan_trunkdev(ifp);
+	if (ifp == NULL || ifp->if_snd_tag_alloc == NULL ||
+	    (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
+		return (EOPNOTSUPP);
+	/* forward allocation request */
+	return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
+}
+#endif
Index: sys/netinet/in_pcb.h
===================================================================
--- sys/netinet/in_pcb.h
+++ sys/netinet/in_pcb.h
@@ -181,6 +181,7 @@
* read-lock usage during modification, this model can be applied to other
* protocols (especially SCTP).
*/
+struct m_snd_tag;
struct inpcb {
LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */
LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
@@ -202,11 +203,11 @@
u_char inp_ip_minttl; /* (i) minimum TTL or drop */
uint32_t inp_flowid; /* (x) flow id / queue id */
u_int inp_refcount; /* (i) refcount */
- void *inp_pspare[5]; /* (x) packet pacing / general use */
+ struct m_snd_tag *inp_snd_tag; /* (i) send tag for outgoing mbufs */
+ void *inp_pspare[4]; /* (x) general use */
uint32_t inp_flowtype; /* (x) M_HASHTYPE value */
uint32_t inp_rss_listen_bucket; /* (x) overridden RSS listen bucket */
- u_int inp_ispare[4]; /* (x) packet pacing / user cookie /
- * general use */
+ u_int inp_ispare[4]; /* (x) user cookie / general use */
/* Local and foreign ports, local and foreign addr. */
struct in_conninfo inp_inc; /* (i) list for PCB's local port */
@@ -616,6 +617,7 @@
#define INP_RSS_BUCKET_SET 0x00000080 /* IP_RSS_LISTEN_BUCKET is set */
#define INP_RECVFLOWID 0x00000100 /* populate recv datagram with flow info */
#define INP_RECVRSSBUCKETID 0x00000200 /* populate recv datagram with bucket id */
+#define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */
/*
* Flags passed to in_pcblookup*() functions.
@@ -736,6 +738,14 @@
struct sockaddr *
in_sockaddr(in_port_t port, struct in_addr *addr);
void in_pcbsosetlabel(struct socket *so);
+#ifdef RATELIMIT
+int in_pcbattach_txrtlmt(struct inpcb *, struct ifnet *, uint32_t, uint32_t, uint32_t);
+void in_pcbdetach_txrtlmt(struct inpcb *);
+int in_pcbmodify_txrtlmt(struct inpcb *, uint32_t);
+int in_pcbquery_txrtlmt(struct inpcb *, uint32_t *);
+void in_pcboutput_txrtlmt(struct inpcb *, struct ifnet *, struct mbuf *);
+void in_pcboutput_eagain(struct inpcb *);
+#endif
#endif /* _KERNEL */
#endif /* !_NETINET_IN_PCB_H_ */
Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -42,6 +42,7 @@
#include "opt_ipsec.h"
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_ratelimit.h"
#include "opt_pcbgroup.h"
#include "opt_rss.h"
@@ -57,6 +58,7 @@
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <sys/sockio.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
@@ -1140,6 +1142,10 @@
KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));
+#ifdef RATELIMIT
+ if (inp->inp_snd_tag != NULL)
+ in_pcbdetach_txrtlmt(inp);
+#endif
inp->inp_socket->so_pcb = NULL;
inp->inp_socket = NULL;
}
@@ -2677,3 +2683,251 @@
db_print_inpcb(inp, "inpcb", 0);
}
#endif /* DDB */
+
+#ifdef RATELIMIT
+/*
+ * Modify TX rate limit based on the existing "inp->inp_snd_tag",
+ * if any.
+ */
+int
+in_pcbmodify_txrtlmt(struct inpcb *inp, uint32_t max_pacing_rate)
+{
+ union if_snd_tag_modify_params params = {
+ .rate_limit.max_rate = max_pacing_rate,
+ };
+ struct m_snd_tag *mst;
+ struct ifnet *ifp;
+ int error;
+
+ mst = inp->inp_snd_tag;
+ if (mst == NULL)
+ return (EINVAL);
+
+ ifp = mst->ifp;
+ if (ifp == NULL)
+ return (EINVAL);
+
+ if (ifp->if_snd_tag_modify == NULL) {
+ error = EOPNOTSUPP;
+ } else {
+ error = ifp->if_snd_tag_modify(mst, &params);
+ }
+ return (error);
+}
+
+/*
+ * Query existing TX rate limit based on the existing
+ * "inp->inp_snd_tag", if any.
+ */
+int
+in_pcbquery_txrtlmt(struct inpcb *inp, uint32_t *p_max_pacing_rate)
+{
+ union if_snd_tag_query_params params = { };
+ struct m_snd_tag *mst;
+ struct ifnet *ifp;
+ int error;
+
+ mst = inp->inp_snd_tag;
+ if (mst == NULL)
+ return (EINVAL);
+
+ ifp = mst->ifp;
+ if (ifp == NULL)
+ return (EINVAL);
+
+ if (ifp->if_snd_tag_query == NULL) {
+ error = EOPNOTSUPP;
+ } else {
+ error = ifp->if_snd_tag_query(mst, &params);
+ if (error == 0 && p_max_pacing_rate != NULL)
+ *p_max_pacing_rate = params.rate_limit.max_rate;
+ }
+ return (error);
+}
+
+/*
+ * Allocate a new TX rate limit send tag from the network interface
+ * given by the "ifp" argument and save it in "inp->inp_snd_tag":
+ */
+int
+in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp,
+ uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate)
+{
+ union if_snd_tag_alloc_params params = {
+ .rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT,
+ .rate_limit.hdr.flowid = flowid,
+ .rate_limit.hdr.flowtype = flowtype,
+ .rate_limit.max_rate = max_pacing_rate,
+ };
+ int error;
+
+ INP_WLOCK_ASSERT(inp);
+
+ if (inp->inp_snd_tag != NULL)
+ return (EINVAL);
+
+ if (ifp->if_snd_tag_alloc == NULL) {
+ error = EOPNOTSUPP;
+ } else {
+ error = ifp->if_snd_tag_alloc(ifp, &params, &inp->inp_snd_tag);
+
+ /*
+ * At success increment the refcount on
+ * the network interface:
+ */
+ if (error == 0)
+ if_ref(inp->inp_snd_tag->ifp);
+ }
+ return (error);
+}
+
+/*
+ * Free an existing TX rate limit tag based on the "inp->inp_snd_tag",
+ * if any:
+ */
+void
+in_pcbdetach_txrtlmt(struct inpcb *inp)
+{
+ struct m_snd_tag *mst;
+ struct ifnet *ifp;
+
+ INP_WLOCK_ASSERT(inp);
+
+ mst = inp->inp_snd_tag;
+ inp->inp_snd_tag = NULL;
+
+ if (mst == NULL)
+ return;
+
+ ifp = mst->ifp;
+ if (ifp == NULL)
+ return;
+
+ /*
+ * If the device was detached while we still had reference(s)
+ * on the ifp, we assume if_snd_tag_free() was replaced with
+ * stubs.
+ */
+ ifp->if_snd_tag_free(mst);
+
+ /* release reference count on network interface */
+ if_rele(ifp);
+}
+
+/*
+ * This function should be called when the INP_RATE_LIMIT_CHANGED flag
+ * is set in the fast path and will attach/detach/modify the TX rate
+ * limit send tag based on the socket's so_max_pacing_rate value.
+ */
+void
+in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb)
+{
+ struct socket *socket;
+ uint32_t max_pacing_rate;
+ bool was_wlocked;
+ int error;
+
+ if (inp == NULL)
+ return;
+
+ socket = inp->inp_socket;
+ if (socket == NULL)
+ return;
+
+ was_wlocked = INP_WLOCKED(inp);
+
+ if (!was_wlocked) {
+ /*
+ * NOTE: If the write locking fails, we need to bail
+ * out and use the non-ratelimited ring for the
+ * transmit until there is a new chance to get the
+ * write lock.
+ */
+ if (!INP_TRY_UPGRADE(inp))
+ return;
+ }
+
+ /*
+ * NOTE: The so_max_pacing_rate value is read unlocked,
+ * because atomic updates are not required since the variable
+ * is checked at every mbuf we send. It is assumed that the
+ * variable read itself will be atomic.
+ */
+ max_pacing_rate = socket->so_max_pacing_rate;
+
+ /*
+ * NOTE: When attaching to a network interface a reference is
+ * made to ensure the network interface doesn't go away until
+ * all ratelimit connections are gone. The network interface
+ * pointers compared below represent valid network interfaces,
+ * except when comparing towards NULL.
+ */
+ if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) {
+ error = 0;
+ } else if (!(ifp->if_capenable & IFCAP_TXRTLMT)) {
+ if (inp->inp_snd_tag != NULL)
+ in_pcbdetach_txrtlmt(inp);
+ error = 0;
+ } else if (inp->inp_snd_tag == NULL) {
+ /*
+ * In order to utilize packet pacing with RSS, we need
+ * to wait until there is a valid RSS hash before we
+ * can proceed:
+ */
+ if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) {
+ error = EAGAIN;
+ } else {
+ error = in_pcbattach_txrtlmt(inp, ifp, M_HASHTYPE_GET(mb),
+ mb->m_pkthdr.flowid, max_pacing_rate);
+ }
+ } else {
+ error = in_pcbmodify_txrtlmt(inp, max_pacing_rate);
+ }
+ if (error == 0 || error == EOPNOTSUPP)
+ inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
+ if (!was_wlocked)
+ INP_DOWNGRADE(inp);
+}
+
+/*
+ * Track route changes for TX rate limiting.
+ */
+void
+in_pcboutput_eagain(struct inpcb *inp)
+{
+ struct socket *socket;
+ bool was_wlocked;
+
+ if (inp == NULL)
+ return;
+
+ socket = inp->inp_socket;
+ if (socket == NULL)
+ return;
+
+ if (inp->inp_snd_tag == NULL)
+ return;
+
+ was_wlocked = INP_WLOCKED(inp);
+
+ if (!was_wlocked) {
+ /*
+ * NOTE: If the write locking fails, we need to bail
+ * out and use the non-ratelimited ring for the
+ * transmit until there is a new chance to get the
+ * write lock.
+ */
+ if (!INP_TRY_UPGRADE(inp))
+ return;
+ }
+
+ /* detach rate limiting */
+ in_pcbdetach_txrtlmt(inp);
+
+ /* make sure new mbuf send tag allocation is made */
+ inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
+
+ if (!was_wlocked)
+ INP_DOWNGRADE(inp);
+}
+#endif /* RATELIMIT */
Index: sys/netinet/ip_output.c
===================================================================
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -33,6 +33,7 @@
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
+#include "opt_ratelimit.h"
#include "opt_ipsec.h"
#include "opt_mbuf_stress_test.h"
#include "opt_mpath.h"
@@ -661,8 +662,23 @@
*/
m_clrprotoflags(m);
IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
+#ifdef RATELIMIT
+ if (inp != NULL) {
+ if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
+ in_pcboutput_txrtlmt(inp, ifp, m);
+ /* stamp send tag on mbuf */
+ m->m_pkthdr.snd_tag = inp->inp_snd_tag;
+ } else {
+ m->m_pkthdr.snd_tag = NULL;
+ }
+#endif
error = (*ifp->if_output)(ifp, m,
(const struct sockaddr *)gw, ro);
+#ifdef RATELIMIT
+ /* check for route change */
+ if (error == EAGAIN)
+ in_pcboutput_eagain(inp);
+#endif
goto done;
}
@@ -697,8 +713,23 @@
m_clrprotoflags(m);
IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
+#ifdef RATELIMIT
+ if (inp != NULL) {
+ if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
+ in_pcboutput_txrtlmt(inp, ifp, m);
+ /* stamp send tag on mbuf */
+ m->m_pkthdr.snd_tag = inp->inp_snd_tag;
+ } else {
+ m->m_pkthdr.snd_tag = NULL;
+ }
+#endif
error = (*ifp->if_output)(ifp, m,
(const struct sockaddr *)gw, ro);
+#ifdef RATELIMIT
+ /* check for route change */
+ if (error == EAGAIN)
+ in_pcboutput_eagain(inp);
+#endif
} else
m_freem(m);
}
@@ -973,6 +1004,16 @@
INP_WUNLOCK(inp);
error = 0;
break;
+ case SO_MAX_PACING_RATE:
+#ifdef RATELIMIT
+ INP_WLOCK(inp);
+ inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
+ INP_WUNLOCK(inp);
+ error = 0;
+#else
+ error = EOPNOTSUPP;
+#endif
+ break;
default:
break;
}
Index: sys/netinet6/ip6_output.c
===================================================================
--- sys/netinet6/ip6_output.c
+++ sys/netinet6/ip6_output.c
@@ -65,6 +65,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_ratelimit.h"
#include "opt_ipsec.h"
#include "opt_sctp.h"
#include "opt_route.h"
@@ -954,8 +955,23 @@
m->m_pkthdr.len);
ifa_free(&ia6->ia_ifa);
}
+#ifdef RATELIMIT
+ if (inp != NULL) {
+ if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
+ in_pcboutput_txrtlmt(inp, ifp, m);
+ /* stamp send tag on mbuf */
+ m->m_pkthdr.snd_tag = inp->inp_snd_tag;
+ } else {
+ m->m_pkthdr.snd_tag = NULL;
+ }
+#endif
error = nd6_output_ifp(ifp, origifp, m, dst,
(struct route *)ro);
+#ifdef RATELIMIT
+ /* check for route change */
+ if (error == EAGAIN)
+ in_pcboutput_eagain(inp);
+#endif
goto done;
}
@@ -1054,8 +1070,23 @@
counter_u64_add(ia->ia_ifa.ifa_obytes,
m->m_pkthdr.len);
}
+#ifdef RATELIMIT
+ if (inp != NULL) {
+ if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
+ in_pcboutput_txrtlmt(inp, ifp, m);
+ /* stamp send tag on mbuf */
+ m->m_pkthdr.snd_tag = inp->inp_snd_tag;
+ } else {
+ m->m_pkthdr.snd_tag = NULL;
+ }
+#endif
error = nd6_output_ifp(ifp, origifp, m, dst,
(struct route *)ro);
+#ifdef RATELIMIT
+ /* check for route change */
+ if (error == EAGAIN)
+ in_pcboutput_eagain(inp);
+#endif
} else
m_freem(m);
}
@@ -1441,6 +1472,16 @@
INP_WUNLOCK(in6p);
error = 0;
break;
+ case SO_MAX_PACING_RATE:
+#ifdef RATELIMIT
+ INP_WLOCK(in6p);
+ in6p->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
+ INP_WUNLOCK(in6p);
+ error = 0;
+#else
+ error = EOPNOTSUPP;
+#endif
+ break;
default:
break;
}
Index: sys/sys/mbuf.h
===================================================================
--- sys/sys/mbuf.h
+++ sys/sys/mbuf.h
@@ -130,6 +130,14 @@
};
/*
+ * Static network interface owned tag.
+ * Allocated through ifp->if_snd_tag_alloc().
+ */
+struct m_snd_tag {
+ struct ifnet *ifp; /* network interface tag belongs to */
+};
+
+/*
* Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set.
* Size ILP32: 48
* LP64: 56
@@ -137,7 +145,10 @@
* they are correct.
*/
struct pkthdr {
- struct ifnet *rcvif; /* rcv interface */
+ union {
+ struct m_snd_tag *snd_tag; /* send tag, if any */
+ struct ifnet *rcvif; /* rcv interface */
+ };
SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
int32_t len; /* total packet length */
Index: sys/sys/socket.h
===================================================================
--- sys/sys/socket.h
+++ sys/sys/socket.h
@@ -158,6 +158,7 @@
#define SO_USER_COOKIE 0x1015 /* user cookie (dummynet etc.) */
#define SO_PROTOCOL 0x1016 /* get socket protocol (Linux name) */
#define SO_PROTOTYPE SO_PROTOCOL /* alias for SO_PROTOCOL (SunOS name) */
+#define SO_MAX_PACING_RATE 0x1017 /* set max TX pacing rate per socket */
#endif
/*
Index: sys/sys/socketvar.h
===================================================================
--- sys/sys/socketvar.h
+++ sys/sys/socketvar.h
@@ -127,8 +127,9 @@
int so_fibnum; /* routing domain for this socket */
uint32_t so_user_cookie;
- void *so_pspare[2]; /* packet pacing / general use */
- int so_ispare[2]; /* packet pacing / general use */
+ void *so_pspare[2]; /* general use */
+ uint32_t so_max_pacing_rate; /* (f) TX rate limit in bytes/s */
+ int so_ispare[1]; /* general use */
};
/*

File Metadata

Mime Type
text/plain
Expires
Tue, Apr 7, 1:08 PM (49 m, 7 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28295040
Default Alt Text
D3687.1775567295.diff (29 KB)

Event Timeline