Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F144556186
D11003.1775438491.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
20 KB
Referenced Files
None
Subscribers
None
D11003.1775438491.diff
View Options
Index: sys/kern/uipc_debug.c
===================================================================
--- sys/kern/uipc_debug.c
+++ sys/kern/uipc_debug.c
@@ -75,7 +75,7 @@
}
static void
-db_print_sooptions(short so_options)
+db_print_sooptions(int so_options)
{
int comma;
@@ -120,6 +120,10 @@
db_printf("%sSO_REUSEPORT", comma ? ", " : "");
comma = 1;
}
+ if (so_options & SO_REUSEPORT_LB) {
+ db_printf("%sSO_REUSEPORT_LB", comma ? ", " : "");
+ comma = 1;
+ }
if (so_options & SO_TIMESTAMP) {
db_printf("%sSO_TIMESTAMP", comma ? ", " : "");
comma = 1;
Index: sys/kern/uipc_socket.c
===================================================================
--- sys/kern/uipc_socket.c
+++ sys/kern/uipc_socket.c
@@ -2552,6 +2552,7 @@
case SO_BROADCAST:
case SO_REUSEADDR:
case SO_REUSEPORT:
+ case SO_REUSEPORT_LB:
case SO_OOBINLINE:
case SO_TIMESTAMP:
case SO_BINTIME:
@@ -2801,6 +2802,7 @@
case SO_KEEPALIVE:
case SO_REUSEADDR:
case SO_REUSEPORT:
+ case SO_REUSEPORT_LB:
case SO_BROADCAST:
case SO_OOBINLINE:
case SO_ACCEPTCONN:
Index: sys/netinet/in_pcb.h
===================================================================
--- sys/netinet/in_pcb.h
+++ sys/netinet/in_pcb.h
@@ -76,6 +76,11 @@
struct in_addr ia46_addr4;
};
+union in_dependaddr {
+ struct in_addr_4in6 id46_addr;
+ struct in6_addr id6_addr;
+};
+
/*
* NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has
* some extra padding to accomplish this.
@@ -86,22 +91,14 @@
u_int16_t ie_fport; /* foreign port */
u_int16_t ie_lport; /* local port */
/* protocol dependent part, local and foreign addr */
- union {
- /* foreign host table entry */
- struct in_addr_4in6 ie46_foreign;
- struct in6_addr ie6_foreign;
- } ie_dependfaddr;
- union {
- /* local host table entry */
- struct in_addr_4in6 ie46_local;
- struct in6_addr ie6_local;
- } ie_dependladdr;
+ union in_dependaddr ie_dependfaddr; /* foreign host table entry */
+ union in_dependaddr ie_dependladdr; /* local host table entry */
+#define ie_faddr ie_dependfaddr.id46_addr.ia46_addr4
+#define ie_laddr ie_dependladdr.id46_addr.ia46_addr4
+#define ie6_faddr ie_dependfaddr.id6_addr
+#define ie6_laddr ie_dependladdr.id6_addr
u_int32_t ie6_zoneid; /* scope zone id */
};
-#define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4
-#define ie_laddr ie_dependladdr.ie46_local.ia46_addr4
-#define ie6_faddr ie_dependfaddr.ie6_foreign
-#define ie6_laddr ie_dependladdr.ie6_local
/*
* XXX The defines for inc_* are hacks and should be changed to direct
@@ -326,6 +323,21 @@
u_short phd_port;
};
+struct inpcblbgroup {
+ LIST_ENTRY(inpcblbgroup) il_list;
+ uint16_t il_lport;
+ u_char il_vflag;
+ u_char il_pad;
+ uint32_t il_factor;
+ union in_dependaddr il_dependladdr;
+#define il_laddr il_dependladdr.id46_addr.ia46_addr4
+#define il6_laddr il_dependladdr.id6_addr
+ uint32_t il_inpsiz; /* size of il_inp[] */
+ uint32_t il_inpcnt; /* # of elem in il_inp[] */
+ struct inpcb *il_inp[];
+};
+LIST_HEAD(inpcblbgrouphead, inpcblbgroup);
+
/*-
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
@@ -419,6 +431,13 @@
u_long ipi_wildmask; /* (p) */
/*
+ * Load balanced group used by the SO_REUSEPORT_LB option,
+ * hashed by local address and local port.
+ */
+ struct inpcblbgrouphead *ipi_lbgrouphashbase;
+ u_long ipi_lbgrouphashmask;
+
+ /*
* Pointer to network stack instance
*/
struct vnet *ipi_vnet; /* (c) */
@@ -504,7 +523,7 @@
inp_inpcbtotcpcb(struct inpcb *inp);
void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
uint32_t *faddr, uint16_t *fp);
-short inp_so_options(const struct inpcb *inp);
+int inp_so_options(const struct inpcb *inp);
#endif /* _KERNEL */
@@ -567,6 +586,10 @@
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
#define INP_PCBPORTHASH(lport, mask) \
(ntohs((lport)) & (mask))
+#define INP_PCBLBGROUP_PORTHASH(lport, mask) \
+ (ntohs((lport)) & (mask))
+#define INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) \
+ ((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport)))
#define INP6_PCBHASHKEY(faddr) ((faddr)->s6_addr32[3])
/*
@@ -622,11 +645,11 @@
/*
* Flags for inp_flags2.
*/
-#define INP_LLE_VALID 0x00000001 /* cached lle is valid */
+#define INP_LLE_VALID 0x00000001 /* cached lle is valid */
#define INP_RT_VALID 0x00000002 /* cached rtentry is valid */
#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */
#define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */
-#define INP_FREED 0x00000010 /* inp itself is not valid */
+#define INP_FREED 0x00000010 /* inp itself is not valid */
#define INP_REUSEADDR 0x00000020 /* SO_REUSEADDR option is set */
#define INP_BINDMULTI 0x00000040 /* IP_BINDMULTI option is set */
#define INP_RSS_BUCKET_SET 0x00000080 /* IP_RSS_LISTEN_BUCKET is set */
@@ -634,6 +657,7 @@
#define INP_RECVRSSBUCKETID 0x00000200 /* populate recv datagram with bucket id */
#define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */
#define INP_ORIGDSTADDR 0x00000800 /* receive IP dst address/port */
+#define INP_REUSEPORT_LB 0x00001000 /* SO_REUSEPORT_LB option is set */
/*
* Flags passed to in_pcblookup*() functions.
Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -102,6 +102,9 @@
#include <security/mac/mac_framework.h>
+#define INPCBLBGROUP_SIZMIN 8
+#define INPCBLBGROUP_SIZMAX 256
+
static struct callout ipport_tick_callout;
/*
@@ -211,6 +214,189 @@
* functions often modify hash chains or addresses in pcbs.
*/
+static struct inpcblbgroup *
+in_pcblbgroup_alloc(struct inpcblbgrouphead *hdr, u_char vflag,
+ uint16_t port, const union in_dependaddr *addr, int size)
+{
+ struct inpcblbgroup *grp;
+
+ size_t bytes = __offsetof(struct inpcblbgroup, il_inp[size]);
+ grp = malloc(bytes, M_PCB, M_WAITOK | M_ZERO);
+ grp->il_vflag = vflag;
+ grp->il_lport = port;
+ grp->il_dependladdr = *addr;
+ grp->il_inpsiz = size;
+ LIST_INSERT_HEAD(hdr, grp, il_list);
+
+ return grp;
+}
+
+static void
+in_pcblbgroup_free(struct inpcblbgroup *grp)
+{
+ LIST_REMOVE(grp, il_list);
+ free(grp, M_TEMP);
+}
+
+static struct inpcblbgroup *
+in_pcblbgroup_resize(struct inpcblbgrouphead *hdr,
+ struct inpcblbgroup *old_grp, int size)
+{
+ struct inpcblbgroup *grp;
+ int i;
+
+ grp = in_pcblbgroup_alloc(hdr, old_grp->il_vflag,
+ old_grp->il_lport, &old_grp->il_dependladdr, size);
+
+ KASSERT(old_grp->il_inpcnt < grp->il_inpsiz,
+ ("invalid new local group size %d and old local group count %d",
+ grp->il_inpsiz, old_grp->il_inpcnt));
+ for (i = 0; i < old_grp->il_inpcnt; ++i)
+ grp->il_inp[i] = old_grp->il_inp[i];
+ grp->il_inpcnt = old_grp->il_inpcnt;
+ grp->il_factor = old_grp->il_factor;
+
+ in_pcblbgroup_free(old_grp);
+
+ return grp;
+}
+
+// XXX Need this?
+static void
+in_pcblbgroup_factor(struct inpcblbgroup *grp)
+{
+
+ int ncpus2_shift = 0; // XXX Get real value for this?
+
+ grp->il_factor =
+ ((uint32_t)(0xffff >> ncpus2_shift) / grp->il_inpcnt) + 1;
+
+ KASSERT(grp->il_factor != 0, ("invalid lb group factor, "
+ "ncpus2_shift %d, inpcnt %d", ncpus2_shift, grp->il_inpcnt));
+}
+
+/*
+ * Add PCB to lb group (load balance used by SO_REUSEPORT_LB)
+ */
+static void
+in_pcbinslbgrouphash(struct inpcb *inp, struct inpcbinfo *pcbinfo)
+{
+ struct inpcblbgrouphead *hdr;
+ struct inpcblbgroup *grp;
+
+ uint16_t hashmask = pcbinfo->ipi_lbgrouphashmask;
+ uint16_t lport = inp->inp_lport;
+ uint32_t group_index = INP_PCBLBGROUP_PORTHASH(lport, hashmask);
+
+ hdr = &pcbinfo->ipi_lbgrouphashbase[group_index];
+
+ struct ucred *cred;
+
+ if (pcbinfo->ipi_lbgrouphashbase == NULL)
+ return;
+
+ /*
+ * don't allow jailed socket to join local group
+ */
+ if (inp->inp_socket != NULL)
+ cred = inp->inp_socket->so_cred;
+ else
+ cred = NULL;
+ if (cred != NULL && jailed(cred))
+ return;
+
+#ifdef INET6
+ /*
+ * don't allow IPv4 mapped INET6 wild socket
+ */
+ if ((inp->inp_vflag & INP_IPV4) &&
+ inp->inp_laddr.s_addr == INADDR_ANY &&
+ INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6))
+ return;
+#endif
+
+ hdr = &pcbinfo->ipi_lbgrouphashbase[
+ INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
+
+ LIST_FOREACH(grp, hdr, il_list) {
+ if (grp->il_vflag == inp->inp_vflag &&
+ grp->il_lport == inp->inp_lport &&
+ memcmp(&grp->il_dependladdr,
+ &inp->inp_inc.inc_ie.ie_dependladdr,
+ sizeof(grp->il_dependladdr)) == 0) {
+ break;
+ }
+ }
+ if (grp == NULL) {
+ /* Create new local group */
+ grp = in_pcblbgroup_alloc(hdr, inp->inp_vflag,
+ inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr,
+ INPCBLBGROUP_SIZMIN);
+ } else if (grp->il_inpcnt == grp->il_inpsiz) {
+ if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) {
+ static int limit_logged = 0;
+
+ if (!limit_logged) {
+ limit_logged = 1;
+ printf("lb group port %d, "
+ "limit reached\n", ntohs(grp->il_lport));
+ }
+ return;
+ }
+
+ /* Expand this local group */
+ grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz * 2);
+ }
+
+ KASSERT(grp->il_inpcnt < grp->il_inpsiz,
+ ("invalid local group size %d and count %d",
+ grp->il_inpsiz, grp->il_inpcnt));
+
+ grp->il_inp[grp->il_inpcnt] = inp;
+ grp->il_inpcnt++;
+ in_pcblbgroup_factor(grp);
+}
+
+static void
+in_pcbremlbgrouphash(struct inpcb *inp, struct inpcbinfo *pcbinfo)
+{
+ struct inpcblbgrouphead *hdr;
+ struct inpcblbgroup *grp;
+
+ if (pcbinfo->ipi_lbgrouphashbase == NULL)
+ return;
+
+ hdr = &pcbinfo->ipi_lbgrouphashbase[
+ INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
+
+ LIST_FOREACH(grp, hdr, il_list) {
+ int i;
+
+ for (i = 0; i < grp->il_inpcnt; ++i) {
+ if (grp->il_inp[i] != inp)
+ continue;
+
+ if (grp->il_inpcnt == 1) {
+ /* Free this local group */
+ in_pcblbgroup_free(grp);
+ } else {
+ /* Pull up inpcbs */
+ for (; i + 1 < grp->il_inpcnt; ++i)
+ grp->il_inp[i] = grp->il_inp[i + 1];
+ grp->il_inpcnt--;
+
+ if (grp->il_inpsiz > INPCBLBGROUP_SIZMIN &&
+ grp->il_inpcnt <= (grp->il_inpsiz / 4)) {
+ /* Shrink this local group */
+ grp = in_pcblbgroup_resize(hdr, grp,
+ grp->il_inpsiz / 2);
+ }
+ }
+ return;
+ }
+ }
+}
+
/*
* Initialize an inpcbinfo -- we should be able to reduce the number of
* arguments in time.
@@ -235,6 +421,8 @@
&pcbinfo->ipi_hashmask);
pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
&pcbinfo->ipi_porthashmask);
+ pcbinfo->ipi_lbgrouphashbase = hashinit(hash_nelements, M_PCB,
+ &pcbinfo->ipi_lbgrouphashmask);
#ifdef PCBGROUP
in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
#endif
@@ -259,6 +447,8 @@
hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
pcbinfo->ipi_porthashmask);
+ hashdestroy(pcbinfo->ipi_lbgrouphashbase, M_PCB,
+ pcbinfo->ipi_lbgrouphashmask);
#ifdef PCBGROUP
in_pcbgroup_destroy(pcbinfo);
#endif
@@ -497,18 +687,20 @@
/*
* Return cached socket options.
*/
-short
+int
inp_so_options(const struct inpcb *inp)
{
- short so_options;
+ int so_options;
- so_options = 0;
+ so_options = 0;
- if ((inp->inp_flags2 & INP_REUSEPORT) != 0)
- so_options |= SO_REUSEPORT;
- if ((inp->inp_flags2 & INP_REUSEADDR) != 0)
- so_options |= SO_REUSEADDR;
- return (so_options);
+ if ((inp->inp_flags2 & INP_REUSEPORT_LB) != 0)
+ so_options |= SO_REUSEPORT_LB;
+ if ((inp->inp_flags2 & INP_REUSEPORT) != 0)
+ so_options |= SO_REUSEPORT;
+ if ((inp->inp_flags2 & INP_REUSEADDR) != 0)
+ so_options |= SO_REUSEADDR;
+ return (so_options);
}
#endif /* INET || INET6 */
@@ -565,6 +757,12 @@
int error;
/*
+ * XXX Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
+ * so that we don't have to add to the (already messy) code below
+ */
+ int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);
+
+ /*
* No state changes, so read locks are sufficient here.
*/
INP_LOCK_ASSERT(inp);
@@ -616,12 +814,12 @@
sin->sin_port = 0; /* yech... */
bzero(&sin->sin_zero, sizeof(sin->sin_zero));
/*
- * Is the address a local IP address?
+ * Is the address a local IP address?
* If INP_BINDANY is set, then the socket may be bound
* to any endpoint address, local or not.
*/
if ((inp->inp_flags & INP_BINDANY) == 0 &&
- ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
+ ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
return (EADDRNOTAVAIL);
}
laddr = sin->sin_addr;
@@ -651,7 +849,8 @@
ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
(ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
- (t->inp_flags2 & INP_REUSEPORT) == 0) &&
+ (t->inp_flags2 & INP_REUSEPORT) ||
+ (t->inp_flags2 & INP_REUSEPORT_LB) == 0) &&
(inp->inp_cred->cr_uid !=
t->inp_cred->cr_uid))
return (EADDRINUSE);
@@ -676,11 +875,13 @@
*/
tw = intotw(t);
if (tw == NULL ||
- (reuseport & tw->tw_so_options) == 0)
+ (reuseport & tw->tw_so_options) == 0) {
return (EADDRINUSE);
+ }
} else if (t &&
- ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
- (reuseport & inp_so_options(t)) == 0) {
+ ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
+ (reuseport & inp_so_options(t)) == 0 &&
+ (reuseport_lb & inp_so_options(t)) == 0) {
#ifdef INET6
if (ntohl(sin->sin_addr.s_addr) !=
INADDR_ANY ||
@@ -689,7 +890,7 @@
(inp->inp_vflag & INP_IPV6PROTO) == 0 ||
(t->inp_vflag & INP_IPV6PROTO) == 0)
#endif
- return (EADDRINUSE);
+ return (EADDRINUSE);
if (t && (! in_pcbbind_check_bindmulti(inp, t)))
return (EADDRINUSE);
}
@@ -806,7 +1007,7 @@
/*
* If we found a route, use the address corresponding to
* the outgoing interface.
- *
+ *
* Otherwise assume faddr is reachable on a directly connected
* network and try to find a corresponding interface to take
* the source address from.
@@ -1350,6 +1551,7 @@
struct inpcbport *phd = inp->inp_phd;
INP_HASH_WLOCK(inp->inp_pcbinfo);
+ in_pcbremlbgrouphash(inp, inp->inp_pcbinfo);
LIST_REMOVE(inp, inp_hash);
LIST_REMOVE(inp, inp_portlist);
if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
@@ -1610,6 +1812,64 @@
}
#undef INP_LOOKUP_MAPPED_PCB_COST
+
+static struct inpcb *
+in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
+ const struct in_addr *laddr, uint16_t lport, const struct in_addr *faddr,
+ uint16_t fport, int lookupflags)
+{
+ struct inpcb *local_wild = NULL;
+ const struct inpcblbgrouphead *hdr;
+ struct inpcblbgroup *grp;
+ struct inpcblbgroup *grp_local_wild;
+
+ hdr = &pcbinfo->ipi_lbgrouphashbase[
+ INP_PCBLBGROUP_PORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
+
+ /*
+ * Order of socket selection:
+ * 1. non-wild.
+ * 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
+ *
+ * NOTE:
+ * - Local group does not contain jailed sockets
+ * - Local group does not contain IPv4 mapped INET6 wild sockets
+ */
+ LIST_FOREACH(grp, hdr, il_list) {
+#ifdef INET6
+ if (!(grp->il_vflag & INP_IPV4))
+ continue;
+#endif
+
+ if (grp->il_lport == lport) {
+
+ uint32_t idx = 0;
+ int pkt_hash = INP_PCBLBGROUP_PKTHASH(faddr->s_addr, lport, fport);
+
+ /* Use il_inpcnt instead of dfbsd's il_factor */
+ idx = pkt_hash % grp->il_inpcnt;
+
+ KASSERT(idx >= 0 && idx < grp->il_inpcnt,
+ ("invalid hash index %d with count %d and factor %d",
+ pkt_hash, grp->il_inpcnt, grp->il_factor));
+
+ if (grp->il_laddr.s_addr == laddr->s_addr) {
+ return grp->il_inp[idx];
+ } else {
+ if (grp->il_laddr.s_addr == INADDR_ANY &&
+ (lookupflags & INPLOOKUP_WILDCARD)) {
+ local_wild = grp->il_inp[idx];
+ grp_local_wild = grp;
+ }
+ }
+ }
+ }
+ if (local_wild != NULL) {
+ return local_wild;
+ }
+ return NULL;
+}
+
#ifdef PCBGROUP
/*
* Lookup PCB in hash list, using pcbgroup tables.
@@ -1874,6 +2134,16 @@
return (tmpinp);
/*
+ * Then look in lb group
+ */
+ if (pcbinfo->ipi_lbgrouphashbase != NULL) {
+ inp = in_pcblookup_lbgroup(pcbinfo, &laddr, lport, &faddr, fport, lookupflags);
+ if (inp != NULL) {
+ return inp;
+ }
+ }
+
+ /*
* Then look for a wildcard match, if requested.
*/
if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
@@ -2075,6 +2345,7 @@
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
struct inpcbport *phd;
u_int32_t hashkey_faddr;
+ int so_options;
INP_WLOCK_ASSERT(inp);
INP_HASH_WLOCK_ASSERT(pcbinfo);
@@ -2095,6 +2366,16 @@
pcbporthash = &pcbinfo->ipi_porthashbase[
INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
+
+ /*
+ * Add entry in lb group
+ * Only do this if SO_REUSEPORT_LB is set
+ */
+ so_options = inp_so_options(inp);
+ if(so_options & SO_REUSEPORT_LB) {
+ in_pcbinslbgrouphash(inp, pcbinfo);
+ }
+
/*
* Go through port list and look for a head for this lport.
*/
@@ -2221,6 +2502,10 @@
struct inpcbport *phd = inp->inp_phd;
INP_HASH_WLOCK(pcbinfo);
+
+ // XXX Only do if SO_REUSEPORT_LB set?
+ in_pcbremlbgrouphash(inp, pcbinfo);
+
LIST_REMOVE(inp, inp_hash);
LIST_REMOVE(inp, inp_portlist);
if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
@@ -2309,7 +2594,7 @@
callout_stop(&ipport_tick_callout);
}
-/*
+/*
* The ipport_callout should start running at about the time we attach the
* inet or inet6 domains.
*/
@@ -2323,7 +2608,7 @@
EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
SHUTDOWN_PRI_DEFAULT);
}
-SYSINIT(ipport_tick_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE,
+SYSINIT(ipport_tick_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE,
ipport_tick_init, NULL);
void
Index: sys/netinet/ip_output.c
===================================================================
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -993,6 +993,15 @@
INP_WUNLOCK(inp);
error = 0;
break;
+ case SO_REUSEPORT_LB:
+ INP_WLOCK(inp);
+ if ((so->so_options & SO_REUSEPORT_LB) != 0)
+ inp->inp_flags2 |= INP_REUSEPORT_LB;
+ else
+ inp->inp_flags2 &= ~INP_REUSEPORT_LB;
+ INP_WUNLOCK(inp);
+ error = 0;
+ break;
case SO_SETFIB:
INP_WLOCK(inp);
inp->inp_inc.inc_fibnum = so->so_fibnum;
Index: sys/netinet6/ip6_output.c
===================================================================
--- sys/netinet6/ip6_output.c
+++ sys/netinet6/ip6_output.c
@@ -1459,6 +1459,15 @@
INP_WUNLOCK(in6p);
error = 0;
break;
+ case SO_REUSEPORT_LB:
+ INP_WLOCK(in6p);
+ if ((so->so_options & SO_REUSEPORT_LB) != 0)
+ in6p->inp_flags2 |= INP_REUSEPORT_LB;
+ else
+ in6p->inp_flags2 &= ~INP_REUSEPORT_LB;
+ INP_WUNLOCK(in6p);
+ error = 0;
+ break;
case SO_SETFIB:
INP_WLOCK(in6p);
in6p->inp_inc.inc_fibnum = so->so_fibnum;
Index: sys/sys/socket.h
===================================================================
--- sys/sys/socket.h
+++ sys/sys/socket.h
@@ -137,6 +137,9 @@
#define SO_NO_OFFLOAD 0x4000 /* socket cannot be offloaded */
#define SO_NO_DDP 0x8000 /* disable direct data placement */
+// XXX: so_options only 16 bit.. (increased to 32)
+#define SO_REUSEPORT_LB 0x00010000 /* reuse with load balancing */
+
/*
* Additional options, not kept in so_options.
*/
Index: sys/sys/socketvar.h
===================================================================
--- sys/sys/socketvar.h
+++ sys/sys/socketvar.h
@@ -70,12 +70,12 @@
* (h) locked by global mutex so_global_mtx.
*/
struct socket {
- int so_count; /* (b) reference count */
+ int so_count; /* (b) reference count */
short so_type; /* (a) generic type, see socket.h */
- short so_options; /* from socket call, see socket.h */
+ int so_options; /* from socket call, see socket.h */
short so_linger; /* time to linger while closing */
short so_state; /* (b) internal state flags SS_* */
- int so_qstate; /* (e) internal state flags SQ_* */
+ int so_qstate; /* (e) internal state flags SQ_* */
void *so_pcb; /* protocol control block */
struct vnet *so_vnet; /* (a) network stack instance */
struct protosw *so_proto; /* (a) protocol handle */
@@ -170,12 +170,12 @@
size_t xso_len; /* length of this structure */
struct socket *xso_so; /* makes a convenient handle sometimes */
short so_type;
- short so_options;
+ int so_options;
short so_linger;
short so_state;
caddr_t so_pcb; /* another convenient handle */
- int xso_protocol;
- int xso_family;
+ int xso_protocol;
+ int xso_family;
u_int so_qlen;
u_int so_incqlen;
u_int so_qlimit;
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Apr 6, 1:21 AM (52 m, 33 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28270219
Default Alt Text
D11003.1775438491.diff (20 KB)
Attached To
Mode
D11003: Load balance sockets with new SO_REUSEPORT_LB option
Attached
Detach File
Event Timeline
Log In to Comment