D8560.diff
D8560: hyperv/hn: Implement RNDIS multi-packet message support.

Index: head/sys/dev/hyperv/netvsc/hn_rndis.c
===================================================================
--- head/sys/dev/hyperv/netvsc/hn_rndis.c
+++ head/sys/dev/hyperv/netvsc/hn_rndis.c
@@ -838,11 +838,15 @@
error = EIO;
goto done;
}
+ sc->hn_rndis_agg_size = comp->rm_pktmaxsz;
+ sc->hn_rndis_agg_pkts = comp->rm_pktmaxcnt;
+ sc->hn_rndis_agg_align = 1U << comp->rm_align;
+
if (bootverbose) {
if_printf(sc->hn_ifp, "RNDIS ver %u.%u, pktsz %u, pktcnt %u, "
"align %u\n", comp->rm_ver_major, comp->rm_ver_minor,
- comp->rm_pktmaxsz, comp->rm_pktmaxcnt,
- 1U << comp->rm_align);
+ sc->hn_rndis_agg_size, sc->hn_rndis_agg_pkts,
+ sc->hn_rndis_agg_align);
}
error = 0;
done:
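
The change above saves the host's aggregation offer from the RNDIS initialization completion; rm_align arrives as an exponent, so the stored byte alignment is 1U << rm_align. A minimal, self-contained sketch of the decoding, using hypothetical completion values:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Hypothetical values from an RNDIS init completion. */
	uint32_t rm_pktmaxsz = 16384, rm_pktmaxcnt = 8, rm_align = 3;

	/* Mirrors the assignments in the hunk above. */
	uint32_t agg_size = rm_pktmaxsz;	/* bytes per aggregated message */
	uint32_t agg_pkts = rm_pktmaxcnt;	/* packets per aggregated message */
	uint32_t agg_align = 1U << rm_align;	/* 1 << 3 = 8 byte alignment */

	printf("TX agg size %u, pkts %u, align %u\n",
	    agg_size, agg_pkts, agg_align);
	return (0);
}
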
Index: head/sys/dev/hyperv/netvsc/if_hn.c
===================================================================
--- head/sys/dev/hyperv/netvsc/if_hn.c
+++ head/sys/dev/hyperv/netvsc/if_hn.c
@@ -159,10 +159,22 @@
#define HN_CSUM_IP6_HWASSIST(sc) \
((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)
+#define HN_PKTSIZE_MIN(align) \
+ roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
+ HN_RNDIS_PKT_LEN, (align))
+#define HN_PKTSIZE(m, align) \
+ roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align))
+
struct hn_txdesc {
#ifndef HN_USE_TXDESC_BUFRING
SLIST_ENTRY(hn_txdesc) link;
#endif
+ STAILQ_ENTRY(hn_txdesc) agg_link;
+
+ /* Aggregated txdescs, in sending order. */
+ STAILQ_HEAD(, hn_txdesc) agg_list;
+
+ /* The oldest packet, if transmission aggregation happens. */
struct mbuf *m;
struct hn_tx_ring *txr;
int refs;
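
The new STAILQ fields turn one txdesc into an aggregation parent: the parent owns the chimney sending buffer, and agg_list links the aggregated txdescs in sending order. A reduced sketch of the relationship, with struct txd as a hypothetical stand-in for struct hn_txdesc:

#include <sys/queue.h>

struct txd {
	STAILQ_ENTRY(txd) agg_link;	/* linkage on a parent's agg_list */
	STAILQ_HEAD(, txd) agg_list;	/* aggregated children, send order */
};

static void
txd_agg_sketch(struct txd *parent, struct txd *child)
{
	/*
	 * Lists are STAILQ_INIT'ed at txdesc creation; children never
	 * carry children of their own, so aggregation is one level deep.
	 */
	STAILQ_INSERT_TAIL(&parent->agg_list, child, agg_link);
}
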
@@ -180,6 +192,7 @@
#define HN_TXD_FLAG_ONLIST 0x0001
#define HN_TXD_FLAG_DMAMAP 0x0002
+#define HN_TXD_FLAG_ONAGG 0x0004
struct hn_rxinfo {
uint32_t vlan_info;
@@ -259,6 +272,10 @@
static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS);
static void hn_stop(struct hn_softc *);
static void hn_init_locked(struct hn_softc *);
@@ -306,7 +323,7 @@
static void hn_fixup_tx_data(struct hn_softc *);
static void hn_destroy_tx_data(struct hn_softc *);
static void hn_txdesc_dmamap_destroy(struct hn_txdesc *);
-static int hn_encap(struct hn_tx_ring *,
+static int hn_encap(struct ifnet *, struct hn_tx_ring *,
struct hn_txdesc *, struct mbuf **);
static int hn_txpkt(struct ifnet *, struct hn_tx_ring *,
struct hn_txdesc *);
@@ -315,6 +332,10 @@
static bool hn_tx_ring_pending(struct hn_tx_ring *);
static void hn_tx_ring_qflush(struct hn_tx_ring *);
static void hn_resume_tx(struct hn_softc *, int);
+static void hn_set_txagg(struct hn_softc *);
+static void *hn_try_txagg(struct ifnet *,
+ struct hn_tx_ring *, struct hn_txdesc *,
+ int);
static int hn_get_txswq_depth(const struct hn_tx_ring *);
static void hn_txpkt_done(struct hn_nvs_sendctx *,
struct hn_softc *, struct vmbus_channel *,
@@ -430,6 +451,16 @@
&hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif
+/* Packet transmission aggregation size limit */
+static int hn_tx_agg_size = -1;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
+ &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");
+
+/* Packet transmission aggregation count limit */
+static int hn_tx_agg_pkts = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
+ &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");
+
static u_int hn_cpu_index; /* next CPU for channel */
static struct taskqueue *hn_tx_taskq; /* shared TX taskqueue */
@@ -658,6 +689,84 @@
return (error);
}
+static void
+hn_set_txagg(struct hn_softc *sc)
+{
+ uint32_t size, pkts;
+ int i;
+
+ /*
+ * Setup aggregation size.
+ */
+ if (sc->hn_agg_size < 0)
+ size = UINT32_MAX;
+ else
+ size = sc->hn_agg_size;
+
+ if (sc->hn_rndis_agg_size < size)
+ size = sc->hn_rndis_agg_size;
+
+ if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
+ /* Disable */
+ size = 0;
+ pkts = 0;
+ goto done;
+ }
+
+ /* NOTE: Type of the per TX ring setting is 'int'. */
+ if (size > INT_MAX)
+ size = INT_MAX;
+
+ /* NOTE: We only aggregate packets using chimney sending buffers. */
+ if (size > (uint32_t)sc->hn_chim_szmax)
+ size = sc->hn_chim_szmax;
+
+ /*
+ * Setup aggregation packet count.
+ */
+ if (sc->hn_agg_pkts < 0)
+ pkts = UINT32_MAX;
+ else
+ pkts = sc->hn_agg_pkts;
+
+ if (sc->hn_rndis_agg_pkts < pkts)
+ pkts = sc->hn_rndis_agg_pkts;
+
+ if (pkts <= 1) {
+ /* Disable */
+ size = 0;
+ pkts = 0;
+ goto done;
+ }
+
+ /* NOTE: Type of the per TX ring setting is 'short'. */
+ if (pkts > SHRT_MAX)
+ pkts = SHRT_MAX;
+
+done:
+ /* NOTE: Type of the per TX ring setting is 'short'. */
+ if (sc->hn_rndis_agg_align > SHRT_MAX) {
+ /* Disable */
+ size = 0;
+ pkts = 0;
+ }
+
+ if (bootverbose) {
+ if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n",
+ size, pkts, sc->hn_rndis_agg_align);
+ }
+
+ for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+ struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
+
+ mtx_lock(&txr->hn_tx_lock);
+ txr->hn_agg_szmax = size;
+ txr->hn_agg_pktmax = pkts;
+ txr->hn_agg_align = sc->hn_rndis_agg_align;
+ mtx_unlock(&txr->hn_tx_lock);
+ }
+}
+
static int
hn_get_txswq_depth(const struct hn_tx_ring *txr)
{
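
hn_set_txagg() reduces three inputs (the user tunable, the host's RNDIS offer, and the chimney segment size) to one per-ring limit. A worked sketch of the size half, with hypothetical inputs and the disable/alignment checks omitted; the packet-count half clamps to SHRT_MAX the same way:

#include <limits.h>
#include <stdint.h>

static uint32_t
agg_size_sketch(int user_size, uint32_t rndis_size, uint32_t chim_szmax)
{
	uint32_t size;

	size = (user_size < 0) ? UINT32_MAX : (uint32_t)user_size;
	if (rndis_size < size)		/* host offer caps the user value */
		size = rndis_size;
	if (size > INT_MAX)		/* per-ring field is an 'int' */
		size = INT_MAX;
	if (size > chim_szmax)		/* only chimney buffers aggregate */
		size = chim_szmax;
	return (size);			/* e.g. (-1, 32768, 6144) -> 6144 */
}
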
@@ -785,6 +894,12 @@
HN_LOCK_INIT(sc);
/*
+ * Initialize these tunables once.
+ */
+ sc->hn_agg_size = hn_tx_agg_size;
+ sc->hn_agg_pkts = hn_tx_agg_pkts;
+
+ /*
* Setup taskqueue for transmission.
*/
if (hn_tx_taskq == NULL) {
@@ -939,6 +1054,24 @@
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
hn_rss_ind_sysctl, "IU", "RSS indirect table");
+ SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size",
+ CTLFLAG_RD, &sc->hn_rndis_agg_size, 0,
+ "RNDIS offered packet transmission aggregation size limit");
+ SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts",
+ CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0,
+ "RNDIS offered packet transmission aggregation count limit");
+ SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align",
+ CTLFLAG_RD, &sc->hn_rndis_agg_align, 0,
+ "RNDIS packet transmission aggregation alignment");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size",
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+ hn_txagg_size_sysctl, "I",
+ "Packet transmission aggregation size, 0 -- disable, -1 -- auto");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts",
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+ hn_txagg_pkts_sysctl, "I",
+ "Packet transmission aggregation packets, "
+ "0 -- disable, -1 -- auto");
/*
* Setup the ifmedia, which has been initialized earlier.
@@ -1189,16 +1322,45 @@
KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
("put an onlist txd %#x", txd->flags));
+ KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
+ ("put an onagg txd %#x", txd->flags));
KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
if (atomic_fetchadd_int(&txd->refs, -1) != 1)
return 0;
+ if (!STAILQ_EMPTY(&txd->agg_list)) {
+ struct hn_txdesc *tmp_txd;
+
+ while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) {
+ int freed;
+
+ KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list),
+ ("resursive aggregation on aggregated txdesc"));
+ KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG),
+ ("not aggregated txdesc"));
+ KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
+ ("aggregated txdesc uses dmamap"));
+ KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
+ ("aggregated txdesc consumes "
+ "chimney sending buffer"));
+ KASSERT(tmp_txd->chim_size == 0,
+ ("aggregated txdesc has non-zero "
+ "chimney sending size"));
+
+ STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link);
+ tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG;
+ freed = hn_txdesc_put(txr, tmp_txd);
+ KASSERT(freed, ("failed to free aggregated txdesc"));
+ }
+ }
+
if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
("chim txd uses dmamap"));
hn_chim_free(txr->hn_sc, txd->chim_index);
txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
+ txd->chim_size = 0;
} else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
bus_dmamap_sync(txr->hn_tx_data_dtag,
txd->data_dmap, BUS_DMASYNC_POSTWRITE);
@@ -1253,8 +1415,11 @@
atomic_subtract_int(&txr->hn_txdesc_avail, 1);
#endif
KASSERT(txd->m == NULL && txd->refs == 0 &&
+ STAILQ_EMPTY(&txd->agg_list) &&
txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
+ txd->chim_size == 0 &&
(txd->flags & HN_TXD_FLAG_ONLIST) &&
+ (txd->flags & HN_TXD_FLAG_ONAGG) == 0 &&
(txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
txd->flags &= ~HN_TXD_FLAG_ONLIST;
txd->refs = 1;
@@ -1271,6 +1436,22 @@
atomic_add_int(&txd->refs, 1);
}
+static __inline void
+hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd)
+{
+
+ KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0,
+ ("recursive aggregation on aggregating txdesc"));
+
+ KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
+ ("already aggregated"));
+ KASSERT(STAILQ_EMPTY(&txd->agg_list),
+ ("recursive aggregation on to-be-aggregated txdesc"));
+
+ txd->flags |= HN_TXD_FLAG_ONAGG;
+ STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link);
+}
+
static bool
hn_tx_ring_pending(struct hn_tx_ring *txr)
{
@@ -1382,12 +1563,123 @@
return (pi->rm_data);
}
+static __inline int
+hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr)
+{
+ struct hn_txdesc *txd;
+ struct mbuf *m;
+ int error, pkts;
+
+ txd = txr->hn_agg_txd;
+ KASSERT(txd != NULL, ("no aggregate txdesc"));
+
+ /*
+ * Since hn_txpkt() will reset this temporary stat, save
+ * it now, so that oerrors can be updated properly, if
+ * hn_txpkt() ever fails.
+ */
+ pkts = txr->hn_stat_pkts;
+
+ /*
+ * Since txd's mbuf will _not_ be freed upon hn_txpkt()
+ * failure, save it for later freeing, if hn_txpkt() ever
+ * fails.
+ */
+ m = txd->m;
+ error = hn_txpkt(ifp, txr, txd);
+ if (__predict_false(error)) {
+ /* txd is freed, but m is not. */
+ m_freem(m);
+
+ txr->hn_flush_failed++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts);
+ }
+
+ /* Reset all aggregation states. */
+ txr->hn_agg_txd = NULL;
+ txr->hn_agg_szleft = 0;
+ txr->hn_agg_pktleft = 0;
+ txr->hn_agg_prevpkt = NULL;
+
+ return (error);
+}
+
+static void *
+hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
+ int pktsize)
+{
+ void *chim;
+
+ if (txr->hn_agg_txd != NULL) {
+ if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) {
+ struct hn_txdesc *agg_txd = txr->hn_agg_txd;
+ struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt;
+ int olen;
+
+ /*
+ * Update the previous RNDIS packet's total length,
+ * it can be increased due to the mandatory alignment
+ * padding for this RNDIS packet. And update the
+ * aggregating txdesc's chimney sending buffer size
+ * accordingly.
+ *
+ * XXX
+ * Zero-out the padding, as required by the RNDIS spec.
+ */
+ olen = pkt->rm_len;
+ pkt->rm_len = roundup2(olen, txr->hn_agg_align);
+ agg_txd->chim_size += pkt->rm_len - olen;
+
+ /* Link this txdesc to the parent. */
+ hn_txdesc_agg(agg_txd, txd);
+
+ chim = (uint8_t *)pkt + pkt->rm_len;
+ /* Save the current packet for later fixup. */
+ txr->hn_agg_prevpkt = chim;
+
+ txr->hn_agg_pktleft--;
+ txr->hn_agg_szleft -= pktsize;
+ if (txr->hn_agg_szleft <=
+ HN_PKTSIZE_MIN(txr->hn_agg_align)) {
+ /*
+ * Probably can't aggregate more packets,
+ * flush this aggregating txdesc proactively.
+ */
+ txr->hn_agg_pktleft = 0;
+ }
+ /* Done! */
+ return (chim);
+ }
+ hn_flush_txagg(ifp, txr);
+ }
+ KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
+
+ txr->hn_tx_chimney_tried++;
+ txd->chim_index = hn_chim_alloc(txr->hn_sc);
+ if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID)
+ return (NULL);
+ txr->hn_tx_chimney++;
+
+ chim = txr->hn_sc->hn_chim +
+ (txd->chim_index * txr->hn_sc->hn_chim_szmax);
+
+ if (txr->hn_agg_pktmax > 1 &&
+ txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) {
+ txr->hn_agg_txd = txd;
+ txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1;
+ txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize;
+ txr->hn_agg_prevpkt = chim;
+ }
+ return (chim);
+}
+
/*
* NOTE:
* If this function fails, then both txd and m_head0 will be freed.
*/
static int
-hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
+hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
+ struct mbuf **m_head0)
{
bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
int error, nsegs, i;
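
The alignment fixup in hn_try_txagg() defines the buffer layout: the previous RNDIS packet's rm_len is rounded up, the padding is charged to the parent's chim_size, and the next packet is built immediately after the padding. A worked sketch with hypothetical numbers (rm_len 123, alignment 8):

#include <stdint.h>

/* roundup2() from sys/param.h, repeated for a self-contained sketch. */
#define ROUNDUP2(x, y)	(((x) + ((y) - 1)) & ~((y) - 1))

static uint8_t *
next_agg_slot_sketch(uint8_t *prevpkt, uint32_t *rm_lenp, int *chim_sizep,
    int align)
{
	uint32_t olen = *rm_lenp;		/* 123 */

	*rm_lenp = ROUNDUP2(olen, align);	/* 123 -> 128 */
	*chim_sizep += *rm_lenp - olen;		/* +5 bytes of padding */
	return (prevpkt + *rm_lenp);		/* next packet starts here */
}
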
@@ -1395,33 +1687,30 @@
struct rndis_packet_msg *pkt;
uint32_t *pi_data;
void *chim = NULL;
- int pktlen;
+ int pkt_hlen, pkt_size;
pkt = txd->rndis_pkt;
- if (m_head->m_pkthdr.len + HN_RNDIS_PKT_LEN < txr->hn_chim_size) {
- /*
- * This packet is small enough to fit into a chimney sending
- * buffer. Try allocating one chimney sending buffer now.
- */
- txr->hn_tx_chimney_tried++;
- txd->chim_index = hn_chim_alloc(txr->hn_sc);
- if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
- chim = txr->hn_sc->hn_chim +
- (txd->chim_index * txr->hn_sc->hn_chim_szmax);
- /*
- * Directly fill the chimney sending buffer w/ the
- * RNDIS packet message.
- */
+ pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align);
+ if (pkt_size < txr->hn_chim_size) {
+ chim = hn_try_txagg(ifp, txr, txd, pkt_size);
+ if (chim != NULL)
pkt = chim;
- }
+ } else {
+ if (txr->hn_agg_txd != NULL)
+ hn_flush_txagg(ifp, txr);
}
pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
pkt->rm_len = sizeof(*pkt) + m_head->m_pkthdr.len;
pkt->rm_dataoffset = sizeof(*pkt);
pkt->rm_datalen = m_head->m_pkthdr.len;
+ pkt->rm_oobdataoffset = 0;
+ pkt->rm_oobdatalen = 0;
+ pkt->rm_oobdataelements = 0;
pkt->rm_pktinfooffset = sizeof(*pkt);
pkt->rm_pktinfolen = 0;
+ pkt->rm_vchandle = 0;
+ pkt->rm_reserved = 0;
if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) {
/*
@@ -1482,7 +1771,7 @@
*pi_data |= NDIS_TXCSUM_INFO_UDPCS;
}
- pktlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
+ pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
/* Convert RNDIS packet message offsets */
pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt->rm_dataoffset);
pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset);
@@ -1491,25 +1780,36 @@
* Fast path: Chimney sending.
*/
if (chim != NULL) {
- KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID,
- ("chimney buffer is not used"));
- KASSERT(pkt == chim, ("RNDIS pkt not in chimney buffer"));
+ struct hn_txdesc *tgt_txd = txd;
+
+ if (txr->hn_agg_txd != NULL) {
+ tgt_txd = txr->hn_agg_txd;
+#ifdef INVARIANTS
+ *m_head0 = NULL;
+#endif
+ }
+
+ KASSERT(pkt == chim,
+ ("RNDIS pkt not in chimney sending buffer"));
+ KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID,
+ ("chimney sending buffer is not used"));
+ tgt_txd->chim_size += pkt->rm_len;
m_copydata(m_head, 0, m_head->m_pkthdr.len,
- ((uint8_t *)chim) + pktlen);
+ ((uint8_t *)chim) + pkt_hlen);
- txd->chim_size = pkt->rm_len;
txr->hn_gpa_cnt = 0;
- txr->hn_tx_chimney++;
txr->hn_sendpkt = hn_txpkt_chim;
goto done;
}
+
+ KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc"));
KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
("chimney buffer is used"));
KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc"));
error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
- if (error) {
+ if (__predict_false(error)) {
int freed;
/*
@@ -1523,7 +1823,7 @@
("fail to free txd upon txdma error"));
txr->hn_txdma_failed++;
- if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return error;
}
*m_head0 = m_head;
@@ -1534,7 +1834,7 @@
/* send packet with page buffer */
txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr);
txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK;
- txr->hn_gpa[0].gpa_len = pktlen;
+ txr->hn_gpa[0].gpa_len = pkt_hlen;
/*
* Fill the page buffers with mbuf info after the page
@@ -1557,6 +1857,12 @@
/* Set the completion routine */
hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd);
+ /* Update temporary stats for later use. */
+ txr->hn_stat_pkts++;
+ txr->hn_stat_size += m_head->m_pkthdr.len;
+ if (m_head->m_flags & M_MCAST)
+ txr->hn_stat_mcasts++;
+
return 0;
}
@@ -1572,23 +1878,34 @@
again:
/*
- * Make sure that txd is not freed before ETHER_BPF_MTAP.
+ * Make sure that this txd and any aggregated txds are not freed
+ * before ETHER_BPF_MTAP.
*/
hn_txdesc_hold(txd);
error = txr->hn_sendpkt(txr, txd);
if (!error) {
- ETHER_BPF_MTAP(ifp, txd->m);
- if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if (bpf_peers_present(ifp->if_bpf)) {
+ const struct hn_txdesc *tmp_txd;
+
+ ETHER_BPF_MTAP(ifp, txd->m);
+ STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link)
+ ETHER_BPF_MTAP(ifp, tmp_txd->m);
+ }
+
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts);
#ifdef HN_IFSTART_SUPPORT
if (!hn_use_if_start)
#endif
{
if_inc_counter(ifp, IFCOUNTER_OBYTES,
- txd->m->m_pkthdr.len);
- if (txd->m->m_flags & M_MCAST)
- if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
+ txr->hn_stat_size);
+ if (txr->hn_stat_mcasts != 0) {
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS,
+ txr->hn_stat_mcasts);
+ }
}
- txr->hn_pkts++;
+ txr->hn_pkts += txr->hn_stat_pkts;
+ txr->hn_sends++;
}
hn_txdesc_put(txr, txd);
@@ -1628,7 +1945,13 @@
txr->hn_send_failed++;
}
- return error;
+
+ /* Reset temporary stats, after this sending is done. */
+ txr->hn_stat_size = 0;
+ txr->hn_stat_pkts = 0;
+ txr->hn_stat_mcasts = 0;
+
+ return (error);
}
/*
@@ -2412,6 +2735,64 @@
}
static int
+hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int error, size;
+
+ size = sc->hn_agg_size;
+ error = sysctl_handle_int(oidp, &size, 0, req);
+ if (error || req->newptr == NULL)
+ return (error);
+
+ HN_LOCK(sc);
+ sc->hn_agg_size = size;
+ hn_set_txagg(sc);
+ HN_UNLOCK(sc);
+
+ return (0);
+}
+
+static int
+hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int error, pkts;
+
+ pkts = sc->hn_agg_pkts;
+ error = sysctl_handle_int(oidp, &pkts, 0, req);
+ if (error || req->newptr == NULL)
+ return (error);
+
+ HN_LOCK(sc);
+ sc->hn_agg_pkts = pkts;
+ hn_set_txagg(sc);
+ HN_UNLOCK(sc);
+
+ return (0);
+}
+
+static int
+hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int pkts;
+
+ pkts = sc->hn_tx_ring[0].hn_agg_pktmax;
+ return (sysctl_handle_int(oidp, &pkts, 0, req));
+}
+
+static int
+hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int align;
+
+ align = sc->hn_tx_ring[0].hn_agg_align;
+ return (sysctl_handle_int(oidp, &align, 0, req));
+}
+
+static int
hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
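
The agg_size and agg_pkts handlers make the limits adjustable at runtime: a write re-runs hn_set_txagg() under HN_LOCK, while hw.hn.tx_agg_size and hw.hn.tx_agg_pkts remain boot-time tunables. A userland usage sketch; the dev.hn.0 node path is an assumption (unit 0, standard per-device sysctl tree):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>

int
main(void)
{
	int pkts = 8;

	/* Assumed node path; equivalent to: sysctl dev.hn.0.agg_pkts=8 */
	if (sysctlbyname("dev.hn.0.agg_pkts", NULL, NULL,
	    &pkts, sizeof(pkts)) == -1)
		err(1, "sysctlbyname");
	return (0);
}
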
@@ -2954,6 +3335,7 @@
txd->txr = txr;
txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
+ STAILQ_INIT(&txd->agg_list);
/*
* Allocate and load RNDIS packet message.
@@ -3037,6 +3419,8 @@
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
CTLFLAG_RW, &txr->hn_pkts,
"# of packets transmitted");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends",
+ CTLFLAG_RW, &txr->hn_sends, "# of sends");
}
}
@@ -3151,6 +3535,11 @@
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_txdma_failed),
hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed",
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+ __offsetof(struct hn_tx_ring, hn_flush_failed),
+ hn_tx_stat_ulong_sysctl, "LU",
+ "# of packet transmission aggregation flush failure");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_tx_collapsed),
@@ -3187,6 +3576,17 @@
CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse",
CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax",
+ CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0,
+ "Applied packet transmission aggregation size");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax",
+ CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+ hn_txagg_pktmax_sysctl, "I",
+ "Applied packet transmission aggregation packets");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align",
+ CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+ hn_txagg_align_sysctl, "I",
+ "Applied packet transmission aggregation alignment");
return 0;
}
@@ -3306,18 +3706,20 @@
{
struct hn_softc *sc = txr->hn_sc;
struct ifnet *ifp = sc->hn_ifp;
+ int sched = 0;
KASSERT(hn_use_if_start,
("hn_start_locked is called, when if_start is disabled"));
KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
mtx_assert(&txr->hn_tx_lock, MA_OWNED);
+ KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
if (__predict_false(txr->hn_suspended))
- return 0;
+ return (0);
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING)
- return 0;
+ return (0);
while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
struct hn_txdesc *txd;
@@ -3335,7 +3737,8 @@
* following up packets) to tx taskqueue.
*/
IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
- return 1;
+ sched = 1;
+ break;
}
#if defined(INET6) || defined(INET)
@@ -3356,21 +3759,50 @@
break;
}
- error = hn_encap(txr, txd, &m_head);
+ error = hn_encap(ifp, txr, txd, &m_head);
if (error) {
/* Both txd and m_head are freed */
+ KASSERT(txr->hn_agg_txd == NULL,
+ ("encap failed w/ pending aggregating txdesc"));
continue;
}
- error = hn_txpkt(ifp, txr, txd);
- if (__predict_false(error)) {
- /* txd is freed, but m_head is not */
- IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
- atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
- break;
+ if (txr->hn_agg_pktleft == 0) {
+ if (txr->hn_agg_txd != NULL) {
+ KASSERT(m_head == NULL,
+ ("pending mbuf for aggregating txdesc"));
+ error = hn_flush_txagg(ifp, txr);
+ if (__predict_false(error)) {
+ atomic_set_int(&ifp->if_drv_flags,
+ IFF_DRV_OACTIVE);
+ break;
+ }
+ } else {
+ KASSERT(m_head != NULL, ("mbuf was freed"));
+ error = hn_txpkt(ifp, txr, txd);
+ if (__predict_false(error)) {
+ /* txd is freed, but m_head is not */
+ IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+ atomic_set_int(&ifp->if_drv_flags,
+ IFF_DRV_OACTIVE);
+ break;
+ }
+ }
+ }
+#ifdef INVARIANTS
+ else {
+ KASSERT(txr->hn_agg_txd != NULL,
+ ("no aggregating txdesc"));
+ KASSERT(m_head == NULL,
+ ("pending mbuf for aggregating txdesc"));
}
+#endif
}
- return 0;
+
+ /* Flush pending aggregated transmission. */
+ if (txr->hn_agg_txd != NULL)
+ hn_flush_txagg(ifp, txr);
+ return (sched);
}
static void
@@ -3447,18 +3879,20 @@
struct hn_softc *sc = txr->hn_sc;
struct ifnet *ifp = sc->hn_ifp;
struct mbuf *m_head;
+ int sched = 0;
mtx_assert(&txr->hn_tx_lock, MA_OWNED);
#ifdef HN_IFSTART_SUPPORT
KASSERT(hn_use_if_start == 0,
("hn_xmit is called, when if_start is enabled"));
#endif
+ KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
if (__predict_false(txr->hn_suspended))
- return 0;
+ return (0);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
- return 0;
+ return (0);
while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
struct hn_txdesc *txd;
@@ -3471,7 +3905,8 @@
* following up packets) to tx taskqueue.
*/
drbr_putback(ifp, txr->hn_mbuf_br, m_head);
- return 1;
+ sched = 1;
+ break;
}
txd = hn_txdesc_get(txr);
@@ -3482,25 +3917,53 @@
break;
}
- error = hn_encap(txr, txd, &m_head);
+ error = hn_encap(ifp, txr, txd, &m_head);
if (error) {
/* Both txd and m_head are freed; discard */
+ KASSERT(txr->hn_agg_txd == NULL,
+ ("encap failed w/ pending aggregating txdesc"));
drbr_advance(ifp, txr->hn_mbuf_br);
continue;
}
- error = hn_txpkt(ifp, txr, txd);
- if (__predict_false(error)) {
- /* txd is freed, but m_head is not */
- drbr_putback(ifp, txr->hn_mbuf_br, m_head);
- txr->hn_oactive = 1;
- break;
+ if (txr->hn_agg_pktleft == 0) {
+ if (txr->hn_agg_txd != NULL) {
+ KASSERT(m_head == NULL,
+ ("pending mbuf for aggregating txdesc"));
+ error = hn_flush_txagg(ifp, txr);
+ if (__predict_false(error)) {
+ txr->hn_oactive = 1;
+ break;
+ }
+ } else {
+ KASSERT(m_head != NULL, ("mbuf was freed"));
+ error = hn_txpkt(ifp, txr, txd);
+ if (__predict_false(error)) {
+ /* txd is freed, but m_head is not */
+ drbr_putback(ifp, txr->hn_mbuf_br,
+ m_head);
+ txr->hn_oactive = 1;
+ break;
+ }
+ }
}
+#ifdef INVARIANTS
+ else {
+ KASSERT(txr->hn_agg_txd != NULL,
+ ("no aggregating txdesc"));
+ KASSERT(m_head == NULL,
+ ("pending mbuf for aggregating txdesc"));
+ }
+#endif
/* Sent */
drbr_advance(ifp, txr->hn_mbuf_br);
}
- return 0;
+
+ /* Flush pending aggregated transmission. */
+ if (txr->hn_agg_txd != NULL)
+ hn_flush_txagg(ifp, txr);
+ return (sched);
}
static int
@@ -3978,6 +4441,11 @@
if (error)
return (error);
+ /*
+ * Fixup transmission aggregation setup.
+ */
+ hn_set_txagg(sc);
+
sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;
return (0);
}
Index: head/sys/dev/hyperv/netvsc/if_hnvar.h
===================================================================
--- head/sys/dev/hyperv/netvsc/if_hnvar.h
+++ head/sys/dev/hyperv/netvsc/if_hnvar.h
@@ -125,6 +125,22 @@
bus_dma_tag_t hn_tx_data_dtag;
uint64_t hn_csum_assist;
+ /* Applied packet transmission aggregation limits. */
+ int hn_agg_szmax;
+ short hn_agg_pktmax;
+ short hn_agg_align;
+
+ /* Packet transmission aggregation states. */
+ struct hn_txdesc *hn_agg_txd;
+ int hn_agg_szleft;
+ short hn_agg_pktleft;
+ struct rndis_packet_msg *hn_agg_prevpkt;
+
+ /* Temporary stats for each send. */
+ int hn_stat_size;
+ short hn_stat_pkts;
+ short hn_stat_mcasts;
+
int (*hn_sendpkt)(struct hn_tx_ring *, struct hn_txdesc *);
int hn_suspended;
int hn_gpa_cnt;
@@ -137,6 +153,8 @@
u_long hn_tx_chimney_tried;
u_long hn_tx_chimney;
u_long hn_pkts;
+ u_long hn_sends;
+ u_long hn_flush_failed;
/* Rarely used stuffs */
struct hn_txdesc *hn_txdesc;
@@ -180,6 +198,10 @@
uint32_t hn_nvs_ver;
uint32_t hn_rx_filter;
+ /* Packet transmission aggregation user settings. */
+ int hn_agg_size;
+ int hn_agg_pkts;
+
struct taskqueue *hn_mgmt_taskq;
struct taskqueue *hn_mgmt_taskq0;
struct task hn_link_task;
@@ -200,6 +222,9 @@
uint32_t hn_ndis_ver;
int hn_ndis_tso_szmax;
int hn_ndis_tso_sgmin;
+ uint32_t hn_rndis_agg_size;
+ uint32_t hn_rndis_agg_pkts;
+ uint32_t hn_rndis_agg_align;
int hn_rss_ind_size;
uint32_t hn_rss_hash; /* NDIS_HASH_ */
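
The per-ring fields added above carry two kinds of state: aggregation progress (hn_agg_txd, hn_agg_szleft, hn_agg_pktleft, hn_agg_prevpkt) and per-send temporary stats that hn_txpkt() folds into the ifnet counters before resetting them. A reduced model of the stats lifecycle, with hypothetical names:

struct txstats_sketch {
	int	size;	/* hn_stat_size: bytes queued this send */
	short	pkts;	/* hn_stat_pkts: packets queued this send */
	short	mcasts;	/* hn_stat_mcasts: multicasts this send */
};

static void
record_pkt(struct txstats_sketch *st, int pktlen, int is_mcast)
{
	/* hn_encap() updates these once per encapsulated packet. */
	st->pkts++;
	st->size += pktlen;
	if (is_mcast)
		st->mcasts++;
}

static void
send_done(struct txstats_sketch *st)
{
	/* hn_txpkt() credits the ifnet counters, then resets. */
	st->size = 0;
	st->pkts = 0;
	st->mcasts = 0;
}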