diff --git a/sys/cam/ctl/ctl_ha.c b/sys/cam/ctl/ctl_ha.c index 0828c46c8863..695006ed99e1 100644 --- a/sys/cam/ctl/ctl_ha.c +++ b/sys/cam/ctl/ctl_ha.c @@ -1,947 +1,942 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2015 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct ha_msg_wire { uint32_t channel; uint32_t length; }; struct ha_dt_msg_wire { ctl_ha_dt_cmd command; uint32_t size; uint8_t *local; uint8_t *remote; }; struct ha_softc { struct ctl_softc *ha_ctl_softc; ctl_evt_handler ha_handler[CTL_HA_CHAN_MAX]; char ha_peer[128]; struct sockaddr_in ha_peer_in; struct socket *ha_lso; struct socket *ha_so; struct mbufq ha_sendq; struct mbuf *ha_sending; struct mtx ha_lock; int ha_connect; int ha_listen; int ha_connected; int ha_receiving; int ha_wakeup; int ha_disconnect; int ha_shutdown; eventhandler_tag ha_shutdown_eh; TAILQ_HEAD(, ctl_ha_dt_req) ha_dts; } ha_softc; static void ctl_ha_conn_wake(struct ha_softc *softc) { mtx_lock(&softc->ha_lock); softc->ha_wakeup = 1; mtx_unlock(&softc->ha_lock); wakeup(&softc->ha_wakeup); } static int ctl_ha_lupcall(struct socket *so, void *arg, int waitflag) { struct ha_softc *softc = arg; ctl_ha_conn_wake(softc); return (SU_OK); } static int ctl_ha_rupcall(struct socket *so, void *arg, int waitflag) { struct ha_softc *softc = arg; wakeup(&softc->ha_receiving); return (SU_OK); } static int ctl_ha_supcall(struct socket *so, void *arg, int waitflag) { struct ha_softc *softc = arg; ctl_ha_conn_wake(softc); return (SU_OK); } static void ctl_ha_evt(struct ha_softc *softc, ctl_ha_channel ch, ctl_ha_event evt, int param) { int i; if (ch < CTL_HA_CHAN_MAX) { if (softc->ha_handler[ch]) softc->ha_handler[ch](ch, evt, param); return; } for (i = 0; i < CTL_HA_CHAN_MAX; i++) { if (softc->ha_handler[i]) softc->ha_handler[i](i, evt, param); } } static void ctl_ha_close(struct ha_softc *softc) { struct socket *so = softc->ha_so; int report = 0; if (softc->ha_connected || 
softc->ha_disconnect) { softc->ha_connected = 0; mbufq_drain(&softc->ha_sendq); m_freem(softc->ha_sending); softc->ha_sending = NULL; report = 1; } if (so) { SOCKBUF_LOCK(&so->so_rcv); soupcall_clear(so, SO_RCV); while (softc->ha_receiving) { wakeup(&softc->ha_receiving); msleep(&softc->ha_receiving, SOCKBUF_MTX(&so->so_rcv), 0, "ha_rx exit", 0); } SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_LOCK(&so->so_snd); soupcall_clear(so, SO_SND); SOCKBUF_UNLOCK(&so->so_snd); softc->ha_so = NULL; if (softc->ha_connect) pause("reconnect", hz / 2); soclose(so); } if (report) { ctl_ha_evt(softc, CTL_HA_CHAN_MAX, CTL_HA_EVT_LINK_CHANGE, (softc->ha_connect || softc->ha_listen) ? CTL_HA_LINK_UNKNOWN : CTL_HA_LINK_OFFLINE); } } static void ctl_ha_lclose(struct ha_softc *softc) { if (softc->ha_lso) { if (SOLISTENING(softc->ha_lso)) { SOLISTEN_LOCK(softc->ha_lso); solisten_upcall_set(softc->ha_lso, NULL, NULL); SOLISTEN_UNLOCK(softc->ha_lso); } soclose(softc->ha_lso); softc->ha_lso = NULL; } } static void ctl_ha_rx_thread(void *arg) { struct ha_softc *softc = arg; struct socket *so = softc->ha_so; struct ha_msg_wire wire_hdr; struct uio uio; struct iovec iov; int error, flags, next; bzero(&wire_hdr, sizeof(wire_hdr)); while (1) { if (wire_hdr.length > 0) next = wire_hdr.length; else next = sizeof(wire_hdr); SOCKBUF_LOCK(&so->so_rcv); while (sbavail(&so->so_rcv) < next || softc->ha_disconnect) { if (softc->ha_connected == 0 || softc->ha_disconnect || so->so_error || (so->so_rcv.sb_state & SBS_CANTRCVMORE)) { goto errout; } so->so_rcv.sb_lowat = next; msleep(&softc->ha_receiving, SOCKBUF_MTX(&so->so_rcv), 0, "-", 0); } SOCKBUF_UNLOCK(&so->so_rcv); if (wire_hdr.length == 0) { iov.iov_base = &wire_hdr; iov.iov_len = sizeof(wire_hdr); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_rw = UIO_READ; uio.uio_segflg = UIO_SYSSPACE; uio.uio_td = curthread; uio.uio_resid = sizeof(wire_hdr); flags = MSG_DONTWAIT; error = soreceive(softc->ha_so, NULL, &uio, NULL, NULL, &flags); if (error != 0) { printf("%s: header receive error %d\n", __func__, error); SOCKBUF_LOCK(&so->so_rcv); goto errout; } } else { ctl_ha_evt(softc, wire_hdr.channel, CTL_HA_EVT_MSG_RECV, wire_hdr.length); wire_hdr.length = 0; } } errout: softc->ha_receiving = 0; wakeup(&softc->ha_receiving); SOCKBUF_UNLOCK(&so->so_rcv); ctl_ha_conn_wake(softc); kthread_exit(); } static void ctl_ha_send(struct ha_softc *softc) { struct socket *so = softc->ha_so; int error; while (1) { if (softc->ha_sending == NULL) { mtx_lock(&softc->ha_lock); softc->ha_sending = mbufq_dequeue(&softc->ha_sendq); mtx_unlock(&softc->ha_lock); if (softc->ha_sending == NULL) { so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; break; } } SOCKBUF_LOCK(&so->so_snd); if (sbspace(&so->so_snd) < softc->ha_sending->m_pkthdr.len) { so->so_snd.sb_lowat = softc->ha_sending->m_pkthdr.len; SOCKBUF_UNLOCK(&so->so_snd); break; } SOCKBUF_UNLOCK(&so->so_snd); error = sosend(softc->ha_so, NULL, NULL, softc->ha_sending, NULL, MSG_DONTWAIT, curthread); softc->ha_sending = NULL; if (error != 0) { printf("%s: sosend() error %d\n", __func__, error); return; } } } static void ctl_ha_sock_setup(struct ha_softc *softc) { struct sockopt opt; struct socket *so = softc->ha_so; int error, val; val = 1024 * 1024; error = soreserve(so, val, val); if (error) printf("%s: soreserve failed %d\n", __func__, error); SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_lowat = sizeof(struct ha_msg_wire); soupcall_set(so, SO_RCV, ctl_ha_rupcall, softc); SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_lowat = sizeof(struct 
ha_msg_wire); soupcall_set(so, SO_SND, ctl_ha_supcall, softc); SOCKBUF_UNLOCK(&so->so_snd); bzero(&opt, sizeof(struct sockopt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = SOL_SOCKET; opt.sopt_name = SO_KEEPALIVE; opt.sopt_val = &val; opt.sopt_valsize = sizeof(val); val = 1; error = sosetopt(so, &opt); if (error) printf("%s: KEEPALIVE setting failed %d\n", __func__, error); opt.sopt_level = IPPROTO_TCP; opt.sopt_name = TCP_NODELAY; val = 1; error = sosetopt(so, &opt); if (error) printf("%s: NODELAY setting failed %d\n", __func__, error); opt.sopt_name = TCP_KEEPINIT; val = 3; error = sosetopt(so, &opt); if (error) printf("%s: KEEPINIT setting failed %d\n", __func__, error); opt.sopt_name = TCP_KEEPIDLE; val = 1; error = sosetopt(so, &opt); if (error) printf("%s: KEEPIDLE setting failed %d\n", __func__, error); opt.sopt_name = TCP_KEEPINTVL; val = 1; error = sosetopt(so, &opt); if (error) printf("%s: KEEPINTVL setting failed %d\n", __func__, error); opt.sopt_name = TCP_KEEPCNT; val = 5; error = sosetopt(so, &opt); if (error) printf("%s: KEEPCNT setting failed %d\n", __func__, error); } static int ctl_ha_connect(struct ha_softc *softc) { struct thread *td = curthread; struct sockaddr_in sa; struct socket *so; int error; /* Create the socket */ error = socreate(PF_INET, &so, SOCK_STREAM, IPPROTO_TCP, td->td_ucred, td); if (error != 0) { printf("%s: socreate() error %d\n", __func__, error); return (error); } softc->ha_so = so; ctl_ha_sock_setup(softc); memcpy(&sa, &softc->ha_peer_in, sizeof(sa)); error = soconnect(so, (struct sockaddr *)&sa, td); if (error != 0) { if (bootverbose) printf("%s: soconnect() error %d\n", __func__, error); goto out; } return (0); out: ctl_ha_close(softc); return (error); } static int ctl_ha_accept(struct ha_softc *softc) { struct socket *lso, *so; - struct sockaddr *sap; + struct sockaddr_in sin = { .sin_len = sizeof(sin) }; int error; lso = softc->ha_lso; SOLISTEN_LOCK(lso); error = solisten_dequeue(lso, &so, 0); if (error == EWOULDBLOCK) return (error); if (error) { printf("%s: socket error %d\n", __func__, error); goto out; } - sap = NULL; - error = soaccept(so, &sap); + error = soaccept(so, (struct sockaddr *)&sin); if (error != 0) { printf("%s: soaccept() error %d\n", __func__, error); - if (sap != NULL) - free(sap, M_SONAME); goto out; } - if (sap != NULL) - free(sap, M_SONAME); softc->ha_so = so; ctl_ha_sock_setup(softc); return (0); out: ctl_ha_lclose(softc); return (error); } static int ctl_ha_listen(struct ha_softc *softc) { struct thread *td = curthread; struct sockaddr_in sa; struct sockopt opt; int error, val; /* Create the socket */ if (softc->ha_lso == NULL) { error = socreate(PF_INET, &softc->ha_lso, SOCK_STREAM, IPPROTO_TCP, td->td_ucred, td); if (error != 0) { printf("%s: socreate() error %d\n", __func__, error); return (error); } bzero(&opt, sizeof(struct sockopt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = SOL_SOCKET; opt.sopt_name = SO_REUSEADDR; opt.sopt_val = &val; opt.sopt_valsize = sizeof(val); val = 1; error = sosetopt(softc->ha_lso, &opt); if (error) { printf("%s: REUSEADDR setting failed %d\n", __func__, error); } bzero(&opt, sizeof(struct sockopt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = SOL_SOCKET; opt.sopt_name = SO_REUSEPORT; opt.sopt_val = &val; opt.sopt_valsize = sizeof(val); val = 1; error = sosetopt(softc->ha_lso, &opt); if (error) { printf("%s: REUSEPORT setting failed %d\n", __func__, error); } } memcpy(&sa, &softc->ha_peer_in, sizeof(sa)); error = sobind(softc->ha_lso, (struct sockaddr *)&sa, td); if (error != 0) { 
printf("%s: sobind() error %d\n", __func__, error); goto out; } error = solisten(softc->ha_lso, 1, td); if (error != 0) { printf("%s: solisten() error %d\n", __func__, error); goto out; } SOLISTEN_LOCK(softc->ha_lso); softc->ha_lso->so_state |= SS_NBIO; solisten_upcall_set(softc->ha_lso, ctl_ha_lupcall, softc); SOLISTEN_UNLOCK(softc->ha_lso); return (0); out: ctl_ha_lclose(softc); return (error); } static void ctl_ha_conn_thread(void *arg) { struct ha_softc *softc = arg; int error; while (1) { if (softc->ha_disconnect || softc->ha_shutdown) { ctl_ha_close(softc); if (softc->ha_disconnect == 2 || softc->ha_shutdown) ctl_ha_lclose(softc); softc->ha_disconnect = 0; if (softc->ha_shutdown) break; } else if (softc->ha_so != NULL && (softc->ha_so->so_error || softc->ha_so->so_rcv.sb_state & SBS_CANTRCVMORE)) ctl_ha_close(softc); if (softc->ha_so == NULL) { if (softc->ha_lso != NULL) ctl_ha_accept(softc); else if (softc->ha_listen) ctl_ha_listen(softc); else if (softc->ha_connect) ctl_ha_connect(softc); } if (softc->ha_so != NULL) { if (softc->ha_connected == 0 && softc->ha_so->so_error == 0 && (softc->ha_so->so_state & SS_ISCONNECTING) == 0) { softc->ha_connected = 1; ctl_ha_evt(softc, CTL_HA_CHAN_MAX, CTL_HA_EVT_LINK_CHANGE, CTL_HA_LINK_ONLINE); softc->ha_receiving = 1; error = kproc_kthread_add(ctl_ha_rx_thread, softc, &softc->ha_ctl_softc->ctl_proc, NULL, 0, 0, "ctl", "ha_rx"); if (error != 0) { printf("Error creating CTL HA rx thread!\n"); softc->ha_receiving = 0; softc->ha_disconnect = 1; } } ctl_ha_send(softc); } mtx_lock(&softc->ha_lock); if (softc->ha_so != NULL && (softc->ha_so->so_error || softc->ha_so->so_rcv.sb_state & SBS_CANTRCVMORE)) ; else if (!softc->ha_wakeup) msleep(&softc->ha_wakeup, &softc->ha_lock, 0, "-", hz); softc->ha_wakeup = 0; mtx_unlock(&softc->ha_lock); } mtx_lock(&softc->ha_lock); softc->ha_shutdown = 2; wakeup(&softc->ha_wakeup); mtx_unlock(&softc->ha_lock); kthread_exit(); } static int ctl_ha_peer_sysctl(SYSCTL_HANDLER_ARGS) { struct ha_softc *softc = (struct ha_softc *)arg1; struct sockaddr_in *sa; int error, b1, b2, b3, b4, p, num; char buf[128]; strlcpy(buf, softc->ha_peer, sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if ((error != 0) || (req->newptr == NULL) || strncmp(buf, softc->ha_peer, sizeof(buf)) == 0) return (error); sa = &softc->ha_peer_in; mtx_lock(&softc->ha_lock); if ((num = sscanf(buf, "connect %d.%d.%d.%d:%d", &b1, &b2, &b3, &b4, &p)) >= 4) { softc->ha_connect = 1; softc->ha_listen = 0; } else if ((num = sscanf(buf, "listen %d.%d.%d.%d:%d", &b1, &b2, &b3, &b4, &p)) >= 4) { softc->ha_connect = 0; softc->ha_listen = 1; } else { softc->ha_connect = 0; softc->ha_listen = 0; if (buf[0] != 0) { buf[0] = 0; error = EINVAL; } } strlcpy(softc->ha_peer, buf, sizeof(softc->ha_peer)); if (softc->ha_connect || softc->ha_listen) { memset(sa, 0, sizeof(*sa)); sa->sin_len = sizeof(struct sockaddr_in); sa->sin_family = AF_INET; sa->sin_port = htons((num >= 5) ? 
p : 999); sa->sin_addr.s_addr = htonl((b1 << 24) + (b2 << 16) + (b3 << 8) + b4); } softc->ha_disconnect = 2; softc->ha_wakeup = 1; mtx_unlock(&softc->ha_lock); wakeup(&softc->ha_wakeup); return (error); } ctl_ha_status ctl_ha_msg_register(ctl_ha_channel channel, ctl_evt_handler handler) { struct ha_softc *softc = &ha_softc; KASSERT(channel < CTL_HA_CHAN_MAX, ("Wrong CTL HA channel %d", channel)); softc->ha_handler[channel] = handler; return (CTL_HA_STATUS_SUCCESS); } ctl_ha_status ctl_ha_msg_deregister(ctl_ha_channel channel) { struct ha_softc *softc = &ha_softc; KASSERT(channel < CTL_HA_CHAN_MAX, ("Wrong CTL HA channel %d", channel)); softc->ha_handler[channel] = NULL; return (CTL_HA_STATUS_SUCCESS); } /* * Receive a message of the specified size. */ ctl_ha_status ctl_ha_msg_recv(ctl_ha_channel channel, void *addr, size_t len, int wait) { struct ha_softc *softc = &ha_softc; struct uio uio; struct iovec iov; int error, flags; if (!softc->ha_connected) return (CTL_HA_STATUS_DISCONNECT); iov.iov_base = addr; iov.iov_len = len; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_rw = UIO_READ; uio.uio_segflg = UIO_SYSSPACE; uio.uio_td = curthread; uio.uio_resid = len; flags = wait ? 0 : MSG_DONTWAIT; error = soreceive(softc->ha_so, NULL, &uio, NULL, NULL, &flags); if (error == 0) return (CTL_HA_STATUS_SUCCESS); /* Consider all errors fatal for HA sanity. */ mtx_lock(&softc->ha_lock); if (softc->ha_connected) { softc->ha_disconnect = 1; softc->ha_wakeup = 1; wakeup(&softc->ha_wakeup); } mtx_unlock(&softc->ha_lock); return (CTL_HA_STATUS_ERROR); } /* * Send a message of the specified size. */ ctl_ha_status ctl_ha_msg_send2(ctl_ha_channel channel, const void *addr, size_t len, const void *addr2, size_t len2, int wait) { struct ha_softc *softc = &ha_softc; struct mbuf *mb, *newmb; struct ha_msg_wire hdr; size_t copylen, off; if (!softc->ha_connected) return (CTL_HA_STATUS_DISCONNECT); newmb = m_getm2(NULL, sizeof(hdr) + len + len2, wait, MT_DATA, M_PKTHDR); if (newmb == NULL) { /* Consider all errors fatal for HA sanity. 
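 * A frame on this link is a struct ha_msg_wire header immediately
 * followed by hdr.length payload bytes.  There is no resynchronization
 * marker in the stream, so once a frame is lost or half-delivered the
 * peers cannot re-align; dropping the connection and reconnecting is
 * the only recovery.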
*/ mtx_lock(&softc->ha_lock); if (softc->ha_connected) { softc->ha_disconnect = 1; softc->ha_wakeup = 1; wakeup(&softc->ha_wakeup); } mtx_unlock(&softc->ha_lock); printf("%s: Can't allocate mbuf chain\n", __func__); return (CTL_HA_STATUS_ERROR); } hdr.channel = channel; hdr.length = len + len2; mb = newmb; memcpy(mtodo(mb, 0), &hdr, sizeof(hdr)); mb->m_len += sizeof(hdr); off = 0; for (; mb != NULL && off < len; mb = mb->m_next) { copylen = min(M_TRAILINGSPACE(mb), len - off); memcpy(mtodo(mb, mb->m_len), (const char *)addr + off, copylen); mb->m_len += copylen; off += copylen; if (off == len) break; } KASSERT(off == len, ("%s: off (%zu) != len (%zu)", __func__, off, len)); off = 0; for (; mb != NULL && off < len2; mb = mb->m_next) { copylen = min(M_TRAILINGSPACE(mb), len2 - off); memcpy(mtodo(mb, mb->m_len), (const char *)addr2 + off, copylen); mb->m_len += copylen; off += copylen; } KASSERT(off == len2, ("%s: off (%zu) != len2 (%zu)", __func__, off, len2)); newmb->m_pkthdr.len = sizeof(hdr) + len + len2; mtx_lock(&softc->ha_lock); if (!softc->ha_connected) { mtx_unlock(&softc->ha_lock); m_freem(newmb); return (CTL_HA_STATUS_DISCONNECT); } mbufq_enqueue(&softc->ha_sendq, newmb); softc->ha_wakeup = 1; mtx_unlock(&softc->ha_lock); wakeup(&softc->ha_wakeup); return (CTL_HA_STATUS_SUCCESS); } ctl_ha_status ctl_ha_msg_send(ctl_ha_channel channel, const void *addr, size_t len, int wait) { return (ctl_ha_msg_send2(channel, addr, len, NULL, 0, wait)); } ctl_ha_status ctl_ha_msg_abort(ctl_ha_channel channel) { struct ha_softc *softc = &ha_softc; mtx_lock(&softc->ha_lock); softc->ha_disconnect = 1; softc->ha_wakeup = 1; mtx_unlock(&softc->ha_lock); wakeup(&softc->ha_wakeup); return (CTL_HA_STATUS_SUCCESS); } /* * Allocate a data transfer request structure. */ struct ctl_ha_dt_req * ctl_dt_req_alloc(void) { return (malloc(sizeof(struct ctl_ha_dt_req), M_CTL, M_WAITOK | M_ZERO)); } /* * Free a data transfer request structure. */ void ctl_dt_req_free(struct ctl_ha_dt_req *req) { free(req, M_CTL); } /* * Issue a DMA request for a single buffer. 
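 *
 * The flow over CTL_HA_CHAN_DATA: a READ request that carries a
 * callback is queued on ha_dts and only its ha_dt_msg_wire header is
 * sent; the peer answers with a WRITE message carrying the data, which
 * ctl_dt_event_handler() matches back to the queued request (by its
 * local buffer pointer) before invoking the callback with req->ret
 * set.  A minimal caller sketch, where my_done_cb is a hypothetical
 * completion handler:
 *
 *	struct ctl_ha_dt_req *req = ctl_dt_req_alloc();
 *	req->command = CTL_HA_DT_CMD_READ;
 *	req->size = size;
 *	req->local = local_buf;
 *	req->remote = remote_buf;
 *	req->callback = my_done_cb;
 *	if (ctl_dt_single(req) == CTL_HA_STATUS_WAIT)
 *		return;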
*/ ctl_ha_status ctl_dt_single(struct ctl_ha_dt_req *req) { struct ha_softc *softc = &ha_softc; struct ha_dt_msg_wire wire_dt; ctl_ha_status status; wire_dt.command = req->command; wire_dt.size = req->size; wire_dt.local = req->local; wire_dt.remote = req->remote; if (req->command == CTL_HA_DT_CMD_READ && req->callback != NULL) { mtx_lock(&softc->ha_lock); TAILQ_INSERT_TAIL(&softc->ha_dts, req, links); mtx_unlock(&softc->ha_lock); ctl_ha_msg_send(CTL_HA_CHAN_DATA, &wire_dt, sizeof(wire_dt), M_WAITOK); return (CTL_HA_STATUS_WAIT); } if (req->command == CTL_HA_DT_CMD_READ) { status = ctl_ha_msg_send(CTL_HA_CHAN_DATA, &wire_dt, sizeof(wire_dt), M_WAITOK); } else { status = ctl_ha_msg_send2(CTL_HA_CHAN_DATA, &wire_dt, sizeof(wire_dt), req->local, req->size, M_WAITOK); } return (status); } static void ctl_dt_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param) { struct ha_softc *softc = &ha_softc; struct ctl_ha_dt_req *req; ctl_ha_status isc_status; if (event == CTL_HA_EVT_MSG_RECV) { struct ha_dt_msg_wire wire_dt; uint8_t *tmp; int size; size = min(sizeof(wire_dt), param); isc_status = ctl_ha_msg_recv(CTL_HA_CHAN_DATA, &wire_dt, size, M_WAITOK); if (isc_status != CTL_HA_STATUS_SUCCESS) { printf("%s: Error receiving message: %d\n", __func__, isc_status); return; } if (wire_dt.command == CTL_HA_DT_CMD_READ) { wire_dt.command = CTL_HA_DT_CMD_WRITE; tmp = wire_dt.local; wire_dt.local = wire_dt.remote; wire_dt.remote = tmp; ctl_ha_msg_send2(CTL_HA_CHAN_DATA, &wire_dt, sizeof(wire_dt), wire_dt.local, wire_dt.size, M_WAITOK); } else if (wire_dt.command == CTL_HA_DT_CMD_WRITE) { isc_status = ctl_ha_msg_recv(CTL_HA_CHAN_DATA, wire_dt.remote, wire_dt.size, M_WAITOK); mtx_lock(&softc->ha_lock); TAILQ_FOREACH(req, &softc->ha_dts, links) { if (req->local == wire_dt.remote) { TAILQ_REMOVE(&softc->ha_dts, req, links); break; } } mtx_unlock(&softc->ha_lock); if (req) { req->ret = isc_status; req->callback(req); } } } else if (event == CTL_HA_EVT_LINK_CHANGE) { CTL_DEBUG_PRINT(("%s: Link state change to %d\n", __func__, param)); if (param != CTL_HA_LINK_ONLINE) { mtx_lock(&softc->ha_lock); while ((req = TAILQ_FIRST(&softc->ha_dts)) != NULL) { TAILQ_REMOVE(&softc->ha_dts, req, links); mtx_unlock(&softc->ha_lock); req->ret = CTL_HA_STATUS_DISCONNECT; req->callback(req); mtx_lock(&softc->ha_lock); } mtx_unlock(&softc->ha_lock); } } else { printf("%s: Unknown event %d\n", __func__, event); } } ctl_ha_status ctl_ha_msg_init(struct ctl_softc *ctl_softc) { struct ha_softc *softc = &ha_softc; int error; softc->ha_ctl_softc = ctl_softc; mtx_init(&softc->ha_lock, "CTL HA mutex", NULL, MTX_DEF); mbufq_init(&softc->ha_sendq, INT_MAX); TAILQ_INIT(&softc->ha_dts); error = kproc_kthread_add(ctl_ha_conn_thread, softc, &ctl_softc->ctl_proc, NULL, 0, 0, "ctl", "ha_tx"); if (error != 0) { printf("error creating CTL HA connection thread!\n"); mtx_destroy(&softc->ha_lock); return (CTL_HA_STATUS_ERROR); } softc->ha_shutdown_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync, ctl_ha_msg_shutdown, ctl_softc, SHUTDOWN_PRI_FIRST); SYSCTL_ADD_PROC(&ctl_softc->sysctl_ctx, SYSCTL_CHILDREN(ctl_softc->sysctl_tree), OID_AUTO, "ha_peer", CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, softc, 0, ctl_ha_peer_sysctl, "A", "HA peer connection method"); if (ctl_ha_msg_register(CTL_HA_CHAN_DATA, ctl_dt_event_handler) != CTL_HA_STATUS_SUCCESS) { printf("%s: ctl_ha_msg_register failed.\n", __func__); } return (CTL_HA_STATUS_SUCCESS); }; void ctl_ha_msg_shutdown(struct ctl_softc *ctl_softc) { struct ha_softc *softc = &ha_softc; if 
(SCHEDULER_STOPPED()) return; /* Disconnect and shutdown threads. */ mtx_lock(&softc->ha_lock); if (softc->ha_shutdown < 2) { softc->ha_shutdown = 1; softc->ha_wakeup = 1; wakeup(&softc->ha_wakeup); while (softc->ha_shutdown < 2) { msleep(&softc->ha_wakeup, &softc->ha_lock, 0, "shutdown", hz); } } mtx_unlock(&softc->ha_lock); }; ctl_ha_status ctl_ha_msg_destroy(struct ctl_softc *ctl_softc) { struct ha_softc *softc = &ha_softc; if (softc->ha_shutdown_eh != NULL) { EVENTHANDLER_DEREGISTER(shutdown_pre_sync, softc->ha_shutdown_eh); softc->ha_shutdown_eh = NULL; } ctl_ha_msg_shutdown(ctl_softc); /* Just in case. */ if (ctl_ha_msg_deregister(CTL_HA_CHAN_DATA) != CTL_HA_STATUS_SUCCESS) printf("%s: ctl_ha_msg_deregister failed.\n", __func__); mtx_destroy(&softc->ha_lock); return (CTL_HA_STATUS_SUCCESS); }; diff --git a/sys/compat/linux/linux_socket.c b/sys/compat/linux/linux_socket.c index a5ed5c5c62db..2893e93bbcd7 100644 --- a/sys/compat/linux/linux_socket.c +++ b/sys/compat/linux/linux_socket.c @@ -1,2741 +1,2741 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif #ifdef COMPAT_LINUX32 #include #include #include #else #include #include #endif #include #include #include #include #include #include #include _Static_assert(offsetof(struct l_ifreq, ifr_ifru) == offsetof(struct ifreq, ifr_ifru), "Linux ifreq members names should be equal to FreeeBSD"); _Static_assert(offsetof(struct l_ifreq, ifr_index) == offsetof(struct ifreq, ifr_index), "Linux ifreq members names should be equal to FreeeBSD"); _Static_assert(offsetof(struct l_ifreq, ifr_name) == offsetof(struct ifreq, ifr_name), "Linux ifreq members names should be equal to FreeeBSD"); #define SECURITY_CONTEXT_STRING "unconfined" static int linux_sendmsg_common(struct thread *, l_int, struct l_msghdr *, l_uint); static int linux_recvmsg_common(struct thread *, l_int, struct l_msghdr *, l_uint, struct msghdr *); static int linux_set_socket_flags(int, int *); #define SOL_NETLINK 270 static int linux_to_bsd_sockopt_level(int level) { if (level == LINUX_SOL_SOCKET) return (SOL_SOCKET); /* Remaining values are RFC-defined protocol numbers. */ return (level); } static int bsd_to_linux_sockopt_level(int level) { if (level == SOL_SOCKET) return (LINUX_SOL_SOCKET); return (level); } static int linux_to_bsd_ip_sockopt(int opt) { switch (opt) { /* known and translated sockopts */ case LINUX_IP_TOS: return (IP_TOS); case LINUX_IP_TTL: return (IP_TTL); case LINUX_IP_HDRINCL: return (IP_HDRINCL); case LINUX_IP_OPTIONS: return (IP_OPTIONS); case LINUX_IP_RECVOPTS: LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_RECVOPTS"); return (IP_RECVOPTS); case LINUX_IP_RETOPTS: LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_REETOPTS"); return (IP_RETOPTS); case LINUX_IP_RECVTTL: LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_RECVTTL"); return (IP_RECVTTL); case LINUX_IP_RECVTOS: LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_RECVTOS"); return (IP_RECVTOS); case LINUX_IP_FREEBIND: LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_FREEBIND"); return (IP_BINDANY); case LINUX_IP_IPSEC_POLICY: /* we have this option, but not documented in ip(4) manpage */ LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_IPSEC_POLICY"); return (IP_IPSEC_POLICY); case LINUX_IP_MINTTL: LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_MINTTL"); return (IP_MINTTL); case LINUX_IP_MULTICAST_IF: return (IP_MULTICAST_IF); case LINUX_IP_MULTICAST_TTL: return (IP_MULTICAST_TTL); case LINUX_IP_MULTICAST_LOOP: return (IP_MULTICAST_LOOP); case LINUX_IP_ADD_MEMBERSHIP: return (IP_ADD_MEMBERSHIP); case LINUX_IP_DROP_MEMBERSHIP: return (IP_DROP_MEMBERSHIP); case LINUX_IP_UNBLOCK_SOURCE: LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_UNBLOCK_SOURCE"); return (IP_UNBLOCK_SOURCE); case LINUX_IP_BLOCK_SOURCE: LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_BLOCK_SOURCE"); return (IP_BLOCK_SOURCE); case LINUX_IP_ADD_SOURCE_MEMBERSHIP: LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_ADD_SOURCE_MEMBERSHIP"); return (IP_ADD_SOURCE_MEMBERSHIP); case LINUX_IP_DROP_SOURCE_MEMBERSHIP: LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_DROP_SOURCE_MEMBERSHIP"); return (IP_DROP_SOURCE_MEMBERSHIP); case LINUX_MCAST_JOIN_GROUP: LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_MCAST_JOIN_GROUP"); return (MCAST_JOIN_GROUP); case LINUX_MCAST_LEAVE_GROUP: 
LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_MCAST_LEAVE_GROUP"); return (MCAST_LEAVE_GROUP); case LINUX_MCAST_JOIN_SOURCE_GROUP: LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_MCAST_JOIN_SOURCE_GROUP"); return (MCAST_JOIN_SOURCE_GROUP); case LINUX_MCAST_LEAVE_SOURCE_GROUP: LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_MCAST_LEAVE_SOURCE_GROUP"); return (MCAST_LEAVE_SOURCE_GROUP); case LINUX_IP_RECVORIGDSTADDR: return (IP_RECVORIGDSTADDR); /* known but not implemented sockopts */ case LINUX_IP_ROUTER_ALERT: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_ROUTER_ALERT (%d), you can not do user-space routing from linux programs", opt); return (-2); case LINUX_IP_PKTINFO: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_PKTINFO (%d), you can not get extended packet info for datagram sockets in linux programs", opt); return (-2); case LINUX_IP_PKTOPTIONS: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_PKTOPTIONS (%d)", opt); return (-2); case LINUX_IP_MTU_DISCOVER: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_MTU_DISCOVER (%d), your linux program can not control path-MTU discovery", opt); return (-2); case LINUX_IP_RECVERR: /* needed by steam */ LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_RECVERR (%d), you can not get extended reliability info in linux programs", opt); return (-2); case LINUX_IP_MTU: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_MTU (%d), your linux program can not control the MTU on this socket", opt); return (-2); case LINUX_IP_XFRM_POLICY: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_XFRM_POLICY (%d)", opt); return (-2); case LINUX_IP_PASSSEC: /* needed by steam */ LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_PASSSEC (%d), you can not get IPSEC related credential information associated with this socket in linux programs -- if you do not use IPSEC, you can ignore this", opt); return (-2); case LINUX_IP_TRANSPARENT: /* IP_BINDANY or more? 
*/ LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_TRANSPARENT (%d), you can not enable transparent proxying in linux programs -- note, IP_FREEBIND is supported, no idea if the FreeBSD IP_BINDANY is equivalent to the Linux IP_TRANSPARENT or not, any info is welcome", opt); return (-2); case LINUX_IP_NODEFRAG: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_NODEFRAG (%d)", opt); return (-2); case LINUX_IP_CHECKSUM: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_CHECKSUM (%d)", opt); return (-2); case LINUX_IP_BIND_ADDRESS_NO_PORT: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_BIND_ADDRESS_NO_PORT (%d)", opt); return (-2); case LINUX_IP_RECVFRAGSIZE: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_RECVFRAGSIZE (%d)", opt); return (-2); case LINUX_MCAST_MSFILTER: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_MCAST_MSFILTER (%d)", opt); return (-2); case LINUX_IP_MULTICAST_ALL: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_MULTICAST_ALL (%d), your linux program will not see all multicast groups joined by the entire system, only those the program joined itself on this socket", opt); return (-2); case LINUX_IP_UNICAST_IF: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv4 socket option IP_UNICAST_IF (%d)", opt); return (-2); /* unknown sockopts */ default: return (-1); } } static int linux_to_bsd_ip6_sockopt(int opt) { switch (opt) { /* known and translated sockopts */ case LINUX_IPV6_2292PKTINFO: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_2292PKTINFO"); return (IPV6_2292PKTINFO); case LINUX_IPV6_2292HOPOPTS: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_2292HOPOPTS"); return (IPV6_2292HOPOPTS); case LINUX_IPV6_2292DSTOPTS: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_2292DSTOPTS"); return (IPV6_2292DSTOPTS); case LINUX_IPV6_2292RTHDR: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_2292RTHDR"); return (IPV6_2292RTHDR); case LINUX_IPV6_2292PKTOPTIONS: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_2292PKTOPTIONS"); return (IPV6_2292PKTOPTIONS); case LINUX_IPV6_CHECKSUM: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_CHECKSUM"); return (IPV6_CHECKSUM); case LINUX_IPV6_2292HOPLIMIT: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_2292HOPLIMIT"); return (IPV6_2292HOPLIMIT); case LINUX_IPV6_NEXTHOP: return (IPV6_NEXTHOP); case LINUX_IPV6_UNICAST_HOPS: return (IPV6_UNICAST_HOPS); case LINUX_IPV6_MULTICAST_IF: return (IPV6_MULTICAST_IF); case LINUX_IPV6_MULTICAST_HOPS: return (IPV6_MULTICAST_HOPS); case LINUX_IPV6_MULTICAST_LOOP: return (IPV6_MULTICAST_LOOP); case LINUX_IPV6_ADD_MEMBERSHIP: return (IPV6_JOIN_GROUP); case LINUX_IPV6_DROP_MEMBERSHIP: return (IPV6_LEAVE_GROUP); case LINUX_IPV6_V6ONLY: return (IPV6_V6ONLY); case LINUX_IPV6_IPSEC_POLICY: /* we have this option, but not documented in ip6(4) manpage */ LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_IPSEC_POLICY"); return (IPV6_IPSEC_POLICY); case LINUX_MCAST_JOIN_GROUP: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_JOIN_GROUP"); return (IPV6_JOIN_GROUP); case LINUX_MCAST_LEAVE_GROUP: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_LEAVE_GROUP"); return (IPV6_LEAVE_GROUP); case LINUX_IPV6_RECVPKTINFO: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RECVPKTINFO"); return (IPV6_RECVPKTINFO); case LINUX_IPV6_PKTINFO: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_PKTINFO"); return (IPV6_PKTINFO); case LINUX_IPV6_RECVHOPLIMIT: 
LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RECVHOPLIMIT"); return (IPV6_RECVHOPLIMIT); case LINUX_IPV6_HOPLIMIT: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_HOPLIMIT"); return (IPV6_HOPLIMIT); case LINUX_IPV6_RECVHOPOPTS: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RECVHOPOPTS"); return (IPV6_RECVHOPOPTS); case LINUX_IPV6_HOPOPTS: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_HOPOPTS"); return (IPV6_HOPOPTS); case LINUX_IPV6_RTHDRDSTOPTS: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RTHDRDSTOPTS"); return (IPV6_RTHDRDSTOPTS); case LINUX_IPV6_RECVRTHDR: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RECVRTHDR"); return (IPV6_RECVRTHDR); case LINUX_IPV6_RTHDR: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RTHDR"); return (IPV6_RTHDR); case LINUX_IPV6_RECVDSTOPTS: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RECVDSTOPTS"); return (IPV6_RECVDSTOPTS); case LINUX_IPV6_DSTOPTS: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_DSTOPTS"); return (IPV6_DSTOPTS); case LINUX_IPV6_RECVPATHMTU: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RECVPATHMTU"); return (IPV6_RECVPATHMTU); case LINUX_IPV6_PATHMTU: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_PATHMTU"); return (IPV6_PATHMTU); case LINUX_IPV6_DONTFRAG: return (IPV6_DONTFRAG); case LINUX_IPV6_AUTOFLOWLABEL: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_AUTOFLOWLABEL"); return (IPV6_AUTOFLOWLABEL); case LINUX_IPV6_ORIGDSTADDR: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_ORIGDSTADDR"); return (IPV6_ORIGDSTADDR); case LINUX_IPV6_FREEBIND: LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_FREEBIND"); return (IPV6_BINDANY); /* known but not implemented sockopts */ case LINUX_IPV6_ADDRFORM: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_ADDRFORM (%d), you linux program can not convert the socket to IPv4", opt); return (-2); case LINUX_IPV6_AUTHHDR: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_AUTHHDR (%d), your linux program can not get the authentication header info of IPv6 packets", opt); return (-2); case LINUX_IPV6_FLOWINFO: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_FLOWINFO (%d), your linux program can not get the flowid of IPv6 packets", opt); return (-2); case LINUX_IPV6_ROUTER_ALERT: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_ROUTER_ALERT (%d), you can not do user-space routing from linux programs", opt); return (-2); case LINUX_IPV6_MTU_DISCOVER: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_MTU_DISCOVER (%d), your linux program can not control path-MTU discovery", opt); return (-2); case LINUX_IPV6_MTU: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_MTU (%d), your linux program can not control the MTU on this socket", opt); return (-2); case LINUX_IPV6_JOIN_ANYCAST: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_JOIN_ANYCAST (%d)", opt); return (-2); case LINUX_IPV6_LEAVE_ANYCAST: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_LEAVE_ANYCAST (%d)", opt); return (-2); case LINUX_IPV6_MULTICAST_ALL: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_MULTICAST_ALL (%d)", opt); return (-2); case LINUX_IPV6_ROUTER_ALERT_ISOLATE: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_ROUTER_ALERT_ISOLATE (%d)", opt); return (-2); case LINUX_IPV6_FLOWLABEL_MGR: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_FLOWLABEL_MGR (%d)", opt); return (-2); case 
LINUX_IPV6_FLOWINFO_SEND: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_FLOWINFO_SEND (%d)", opt); return (-2); case LINUX_IPV6_XFRM_POLICY: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_XFRM_POLICY (%d)", opt); return (-2); case LINUX_IPV6_HDRINCL: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_HDRINCL (%d)", opt); return (-2); case LINUX_MCAST_BLOCK_SOURCE: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option MCAST_BLOCK_SOURCE (%d), your linux program may see more multicast stuff than it wants", opt); return (-2); case LINUX_MCAST_UNBLOCK_SOURCE: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option MCAST_UNBLOCK_SOURCE (%d), your linux program may not see all the multicast stuff it wants", opt); return (-2); case LINUX_MCAST_JOIN_SOURCE_GROUP: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option MCAST_JOIN_SOURCE_GROUP (%d), your linux program is not able to join a multicast source group", opt); return (-2); case LINUX_MCAST_LEAVE_SOURCE_GROUP: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option MCAST_LEAVE_SOURCE_GROUP (%d), your linux program is not able to leave a multicast source group -- but it was also not able to join one, so no issue", opt); return (-2); case LINUX_MCAST_MSFILTER: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option MCAST_MSFILTER (%d), your linux program can not manipulate the multicast filter, it may see more multicast data than it wants to see", opt); return (-2); case LINUX_IPV6_ADDR_PREFERENCES: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_ADDR_PREFERENCES (%d)", opt); return (-2); case LINUX_IPV6_MINHOPCOUNT: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_MINHOPCOUNT (%d)", opt); return (-2); case LINUX_IPV6_TRANSPARENT: /* IP_BINDANY or more? 
*/ LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_TRANSPARENT (%d), you can not enable transparent proxying in linux programs -- note, IP_FREEBIND is supported, no idea if the FreeBSD IP_BINDANY is equivalent to the Linux IP_TRANSPARENT or not, any info is welcome", opt); return (-2); case LINUX_IPV6_UNICAST_IF: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_UNICAST_IF (%d)", opt); return (-2); case LINUX_IPV6_RECVFRAGSIZE: LINUX_RATELIMIT_MSG_OPT1( "unsupported IPv6 socket option IPV6_RECVFRAGSIZE (%d)", opt); return (-2); /* unknown sockopts */ default: return (-1); } } static int linux_to_bsd_so_sockopt(int opt) { switch (opt) { case LINUX_SO_DEBUG: return (SO_DEBUG); case LINUX_SO_REUSEADDR: return (SO_REUSEADDR); case LINUX_SO_TYPE: return (SO_TYPE); case LINUX_SO_ERROR: return (SO_ERROR); case LINUX_SO_DONTROUTE: return (SO_DONTROUTE); case LINUX_SO_BROADCAST: return (SO_BROADCAST); case LINUX_SO_SNDBUF: case LINUX_SO_SNDBUFFORCE: return (SO_SNDBUF); case LINUX_SO_RCVBUF: case LINUX_SO_RCVBUFFORCE: return (SO_RCVBUF); case LINUX_SO_KEEPALIVE: return (SO_KEEPALIVE); case LINUX_SO_OOBINLINE: return (SO_OOBINLINE); case LINUX_SO_LINGER: return (SO_LINGER); case LINUX_SO_REUSEPORT: return (SO_REUSEPORT_LB); case LINUX_SO_PASSCRED: return (LOCAL_CREDS_PERSISTENT); case LINUX_SO_PEERCRED: return (LOCAL_PEERCRED); case LINUX_SO_RCVLOWAT: return (SO_RCVLOWAT); case LINUX_SO_SNDLOWAT: return (SO_SNDLOWAT); case LINUX_SO_RCVTIMEO: return (SO_RCVTIMEO); case LINUX_SO_SNDTIMEO: return (SO_SNDTIMEO); case LINUX_SO_TIMESTAMPO: case LINUX_SO_TIMESTAMPN: return (SO_TIMESTAMP); case LINUX_SO_TIMESTAMPNSO: case LINUX_SO_TIMESTAMPNSN: return (SO_BINTIME); case LINUX_SO_ACCEPTCONN: return (SO_ACCEPTCONN); case LINUX_SO_PROTOCOL: return (SO_PROTOCOL); case LINUX_SO_DOMAIN: return (SO_DOMAIN); } return (-1); } static int linux_to_bsd_tcp_sockopt(int opt) { switch (opt) { case LINUX_TCP_NODELAY: return (TCP_NODELAY); case LINUX_TCP_MAXSEG: return (TCP_MAXSEG); case LINUX_TCP_CORK: return (TCP_NOPUSH); case LINUX_TCP_KEEPIDLE: return (TCP_KEEPIDLE); case LINUX_TCP_KEEPINTVL: return (TCP_KEEPINTVL); case LINUX_TCP_KEEPCNT: return (TCP_KEEPCNT); case LINUX_TCP_INFO: LINUX_RATELIMIT_MSG_OPT1( "unsupported TCP socket option TCP_INFO (%d)", opt); return (-2); case LINUX_TCP_MD5SIG: return (TCP_MD5SIG); } return (-1); } static int linux_to_bsd_msg_flags(int flags) { int ret_flags = 0; if (flags & LINUX_MSG_OOB) ret_flags |= MSG_OOB; if (flags & LINUX_MSG_PEEK) ret_flags |= MSG_PEEK; if (flags & LINUX_MSG_DONTROUTE) ret_flags |= MSG_DONTROUTE; if (flags & LINUX_MSG_CTRUNC) ret_flags |= MSG_CTRUNC; if (flags & LINUX_MSG_TRUNC) ret_flags |= MSG_TRUNC; if (flags & LINUX_MSG_DONTWAIT) ret_flags |= MSG_DONTWAIT; if (flags & LINUX_MSG_EOR) ret_flags |= MSG_EOR; if (flags & LINUX_MSG_WAITALL) ret_flags |= MSG_WAITALL; if (flags & LINUX_MSG_NOSIGNAL) ret_flags |= MSG_NOSIGNAL; if (flags & LINUX_MSG_PROXY) LINUX_RATELIMIT_MSG_OPT1("socket message flag MSG_PROXY (%d) not handled", LINUX_MSG_PROXY); if (flags & LINUX_MSG_FIN) LINUX_RATELIMIT_MSG_OPT1("socket message flag MSG_FIN (%d) not handled", LINUX_MSG_FIN); if (flags & LINUX_MSG_SYN) LINUX_RATELIMIT_MSG_OPT1("socket message flag MSG_SYN (%d) not handled", LINUX_MSG_SYN); if (flags & LINUX_MSG_CONFIRM) LINUX_RATELIMIT_MSG_OPT1("socket message flag MSG_CONFIRM (%d) not handled", LINUX_MSG_CONFIRM); if (flags & LINUX_MSG_RST) LINUX_RATELIMIT_MSG_OPT1("socket message flag MSG_RST (%d) not handled", LINUX_MSG_RST); if (flags & 
LINUX_MSG_ERRQUEUE) LINUX_RATELIMIT_MSG_OPT1("socket message flag MSG_ERRQUEUE (%d) not handled", LINUX_MSG_ERRQUEUE); return (ret_flags); } static int linux_to_bsd_cmsg_type(int cmsg_type) { switch (cmsg_type) { case LINUX_SCM_RIGHTS: return (SCM_RIGHTS); case LINUX_SCM_CREDENTIALS: return (SCM_CREDS); } return (-1); } static int bsd_to_linux_ip_cmsg_type(int cmsg_type) { switch (cmsg_type) { case IP_RECVORIGDSTADDR: return (LINUX_IP_RECVORIGDSTADDR); } return (-1); } static int bsd_to_linux_cmsg_type(struct proc *p, int cmsg_type, int cmsg_level) { struct linux_pemuldata *pem; if (cmsg_level == IPPROTO_IP) return (bsd_to_linux_ip_cmsg_type(cmsg_type)); if (cmsg_level != SOL_SOCKET) return (-1); pem = pem_find(p); switch (cmsg_type) { case SCM_RIGHTS: return (LINUX_SCM_RIGHTS); case SCM_CREDS: return (LINUX_SCM_CREDENTIALS); case SCM_CREDS2: return (LINUX_SCM_CREDENTIALS); case SCM_TIMESTAMP: return (pem->so_timestamp); case SCM_BINTIME: return (pem->so_timestampns); } return (-1); } static int linux_to_bsd_msghdr(struct msghdr *bhdr, const struct l_msghdr *lhdr) { if (lhdr->msg_controllen > INT_MAX) return (ENOBUFS); bhdr->msg_name = PTRIN(lhdr->msg_name); bhdr->msg_namelen = lhdr->msg_namelen; bhdr->msg_iov = PTRIN(lhdr->msg_iov); bhdr->msg_iovlen = lhdr->msg_iovlen; bhdr->msg_control = PTRIN(lhdr->msg_control); /* * msg_controllen is skipped since BSD and LINUX control messages * are potentially different sizes (e.g. the cred structure used * by SCM_CREDS is different between the two operating system). * * The caller can set it (if necessary) after converting all the * control messages. */ bhdr->msg_flags = linux_to_bsd_msg_flags(lhdr->msg_flags); return (0); } static int bsd_to_linux_msghdr(const struct msghdr *bhdr, struct l_msghdr *lhdr) { lhdr->msg_name = PTROUT(bhdr->msg_name); lhdr->msg_namelen = bhdr->msg_namelen; lhdr->msg_iov = PTROUT(bhdr->msg_iov); lhdr->msg_iovlen = bhdr->msg_iovlen; lhdr->msg_control = PTROUT(bhdr->msg_control); /* * msg_controllen is skipped since BSD and LINUX control messages * are potentially different sizes (e.g. the cred structure used * by SCM_CREDS is different between the two operating system). * * The caller can set it (if necessary) after converting all the * control messages. */ /* msg_flags skipped */ return (0); } static int linux_set_socket_flags(int lflags, int *flags) { if (lflags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK)) return (EINVAL); if (lflags & LINUX_SOCK_NONBLOCK) *flags |= SOCK_NONBLOCK; if (lflags & LINUX_SOCK_CLOEXEC) *flags |= SOCK_CLOEXEC; return (0); } static int linux_copyout_sockaddr(const struct sockaddr *sa, void *uaddr, size_t len) { struct l_sockaddr *lsa; int error; error = bsd_to_linux_sockaddr(sa, &lsa, len); if (error != 0) return (error); error = copyout(lsa, uaddr, len); free(lsa, M_LINUX); return (error); } static int linux_sendit(struct thread *td, int s, struct msghdr *mp, int flags, struct mbuf *control, enum uio_seg segflg) { struct sockaddr *to; int error, len; if (mp->msg_name != NULL) { len = mp->msg_namelen; error = linux_to_bsd_sockaddr(mp->msg_name, &to, &len); if (error != 0) return (error); mp->msg_name = to; } else to = NULL; error = kern_sendit(td, s, mp, linux_to_bsd_msg_flags(flags), control, segflg); if (to) free(to, M_SONAME); return (error); } /* Return 0 if IP_HDRINCL is set for the given socket. 
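 * Note the inverted convention: the function returns (optval == 0), so
 * 0 means the option is enabled; a kern_getsockopt() error is returned
 * as-is, and callers treat any non-zero result as "not set".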
*/ static int linux_check_hdrincl(struct thread *td, int s) { int error, optval; socklen_t size_val; size_val = sizeof(optval); error = kern_getsockopt(td, s, IPPROTO_IP, IP_HDRINCL, &optval, UIO_SYSSPACE, &size_val); if (error != 0) return (error); return (optval == 0); } /* * Updated sendto() when IP_HDRINCL is set: * tweak endian-dependent fields in the IP packet. */ static int linux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args) { /* * linux_ip_copysize defines how many bytes we should copy * from the beginning of the IP packet before we customize it for BSD. * It should include all the fields we modify (ip_len and ip_off). */ #define linux_ip_copysize 8 struct ip *packet; struct msghdr msg; struct iovec aiov[1]; int error; /* Check that the packet isn't too big or too small. */ if (linux_args->len < linux_ip_copysize || linux_args->len > IP_MAXPACKET) return (EINVAL); packet = (struct ip *)malloc(linux_args->len, M_LINUX, M_WAITOK); /* Make kernel copy of the packet to be sent */ if ((error = copyin(PTRIN(linux_args->msg), packet, linux_args->len))) goto goout; /* Convert fields from Linux to BSD raw IP socket format */ packet->ip_len = linux_args->len; packet->ip_off = ntohs(packet->ip_off); /* Prepare the msghdr and iovec structures describing the new packet */ msg.msg_name = PTRIN(linux_args->to); msg.msg_namelen = linux_args->tolen; msg.msg_iov = aiov; msg.msg_iovlen = 1; msg.msg_control = NULL; msg.msg_flags = 0; aiov[0].iov_base = (char *)packet; aiov[0].iov_len = linux_args->len; error = linux_sendit(td, linux_args->s, &msg, linux_args->flags, NULL, UIO_SYSSPACE); goout: free(packet, M_LINUX); return (error); } static const char *linux_netlink_names[] = { [LINUX_NETLINK_ROUTE] = "ROUTE", [LINUX_NETLINK_SOCK_DIAG] = "SOCK_DIAG", [LINUX_NETLINK_NFLOG] = "NFLOG", [LINUX_NETLINK_SELINUX] = "SELINUX", [LINUX_NETLINK_AUDIT] = "AUDIT", [LINUX_NETLINK_FIB_LOOKUP] = "FIB_LOOKUP", [LINUX_NETLINK_NETFILTER] = "NETFILTER", [LINUX_NETLINK_KOBJECT_UEVENT] = "KOBJECT_UEVENT", }; int linux_socket(struct thread *td, struct linux_socket_args *args) { int domain, retval_socket, type; type = args->type & LINUX_SOCK_TYPE_MASK; if (type < 0 || type > LINUX_SOCK_MAX) return (EINVAL); retval_socket = linux_set_socket_flags(args->type & ~LINUX_SOCK_TYPE_MASK, &type); if (retval_socket != 0) return (retval_socket); domain = linux_to_bsd_domain(args->domain); if (domain == -1) { /* Mask off SOCK_NONBLOCK / CLOEXEC for error messages. */ type = args->type & LINUX_SOCK_TYPE_MASK; if (args->domain == LINUX_AF_NETLINK && args->protocol == LINUX_NETLINK_AUDIT) { ; /* Do nothing, quietly. */ } else if (args->domain == LINUX_AF_NETLINK) { const char *nl_name; if (args->protocol >= 0 && args->protocol < nitems(linux_netlink_names)) nl_name = linux_netlink_names[args->protocol]; else nl_name = NULL; if (nl_name != NULL) linux_msg(curthread, "unsupported socket(AF_NETLINK, %d, " "NETLINK_%s)", type, nl_name); else linux_msg(curthread, "unsupported socket(AF_NETLINK, %d, %d)", type, args->protocol); } else { linux_msg(curthread, "unsupported socket domain %d, " "type %d, protocol %d", args->domain, type, args->protocol); } return (EAFNOSUPPORT); } retval_socket = kern_socket(td, domain, type, args->protocol); if (retval_socket) return (retval_socket); if (type == SOCK_RAW && (args->protocol == IPPROTO_RAW || args->protocol == 0) && domain == PF_INET) { /* It's a raw IP socket: set the IP_HDRINCL option. 
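 * On Linux, raw IP sockets created with protocol IPPROTO_RAW imply
 * IP_HDRINCL (see raw(7)); FreeBSD requires it to be set explicitly,
 * so emulate the Linux default here.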
*/ int hdrincl; hdrincl = 1; /* We ignore any error returned by kern_setsockopt() */ kern_setsockopt(td, td->td_retval[0], IPPROTO_IP, IP_HDRINCL, &hdrincl, UIO_SYSSPACE, sizeof(hdrincl)); } #ifdef INET6 /* * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by default * and some apps depend on this. So, set V6ONLY to 0 for Linux apps. * For simplicity we do this unconditionally of the net.inet6.ip6.v6only * sysctl value. */ if (domain == PF_INET6) { int v6only; v6only = 0; /* We ignore any error returned by setsockopt() */ kern_setsockopt(td, td->td_retval[0], IPPROTO_IPV6, IPV6_V6ONLY, &v6only, UIO_SYSSPACE, sizeof(v6only)); } #endif return (retval_socket); } int linux_bind(struct thread *td, struct linux_bind_args *args) { struct sockaddr *sa; int error; error = linux_to_bsd_sockaddr(PTRIN(args->name), &sa, &args->namelen); if (error != 0) return (error); error = kern_bindat(td, AT_FDCWD, args->s, sa); free(sa, M_SONAME); /* XXX */ if (error == EADDRNOTAVAIL && args->namelen != sizeof(struct sockaddr_in)) return (EINVAL); return (error); } int linux_connect(struct thread *td, struct linux_connect_args *args) { struct socket *so; struct sockaddr *sa; struct file *fp; int error; error = linux_to_bsd_sockaddr(PTRIN(args->name), &sa, &args->namelen); if (error != 0) return (error); error = kern_connectat(td, AT_FDCWD, args->s, sa); free(sa, M_SONAME); if (error != EISCONN) return (error); /* * Linux doesn't return EISCONN the first time it occurs, * when on a non-blocking socket. Instead it returns the * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD. */ error = getsock(td, args->s, &cap_connect_rights, &fp); if (error != 0) return (error); error = EISCONN; so = fp->f_data; if (atomic_load_int(&fp->f_flag) & FNONBLOCK) { SOCK_LOCK(so); if (so->so_emuldata == 0) error = so->so_error; so->so_emuldata = (void *)1; SOCK_UNLOCK(so); } fdrop(fp, td); return (error); } int linux_listen(struct thread *td, struct linux_listen_args *args) { return (kern_listen(td, args->s, args->backlog)); } static int linux_accept_common(struct thread *td, int s, l_uintptr_t addr, l_uintptr_t namelen, int flags) { - struct sockaddr *sa; + struct sockaddr_storage ss = { .ss_len = sizeof(ss) }; struct file *fp, *fp1; - int bflags, len; struct socket *so; - int error, error1; + socklen_t len; + int bflags, error, error1; bflags = 0; fp = NULL; - sa = NULL; error = linux_set_socket_flags(flags, &bflags); if (error != 0) return (error); - if (PTRIN(addr) == NULL) { - len = 0; - error = kern_accept4(td, s, NULL, NULL, bflags, NULL); - } else { + if (PTRIN(addr) != NULL) { error = copyin(PTRIN(namelen), &len, sizeof(len)); if (error != 0) return (error); if (len < 0) return (EINVAL); - error = kern_accept4(td, s, &sa, &len, bflags, &fp); - } + } else + len = 0; + + error = kern_accept4(td, s, (struct sockaddr *)&ss, bflags, &fp); /* * Translate errno values into ones used by Linux. */ if (error != 0) { /* * XXX. This is wrong, different sockaddr structures * have different sizes. 
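 * Only two cases are patched up below: EFAULT becomes EINVAL when the
 * caller's namelen does not match a sockaddr_in, and EINVAL becomes
 * EOPNOTSUPP for datagram sockets, matching Linux behavior.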
*/ switch (error) { case EFAULT: if (namelen != sizeof(struct sockaddr_in)) error = EINVAL; break; case EINVAL: error1 = getsock(td, s, &cap_accept_rights, &fp1); if (error1 != 0) { error = error1; break; } so = fp1->f_data; if (so->so_type == SOCK_DGRAM) error = EOPNOTSUPP; fdrop(fp1, td); break; } return (error); } - if (len != 0) { - error = linux_copyout_sockaddr(sa, PTRIN(addr), len); - if (error == 0) - error = copyout(&len, PTRIN(namelen), - sizeof(len)); + if (PTRIN(addr) != NULL) { + len = min(ss.ss_len, len); + error = linux_copyout_sockaddr((struct sockaddr *)&ss, + PTRIN(addr), len); + if (error == 0) { + len = ss.ss_len; + error = copyout(&len, PTRIN(namelen), sizeof(len)); + } if (error != 0) { fdclose(td, fp, td->td_retval[0]); td->td_retval[0] = 0; } } if (fp != NULL) fdrop(fp, td); - free(sa, M_SONAME); return (error); } int linux_accept(struct thread *td, struct linux_accept_args *args) { return (linux_accept_common(td, args->s, args->addr, args->namelen, 0)); } int linux_accept4(struct thread *td, struct linux_accept4_args *args) { return (linux_accept_common(td, args->s, args->addr, args->namelen, args->flags)); } int linux_getsockname(struct thread *td, struct linux_getsockname_args *args) { struct sockaddr *sa; int len, error; error = copyin(PTRIN(args->namelen), &len, sizeof(len)); if (error != 0) return (error); error = kern_getsockname(td, args->s, &sa, &len); if (error != 0) return (error); if (len != 0) error = linux_copyout_sockaddr(sa, PTRIN(args->addr), len); free(sa, M_SONAME); if (error == 0) error = copyout(&len, PTRIN(args->namelen), sizeof(len)); return (error); } int linux_getpeername(struct thread *td, struct linux_getpeername_args *args) { struct sockaddr *sa; int len, error; error = copyin(PTRIN(args->namelen), &len, sizeof(len)); if (error != 0) return (error); if (len < 0) return (EINVAL); error = kern_getpeername(td, args->s, &sa, &len); if (error != 0) return (error); if (len != 0) error = linux_copyout_sockaddr(sa, PTRIN(args->addr), len); free(sa, M_SONAME); if (error == 0) error = copyout(&len, PTRIN(args->namelen), sizeof(len)); return (error); } int linux_socketpair(struct thread *td, struct linux_socketpair_args *args) { int domain, error, sv[2], type; domain = linux_to_bsd_domain(args->domain); if (domain != PF_LOCAL) return (EAFNOSUPPORT); type = args->type & LINUX_SOCK_TYPE_MASK; if (type < 0 || type > LINUX_SOCK_MAX) return (EINVAL); error = linux_set_socket_flags(args->type & ~LINUX_SOCK_TYPE_MASK, &type); if (error != 0) return (error); if (args->protocol != 0 && args->protocol != PF_UNIX) { /* * Use of PF_UNIX as protocol argument is not right, * but Linux does it. * Do not map PF_UNIX as its Linux value is identical * to FreeBSD one. 
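 * (Both systems define PF_UNIX as 1; any other non-zero protocol is
 * rejected here, and the kern_socketpair() call below always passes a
 * protocol of 0.)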
*/ return (EPROTONOSUPPORT); } error = kern_socketpair(td, domain, type, 0, sv); if (error != 0) return (error); error = copyout(sv, PTRIN(args->rsv), 2 * sizeof(int)); if (error != 0) { (void)kern_close(td, sv[0]); (void)kern_close(td, sv[1]); } return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) struct linux_send_args { register_t s; register_t msg; register_t len; register_t flags; }; static int linux_send(struct thread *td, struct linux_send_args *args) { struct sendto_args /* { int s; caddr_t buf; int len; int flags; caddr_t to; int tolen; } */ bsd_args; struct file *fp; int error; bsd_args.s = args->s; bsd_args.buf = (caddr_t)PTRIN(args->msg); bsd_args.len = args->len; bsd_args.flags = linux_to_bsd_msg_flags(args->flags); bsd_args.to = NULL; bsd_args.tolen = 0; error = sys_sendto(td, &bsd_args); if (error == ENOTCONN) { /* * Linux doesn't return ENOTCONN for non-blocking sockets. * Instead it returns the EAGAIN. */ error = getsock(td, args->s, &cap_send_rights, &fp); if (error == 0) { if (atomic_load_int(&fp->f_flag) & FNONBLOCK) error = EAGAIN; fdrop(fp, td); } } return (error); } struct linux_recv_args { register_t s; register_t msg; register_t len; register_t flags; }; static int linux_recv(struct thread *td, struct linux_recv_args *args) { struct recvfrom_args /* { int s; caddr_t buf; int len; int flags; struct sockaddr *from; socklen_t fromlenaddr; } */ bsd_args; bsd_args.s = args->s; bsd_args.buf = (caddr_t)PTRIN(args->msg); bsd_args.len = args->len; bsd_args.flags = linux_to_bsd_msg_flags(args->flags); bsd_args.from = NULL; bsd_args.fromlenaddr = 0; return (sys_recvfrom(td, &bsd_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_sendto(struct thread *td, struct linux_sendto_args *args) { struct msghdr msg; struct iovec aiov; struct socket *so; struct file *fp; int error; if (linux_check_hdrincl(td, args->s) == 0) /* IP_HDRINCL set, tweak the packet before sending */ return (linux_sendto_hdrincl(td, args)); bzero(&msg, sizeof(msg)); error = getsock(td, args->s, &cap_send_connect_rights, &fp); if (error != 0) return (error); so = fp->f_data; if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) { msg.msg_name = PTRIN(args->to); msg.msg_namelen = args->tolen; } msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = PTRIN(args->msg); aiov.iov_len = args->len; fdrop(fp, td); return (linux_sendit(td, args->s, &msg, args->flags, NULL, UIO_USERSPACE)); } int linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args) { struct sockaddr *sa; struct msghdr msg; struct iovec aiov; int error, fromlen; if (PTRIN(args->fromlen) != NULL) { error = copyin(PTRIN(args->fromlen), &fromlen, sizeof(fromlen)); if (error != 0) return (error); if (fromlen < 0) return (EINVAL); fromlen = min(fromlen, SOCK_MAXADDRLEN); sa = malloc(fromlen, M_SONAME, M_WAITOK); } else { fromlen = 0; sa = NULL; } msg.msg_name = sa; msg.msg_namelen = fromlen; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = PTRIN(args->buf); aiov.iov_len = args->len; msg.msg_control = 0; msg.msg_flags = linux_to_bsd_msg_flags(args->flags); error = kern_recvit(td, args->s, &msg, UIO_SYSSPACE, NULL); if (error != 0) goto out; /* * XXX. Seems that FreeBSD is different from Linux here. Linux * fill source address if underlying protocol provides it, while * FreeBSD fill it if underlying protocol is not connection-oriented. * So, kern_recvit() set msg.msg_namelen to 0 if protocol pr_flags * does not contains PR_ADDR flag. 
*/ if (PTRIN(args->from) != NULL && msg.msg_namelen != 0) error = linux_copyout_sockaddr(sa, PTRIN(args->from), msg.msg_namelen); if (error == 0 && PTRIN(args->fromlen) != NULL) error = copyout(&msg.msg_namelen, PTRIN(args->fromlen), sizeof(msg.msg_namelen)); out: free(sa, M_SONAME); return (error); } static int linux_sendmsg_common(struct thread *td, l_int s, struct l_msghdr *msghdr, l_uint flags) { struct cmsghdr *cmsg; struct mbuf *control; struct msghdr msg; struct l_cmsghdr linux_cmsg; struct l_cmsghdr *ptr_cmsg; struct l_msghdr linux_msghdr; struct iovec *iov; socklen_t datalen; struct sockaddr *sa; struct socket *so; sa_family_t sa_family; struct file *fp; void *data; l_size_t len; l_size_t clen; int error; error = copyin(msghdr, &linux_msghdr, sizeof(linux_msghdr)); if (error != 0) return (error); /* * Some Linux applications (ping) define a non-NULL control data * pointer, but a msg_controllen of 0, which is not allowed in the * FreeBSD system call interface. NULL the msg_control pointer in * order to handle this case. This should be checked, but allows the * Linux ping to work. */ if (PTRIN(linux_msghdr.msg_control) != NULL && linux_msghdr.msg_controllen == 0) linux_msghdr.msg_control = PTROUT(NULL); error = linux_to_bsd_msghdr(&msg, &linux_msghdr); if (error != 0) return (error); #ifdef COMPAT_LINUX32 error = freebsd32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen, &iov, EMSGSIZE); #else error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); #endif if (error != 0) return (error); control = NULL; error = kern_getsockname(td, s, &sa, &datalen); if (error != 0) goto bad; sa_family = sa->sa_family; free(sa, M_SONAME); if (flags & LINUX_MSG_OOB) { error = EOPNOTSUPP; if (sa_family == AF_UNIX) goto bad; error = getsock(td, s, &cap_send_rights, &fp); if (error != 0) goto bad; so = fp->f_data; if (so->so_type != SOCK_STREAM) error = EOPNOTSUPP; fdrop(fp, td); if (error != 0) goto bad; } if (linux_msghdr.msg_controllen >= sizeof(struct l_cmsghdr)) { error = ENOBUFS; control = m_get(M_WAITOK, MT_CONTROL); MCLGET(control, M_WAITOK); data = mtod(control, void *); datalen = 0; ptr_cmsg = PTRIN(linux_msghdr.msg_control); clen = linux_msghdr.msg_controllen; do { error = copyin(ptr_cmsg, &linux_cmsg, sizeof(struct l_cmsghdr)); if (error != 0) goto bad; error = EINVAL; if (linux_cmsg.cmsg_len < sizeof(struct l_cmsghdr) || linux_cmsg.cmsg_len > clen) goto bad; if (datalen + CMSG_HDRSZ > MCLBYTES) goto bad; /* * Now we support only SCM_RIGHTS and SCM_CRED, * so return EINVAL in any other cmsg_type */ cmsg = data; cmsg->cmsg_type = linux_to_bsd_cmsg_type(linux_cmsg.cmsg_type); cmsg->cmsg_level = linux_to_bsd_sockopt_level(linux_cmsg.cmsg_level); if (cmsg->cmsg_type == -1 || cmsg->cmsg_level != SOL_SOCKET) { linux_msg(curthread, "unsupported sendmsg cmsg level %d type %d", linux_cmsg.cmsg_level, linux_cmsg.cmsg_type); goto bad; } /* * Some applications (e.g. pulseaudio) attempt to * send ancillary data even if the underlying protocol * doesn't support it which is not allowed in the * FreeBSD system call interface. 
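 *
 * For example (hypothetical userspace sketch, not part of this
 * change), an SCM_RIGHTS control message built for a non-AF_UNIX
 * socket:
 *
 *	union { struct cmsghdr hdr; char buf[CMSG_SPACE(sizeof(int))]; } cb;
 *	struct msghdr m = { .msg_control = cb.buf,
 *	    .msg_controllen = sizeof(cb.buf) };
 *	cb.hdr.cmsg_level = SOL_SOCKET;
 *	cb.hdr.cmsg_type = SCM_RIGHTS;
 *	cb.hdr.cmsg_len = CMSG_LEN(sizeof(int));
 *	sendmsg(s, &m, 0);
 *
 * is quietly dropped by the 'goto next' below rather than failing
 * the whole send.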
*/ if (sa_family != AF_UNIX) goto next; if (cmsg->cmsg_type == SCM_CREDS) { len = sizeof(struct cmsgcred); if (datalen + CMSG_SPACE(len) > MCLBYTES) goto bad; /* * The lower levels will fill in the structure */ memset(CMSG_DATA(data), 0, len); } else { len = linux_cmsg.cmsg_len - L_CMSG_HDRSZ; if (datalen + CMSG_SPACE(len) < datalen || datalen + CMSG_SPACE(len) > MCLBYTES) goto bad; error = copyin(LINUX_CMSG_DATA(ptr_cmsg), CMSG_DATA(data), len); if (error != 0) goto bad; } cmsg->cmsg_len = CMSG_LEN(len); data = (char *)data + CMSG_SPACE(len); datalen += CMSG_SPACE(len); next: if (clen <= LINUX_CMSG_ALIGN(linux_cmsg.cmsg_len)) break; clen -= LINUX_CMSG_ALIGN(linux_cmsg.cmsg_len); ptr_cmsg = (struct l_cmsghdr *)((char *)ptr_cmsg + LINUX_CMSG_ALIGN(linux_cmsg.cmsg_len)); } while(clen >= sizeof(struct l_cmsghdr)); control->m_len = datalen; if (datalen == 0) { m_freem(control); control = NULL; } } msg.msg_iov = iov; msg.msg_flags = 0; error = linux_sendit(td, s, &msg, flags, control, UIO_USERSPACE); control = NULL; bad: m_freem(control); free(iov, M_IOV); return (error); } int linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args) { return (linux_sendmsg_common(td, args->s, PTRIN(args->msg), args->flags)); } int linux_sendmmsg(struct thread *td, struct linux_sendmmsg_args *args) { struct l_mmsghdr *msg; l_uint retval; int error, datagrams; if (args->vlen > UIO_MAXIOV) args->vlen = UIO_MAXIOV; msg = PTRIN(args->msg); datagrams = 0; while (datagrams < args->vlen) { error = linux_sendmsg_common(td, args->s, &msg->msg_hdr, args->flags); if (error != 0) break; retval = td->td_retval[0]; error = copyout(&retval, &msg->msg_len, sizeof(msg->msg_len)); if (error != 0) break; ++msg; ++datagrams; } if (error == 0) td->td_retval[0] = datagrams; return (error); } static int recvmsg_scm_rights(struct thread *td, l_uint flags, socklen_t *datalen, void **data, void **udata) { int i, fd, fds, *fdp; if (flags & LINUX_MSG_CMSG_CLOEXEC) { fds = *datalen / sizeof(int); fdp = *data; for (i = 0; i < fds; i++) { fd = *fdp++; (void)kern_fcntl(td, fd, F_SETFD, FD_CLOEXEC); } } return (0); } static int recvmsg_scm_creds(socklen_t *datalen, void **data, void **udata) { struct cmsgcred *cmcred; struct l_ucred lu; cmcred = *data; lu.pid = cmcred->cmcred_pid; lu.uid = cmcred->cmcred_uid; lu.gid = cmcred->cmcred_gid; memmove(*data, &lu, sizeof(lu)); *datalen = sizeof(lu); return (0); } _Static_assert(sizeof(struct cmsgcred) >= sizeof(struct l_ucred), "scm_creds sizeof l_ucred"); static int recvmsg_scm_creds2(socklen_t *datalen, void **data, void **udata) { struct sockcred2 *scred; struct l_ucred lu; scred = *data; lu.pid = scred->sc_pid; lu.uid = scred->sc_uid; lu.gid = scred->sc_gid; memmove(*data, &lu, sizeof(lu)); *datalen = sizeof(lu); return (0); } _Static_assert(sizeof(struct sockcred2) >= sizeof(struct l_ucred), "scm_creds2 sizeof l_ucred"); #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static int recvmsg_scm_timestamp(l_int msg_type, socklen_t *datalen, void **data, void **udata) { l_sock_timeval ltv64; l_timeval ltv; struct timeval *tv; socklen_t len; void *buf; if (*datalen != sizeof(struct timeval)) return (EMSGSIZE); tv = *data; #if defined(COMPAT_LINUX32) if (msg_type == LINUX_SCM_TIMESTAMPO && (tv->tv_sec > INT_MAX || tv->tv_sec < INT_MIN)) return (EOVERFLOW); #endif if (msg_type == LINUX_SCM_TIMESTAMPN) len = sizeof(ltv64); else len = sizeof(ltv); buf = malloc(len, M_LINUX, M_WAITOK); if (msg_type == LINUX_SCM_TIMESTAMPN) { ltv64.tv_sec = tv->tv_sec; ltv64.tv_usec = 
tv->tv_usec; memmove(buf, &ltv64, len); } else { ltv.tv_sec = tv->tv_sec; ltv.tv_usec = tv->tv_usec; memmove(buf, &ltv, len); } *data = *udata = buf; *datalen = len; return (0); } #else _Static_assert(sizeof(struct timeval) == sizeof(l_timeval), "scm_timestamp sizeof l_timeval"); #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static int recvmsg_scm_timestampns(l_int msg_type, socklen_t *datalen, void **data, void **udata) { struct l_timespec64 ts64; struct l_timespec ts32; struct timespec ts; socklen_t len; void *buf; if (msg_type == LINUX_SCM_TIMESTAMPNSO) len = sizeof(ts32); else len = sizeof(ts64); buf = malloc(len, M_LINUX, M_WAITOK); bintime2timespec(*data, &ts); if (msg_type == LINUX_SCM_TIMESTAMPNSO) { ts32.tv_sec = ts.tv_sec; ts32.tv_nsec = ts.tv_nsec; memmove(buf, &ts32, len); } else { ts64.tv_sec = ts.tv_sec; ts64.tv_nsec = ts.tv_nsec; memmove(buf, &ts64, len); } *data = *udata = buf; *datalen = len; return (0); } #else static int recvmsg_scm_timestampns(l_int msg_type, socklen_t *datalen, void **data, void **udata) { struct timespec ts; bintime2timespec(*data, &ts); memmove(*data, &ts, sizeof(struct timespec)); *datalen = sizeof(struct timespec); return (0); } _Static_assert(sizeof(struct bintime) >= sizeof(struct timespec), "scm_timestampns sizeof timespec"); #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ static int recvmsg_scm_sol_socket(struct thread *td, l_int msg_type, l_int lmsg_type, l_uint flags, socklen_t *datalen, void **data, void **udata) { int error; error = 0; switch (msg_type) { case SCM_RIGHTS: error = recvmsg_scm_rights(td, flags, datalen, data, udata); break; case SCM_CREDS: error = recvmsg_scm_creds(datalen, data, udata); break; case SCM_CREDS2: error = recvmsg_scm_creds2(datalen, data, udata); break; case SCM_TIMESTAMP: #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) error = recvmsg_scm_timestamp(lmsg_type, datalen, data, udata); #endif break; case SCM_BINTIME: error = recvmsg_scm_timestampns(lmsg_type, datalen, data, udata); break; } return (error); } static int recvmsg_scm_ip_origdstaddr(socklen_t *datalen, void **data, void **udata) { struct l_sockaddr *lsa; int error; error = bsd_to_linux_sockaddr(*data, &lsa, *datalen); if (error == 0) { *data = *udata = lsa; *datalen = sizeof(*lsa); } return (error); } static int recvmsg_scm_ipproto_ip(l_int msg_type, l_int lmsg_type, socklen_t *datalen, void **data, void **udata) { int error; error = 0; switch (msg_type) { case IP_ORIGDSTADDR: error = recvmsg_scm_ip_origdstaddr(datalen, data, udata); break; } return (error); } static int linux_recvmsg_common(struct thread *td, l_int s, struct l_msghdr *msghdr, l_uint flags, struct msghdr *msg) { struct proc *p = td->td_proc; struct cmsghdr *cm; struct l_cmsghdr *lcm = NULL; socklen_t datalen, maxlen, outlen; struct l_msghdr l_msghdr; struct iovec *iov, *uiov; struct mbuf *m, *control = NULL; struct mbuf **controlp; struct sockaddr *sa; caddr_t outbuf; void *data, *udata; int error, skiped; error = copyin(msghdr, &l_msghdr, sizeof(l_msghdr)); if (error != 0) return (error); /* * Pass user-supplied recvmsg() flags in msg_flags field, * following sys_recvmsg() convention.
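 *
 * E.g. a Linux caller (hypothetical, for illustration) issuing
 *
 *	recvmsg(s, &m, MSG_CMSG_CLOEXEC);
 *
 * relies on the flag being carried through to recvmsg_scm_rights()
 * above, which then applies FD_CLOEXEC to every file descriptor it
 * installs.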
*/ l_msghdr.msg_flags = flags; error = linux_to_bsd_msghdr(msg, &l_msghdr); if (error != 0) return (error); #ifdef COMPAT_LINUX32 error = freebsd32_copyiniov(PTRIN(msg->msg_iov), msg->msg_iovlen, &iov, EMSGSIZE); #else error = copyiniov(msg->msg_iov, msg->msg_iovlen, &iov, EMSGSIZE); #endif if (error != 0) return (error); if (msg->msg_name != NULL && msg->msg_namelen > 0) { msg->msg_namelen = min(msg->msg_namelen, SOCK_MAXADDRLEN); sa = malloc(msg->msg_namelen, M_SONAME, M_WAITOK); msg->msg_name = sa; } else { sa = NULL; msg->msg_name = NULL; } uiov = msg->msg_iov; msg->msg_iov = iov; controlp = (msg->msg_control != NULL) ? &control : NULL; error = kern_recvit(td, s, msg, UIO_SYSSPACE, controlp); msg->msg_iov = uiov; if (error != 0) goto bad; /* * Note that kern_recvit() updates msg->msg_namelen. */ if (msg->msg_name != NULL && msg->msg_namelen > 0) { msg->msg_name = PTRIN(l_msghdr.msg_name); error = linux_copyout_sockaddr(sa, msg->msg_name, msg->msg_namelen); if (error != 0) goto bad; } error = bsd_to_linux_msghdr(msg, &l_msghdr); if (error != 0) goto bad; skiped = outlen = 0; maxlen = l_msghdr.msg_controllen; if (control == NULL) goto out; lcm = malloc(L_CMSG_HDRSZ, M_LINUX, M_WAITOK | M_ZERO); msg->msg_control = mtod(control, struct cmsghdr *); msg->msg_controllen = control->m_len; outbuf = PTRIN(l_msghdr.msg_control); for (m = control; m != NULL; m = m->m_next) { cm = mtod(m, struct cmsghdr *); lcm->cmsg_type = bsd_to_linux_cmsg_type(p, cm->cmsg_type, cm->cmsg_level); lcm->cmsg_level = bsd_to_linux_sockopt_level(cm->cmsg_level); if (lcm->cmsg_type == -1 || cm->cmsg_level == -1) { LINUX_RATELIMIT_MSG_OPT2( "unsupported recvmsg cmsg level %d type %d", cm->cmsg_level, cm->cmsg_type); /* Skip unsupported messages */ skiped++; continue; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; udata = NULL; error = 0; switch (cm->cmsg_level) { case IPPROTO_IP: error = recvmsg_scm_ipproto_ip(cm->cmsg_type, lcm->cmsg_type, &datalen, &data, &udata); break; case SOL_SOCKET: error = recvmsg_scm_sol_socket(td, cm->cmsg_type, lcm->cmsg_type, flags, &datalen, &data, &udata); break; } /* The recvmsg_scm_ is responsible to free udata on error. 
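 *
 * A conforming handler follows this shape (sketch of the pattern
 * used by the handlers above, not new code):
 *
 *	buf = malloc(len, M_LINUX, M_WAITOK);
 *	... translate *data into buf ...
 *	*data = *udata = buf;
 *	*datalen = len;
 *	return (0);
 *
 * On failure it must free (or never publish) that buffer itself,
 * since the 'goto bad' below bypasses the free(udata, M_LINUX) on
 * the common path.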
*/ if (error != 0) goto bad; if (outlen + LINUX_CMSG_LEN(datalen) > maxlen) { if (outlen == 0) { error = EMSGSIZE; goto err; } else { l_msghdr.msg_flags |= LINUX_MSG_CTRUNC; m_dispose_extcontrolm(control); free(udata, M_LINUX); goto out; } } lcm->cmsg_len = LINUX_CMSG_LEN(datalen); error = copyout(lcm, outbuf, L_CMSG_HDRSZ); if (error == 0) { error = copyout(data, LINUX_CMSG_DATA(outbuf), datalen); if (error == 0) { outbuf += LINUX_CMSG_SPACE(datalen); outlen += LINUX_CMSG_SPACE(datalen); } } err: free(udata, M_LINUX); if (error != 0) goto bad; } if (outlen == 0 && skiped > 0) { error = EINVAL; goto bad; } out: l_msghdr.msg_controllen = outlen; error = copyout(&l_msghdr, msghdr, sizeof(l_msghdr)); bad: if (control != NULL) { if (error != 0) m_dispose_extcontrolm(control); m_freem(control); } free(iov, M_IOV); free(lcm, M_LINUX); free(sa, M_SONAME); return (error); } int linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args) { struct msghdr bsd_msg; struct file *fp; int error; error = getsock(td, args->s, &cap_recv_rights, &fp); if (error != 0) return (error); fdrop(fp, td); return (linux_recvmsg_common(td, args->s, PTRIN(args->msg), args->flags, &bsd_msg)); } static int linux_recvmmsg_common(struct thread *td, l_int s, struct l_mmsghdr *msg, l_uint vlen, l_uint flags, struct timespec *tts) { struct msghdr bsd_msg; struct timespec ts; struct file *fp; l_uint retval; int error, datagrams; error = getsock(td, s, &cap_recv_rights, &fp); if (error != 0) return (error); datagrams = 0; while (datagrams < vlen) { error = linux_recvmsg_common(td, s, &msg->msg_hdr, flags & ~LINUX_MSG_WAITFORONE, &bsd_msg); if (error != 0) break; retval = td->td_retval[0]; error = copyout(&retval, &msg->msg_len, sizeof(msg->msg_len)); if (error != 0) break; ++msg; ++datagrams; /* * MSG_WAITFORONE turns on MSG_DONTWAIT after one packet. */ if (flags & LINUX_MSG_WAITFORONE) flags |= LINUX_MSG_DONTWAIT; /* * See BUGS section of recvmmsg(2). */ if (tts) { getnanotime(&ts); timespecsub(&ts, tts, &ts); if (!timespecisset(&ts) || ts.tv_sec > 0) break; } /* Out of band data, return right away. 
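 *
 * (For reference, a hypothetical Linux caller of this batching
 * loop:
 *
 *	struct mmsghdr msgs[8];
 *	recvmmsg(s, msgs, 8, MSG_WAITFORONE, NULL);
 *
 * blocks only for the first datagram; later iterations run with
 * MSG_DONTWAIT, as set above.)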
*/ if (bsd_msg.msg_flags & MSG_OOB) break; } if (error == 0) td->td_retval[0] = datagrams; fdrop(fp, td); return (error); } int linux_recvmmsg(struct thread *td, struct linux_recvmmsg_args *args) { struct timespec ts, tts, *ptts; int error; if (args->timeout) { error = linux_get_timespec(&ts, args->timeout); if (error != 0) return (error); getnanotime(&tts); timespecadd(&tts, &ts, &tts); ptts = &tts; } else ptts = NULL; return (linux_recvmmsg_common(td, args->s, PTRIN(args->msg), args->vlen, args->flags, ptts)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_recvmmsg_time64(struct thread *td, struct linux_recvmmsg_time64_args *args) { struct timespec ts, tts, *ptts; int error; if (args->timeout) { error = linux_get_timespec64(&ts, args->timeout); if (error != 0) return (error); getnanotime(&tts); timespecadd(&tts, &ts, &tts); ptts = &tts; } else ptts = NULL; return (linux_recvmmsg_common(td, args->s, PTRIN(args->msg), args->vlen, args->flags, ptts)); } #endif int linux_shutdown(struct thread *td, struct linux_shutdown_args *args) { return (kern_shutdown(td, args->s, args->how)); } int linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args) { struct proc *p = td->td_proc; struct linux_pemuldata *pem; l_timeval linux_tv; struct sockaddr *sa; struct timeval tv; socklen_t len; int error, level, name, val; level = linux_to_bsd_sockopt_level(args->level); switch (level) { case SOL_SOCKET: name = linux_to_bsd_so_sockopt(args->optname); switch (name) { case LOCAL_CREDS_PERSISTENT: level = SOL_LOCAL; break; case SO_RCVTIMEO: /* FALLTHROUGH */ case SO_SNDTIMEO: error = copyin(PTRIN(args->optval), &linux_tv, sizeof(linux_tv)); if (error != 0) return (error); tv.tv_sec = linux_tv.tv_sec; tv.tv_usec = linux_tv.tv_usec; return (kern_setsockopt(td, args->s, level, name, &tv, UIO_SYSSPACE, sizeof(tv))); /* NOTREACHED */ case SO_TIMESTAMP: /* overwrite SO_BINTIME */ val = 0; error = kern_setsockopt(td, args->s, level, SO_BINTIME, &val, UIO_SYSSPACE, sizeof(val)); if (error != 0) return (error); pem = pem_find(p); pem->so_timestamp = args->optname; break; case SO_BINTIME: /* overwrite SO_TIMESTAMP */ val = 0; error = kern_setsockopt(td, args->s, level, SO_TIMESTAMP, &val, UIO_SYSSPACE, sizeof(val)); if (error != 0) return (error); pem = pem_find(p); pem->so_timestampns = args->optname; break; default: break; } break; case IPPROTO_IP: if (args->optname == LINUX_IP_RECVERR && linux_ignore_ip_recverr) { /* * XXX: This is a hack to unbreak DNS resolution * with glibc 2.30 and above. 
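 *
 * glibc's resolver enables the option roughly as follows
 * (illustration, not part of this change):
 *
 *	int on = 1;
 *	setsockopt(fd, IPPROTO_IP, IP_RECVERR, &on, sizeof(on));
 *
 * and gives up on the query if that fails, so when
 * linux_ignore_ip_recverr is set we claim success instead.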
*/ return (0); } name = linux_to_bsd_ip_sockopt(args->optname); break; case IPPROTO_IPV6: name = linux_to_bsd_ip6_sockopt(args->optname); break; case IPPROTO_TCP: name = linux_to_bsd_tcp_sockopt(args->optname); break; case SOL_NETLINK: level = SOL_SOCKET; name = args->optname; break; default: name = -1; break; } if (name < 0) { if (name == -1) linux_msg(curthread, "unsupported setsockopt level %d optname %d", args->level, args->optname); return (ENOPROTOOPT); } if (name == IPV6_NEXTHOP) { len = args->optlen; error = linux_to_bsd_sockaddr(PTRIN(args->optval), &sa, &len); if (error != 0) return (error); error = kern_setsockopt(td, args->s, level, name, sa, UIO_SYSSPACE, len); free(sa, M_SONAME); } else { error = kern_setsockopt(td, args->s, level, name, PTRIN(args->optval), UIO_USERSPACE, args->optlen); } return (error); } static int linux_sockopt_copyout(struct thread *td, void *val, socklen_t len, struct linux_getsockopt_args *args) { int error; error = copyout(val, PTRIN(args->optval), len); if (error == 0) error = copyout(&len, PTRIN(args->optlen), sizeof(len)); return (error); } static int linux_getsockopt_so_peergroups(struct thread *td, struct linux_getsockopt_args *args) { struct xucred xu; socklen_t xulen, len; int error, i; xulen = sizeof(xu); error = kern_getsockopt(td, args->s, 0, LOCAL_PEERCRED, &xu, UIO_SYSSPACE, &xulen); if (error != 0) return (error); len = xu.cr_ngroups * sizeof(l_gid_t); if (args->optlen < len) { error = copyout(&len, PTRIN(args->optlen), sizeof(len)); if (error == 0) error = ERANGE; return (error); } /* * "- 1" to skip the primary group. */ for (i = 0; i < xu.cr_ngroups - 1; i++) { error = copyout(xu.cr_groups + i + 1, (void *)(args->optval + i * sizeof(l_gid_t)), sizeof(l_gid_t)); if (error != 0) return (error); } error = copyout(&len, PTRIN(args->optlen), sizeof(len)); return (error); } static int linux_getsockopt_so_peersec(struct thread *td, struct linux_getsockopt_args *args) { socklen_t len; int error; len = sizeof(SECURITY_CONTEXT_STRING); if (args->optlen < len) { error = copyout(&len, PTRIN(args->optlen), sizeof(len)); if (error == 0) error = ERANGE; return (error); } return (linux_sockopt_copyout(td, SECURITY_CONTEXT_STRING, len, args)); } static int linux_getsockopt_so_linger(struct thread *td, struct linux_getsockopt_args *args) { struct linger ling; socklen_t len; int error; len = sizeof(ling); error = kern_getsockopt(td, args->s, SOL_SOCKET, SO_LINGER, &ling, UIO_SYSSPACE, &len); if (error != 0) return (error); ling.l_onoff = ((ling.l_onoff & SO_LINGER) != 0); return (linux_sockopt_copyout(td, &ling, len, args)); } int linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args) { l_timeval linux_tv; struct timeval tv; socklen_t tv_len, xulen, len; struct sockaddr *sa; struct xucred xu; struct l_ucred lxu; int error, level, name, newval; level = linux_to_bsd_sockopt_level(args->level); switch (level) { case SOL_SOCKET: switch (args->optname) { case LINUX_SO_PEERGROUPS: return (linux_getsockopt_so_peergroups(td, args)); case LINUX_SO_PEERSEC: return (linux_getsockopt_so_peersec(td, args)); default: break; } name = linux_to_bsd_so_sockopt(args->optname); switch (name) { case LOCAL_CREDS_PERSISTENT: level = SOL_LOCAL; break; case SO_RCVTIMEO: /* FALLTHROUGH */ case SO_SNDTIMEO: tv_len = sizeof(tv); error = kern_getsockopt(td, args->s, level, name, &tv, UIO_SYSSPACE, &tv_len); if (error != 0) return (error); linux_tv.tv_sec = tv.tv_sec; linux_tv.tv_usec = tv.tv_usec; return (linux_sockopt_copyout(td, &linux_tv, sizeof(linux_tv), 
args)); /* NOTREACHED */ case LOCAL_PEERCRED: if (args->optlen < sizeof(lxu)) return (EINVAL); /* * LOCAL_PEERCRED is not served at the SOL_SOCKET level, * but by the Unix socket's level 0. */ level = 0; xulen = sizeof(xu); error = kern_getsockopt(td, args->s, level, name, &xu, UIO_SYSSPACE, &xulen); if (error != 0) return (error); lxu.pid = xu.cr_pid; lxu.uid = xu.cr_uid; lxu.gid = xu.cr_gid; return (linux_sockopt_copyout(td, &lxu, sizeof(lxu), args)); /* NOTREACHED */ case SO_ERROR: len = sizeof(newval); error = kern_getsockopt(td, args->s, level, name, &newval, UIO_SYSSPACE, &len); if (error != 0) return (error); newval = -bsd_to_linux_errno(newval); return (linux_sockopt_copyout(td, &newval, len, args)); /* NOTREACHED */ case SO_DOMAIN: len = sizeof(newval); error = kern_getsockopt(td, args->s, level, name, &newval, UIO_SYSSPACE, &len); if (error != 0) return (error); newval = bsd_to_linux_domain(newval); if (newval == -1) return (ENOPROTOOPT); return (linux_sockopt_copyout(td, &newval, len, args)); /* NOTREACHED */ case SO_LINGER: return (linux_getsockopt_so_linger(td, args)); /* NOTREACHED */ default: break; } break; case IPPROTO_IP: name = linux_to_bsd_ip_sockopt(args->optname); break; case IPPROTO_IPV6: name = linux_to_bsd_ip6_sockopt(args->optname); break; case IPPROTO_TCP: name = linux_to_bsd_tcp_sockopt(args->optname); break; default: name = -1; break; } if (name < 0) { if (name == -1) linux_msg(curthread, "unsupported getsockopt level %d optname %d", args->level, args->optname); return (EINVAL); } if (name == IPV6_NEXTHOP) { error = copyin(PTRIN(args->optlen), &len, sizeof(len)); if (error != 0) return (error); sa = malloc(len, M_SONAME, M_WAITOK); error = kern_getsockopt(td, args->s, level, name, sa, UIO_SYSSPACE, &len); if (error != 0) goto out; error = linux_copyout_sockaddr(sa, PTRIN(args->optval), len); if (error == 0) error = copyout(&len, PTRIN(args->optlen), sizeof(len)); out: free(sa, M_SONAME); } else { if (args->optval) { error = copyin(PTRIN(args->optlen), &len, sizeof(len)); if (error != 0) return (error); } error = kern_getsockopt(td, args->s, level, name, PTRIN(args->optval), UIO_USERSPACE, &len); if (error == 0) error = copyout(&len, PTRIN(args->optlen), sizeof(len)); } return (error); } /* * Based on sendfile_getsock from kern_sendfile.c * Determines whether an fd is a stream socket that can be used * with FreeBSD sendfile. */ static bool is_sendfile(struct file *fp, struct file *ofp) { struct socket *so; /* * FreeBSD sendfile() system call sends a regular file or * shared memory object out a stream socket. */ if ((fp->f_type != DTYPE_SHM && fp->f_type != DTYPE_VNODE) || (fp->f_type == DTYPE_VNODE && (fp->f_vnode == NULL || fp->f_vnode->v_type != VREG))) return (false); /* * The socket must be a stream socket and connected. */ if (ofp->f_type != DTYPE_SOCKET) return (false); so = ofp->f_data; if (so->so_type != SOCK_STREAM) return (false); /* * SCTP one-to-one style sockets currently don't work with * sendfile(). 
*/ if (so->so_proto->pr_protocol == IPPROTO_SCTP) return (false); return (!SOLISTENING(so)); } static bool is_regular_file(struct file *fp) { return (fp->f_type == DTYPE_VNODE && fp->f_vnode != NULL && fp->f_vnode->v_type == VREG); } static int sendfile_fallback(struct thread *td, struct file *fp, l_int out, off_t *offset, l_size_t count, off_t *sbytes) { off_t current_offset, out_offset, to_send; l_size_t bytes_sent, n_read; struct file *ofp; struct iovec aiov; struct uio auio; bool seekable; size_t bufsz; void *buf; int flags, error; if (offset == NULL) { if ((error = fo_seek(fp, 0, SEEK_CUR, td)) != 0) return (error); current_offset = td->td_uretoff.tdu_off; } else { if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) return (ESPIPE); current_offset = *offset; } error = fget_write(td, out, &cap_pwrite_rights, &ofp); if (error != 0) return (error); seekable = (ofp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0; if (seekable) { if ((error = fo_seek(ofp, 0, SEEK_CUR, td)) != 0) goto drop; out_offset = td->td_uretoff.tdu_off; } else out_offset = 0; flags = FOF_OFFSET | FOF_NOUPDATE; bufsz = min(count, MAXPHYS); buf = malloc(bufsz, M_LINUX, M_WAITOK); bytes_sent = 0; while (bytes_sent < count) { to_send = min(count - bytes_sent, bufsz); aiov.iov_base = buf; aiov.iov_len = bufsz; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_rw = UIO_READ; auio.uio_offset = current_offset; auio.uio_resid = to_send; error = fo_read(fp, &auio, fp->f_cred, flags, td); if (error != 0) break; n_read = to_send - auio.uio_resid; if (n_read == 0) break; aiov.iov_base = buf; aiov.iov_len = bufsz; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_rw = UIO_WRITE; auio.uio_offset = (seekable) ? out_offset : 0; auio.uio_resid = n_read; error = fo_write(ofp, &auio, ofp->f_cred, flags, td); if (error != 0) break; bytes_sent += n_read; current_offset += n_read; out_offset += n_read; } free(buf, M_LINUX); if (error == 0) { *sbytes = bytes_sent; if (offset != NULL) *offset = current_offset; else error = fo_seek(fp, current_offset, SEEK_SET, td); } if (error == 0 && seekable) error = fo_seek(ofp, out_offset, SEEK_SET, td); drop: fdrop(ofp, td); return (error); } static int sendfile_sendfile(struct thread *td, struct file *fp, l_int out, off_t *offset, l_size_t count, off_t *sbytes) { off_t current_offset; int error; if (offset == NULL) { if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) return (ESPIPE); if ((error = fo_seek(fp, 0, SEEK_CUR, td)) != 0) return (error); current_offset = td->td_uretoff.tdu_off; } else current_offset = *offset; error = fo_sendfile(fp, out, NULL, NULL, current_offset, count, sbytes, 0, td); if (error == 0) { current_offset += *sbytes; if (offset != NULL) *offset = current_offset; else error = fo_seek(fp, current_offset, SEEK_SET, td); } return (error); } static int linux_sendfile_common(struct thread *td, l_int out, l_int in, off_t *offset, l_size_t count) { struct file *fp, *ofp; off_t sbytes; int error; /* Linux cannot have 0 count. 
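 *
 * (Hypothetical Linux caller of this path, for reference:
 *
 *	off_t off = 0;
 *	ssize_t n = sendfile(out_fd, in_fd, &off, nbytes);
 *
 * where n is the byte count written and 'off' is advanced past the
 * data consumed from in_fd; see the block comment in
 * linux_sendfile() below for the full list of differences.)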
*/ if (count <= 0 || (offset != NULL && *offset < 0)) return (EINVAL); AUDIT_ARG_FD(in); error = fget_read(td, in, &cap_pread_rights, &fp); if (error != 0) return (error); if ((fp->f_type != DTYPE_SHM && fp->f_type != DTYPE_VNODE) || (fp->f_type == DTYPE_VNODE && (fp->f_vnode == NULL || fp->f_vnode->v_type != VREG))) { error = EINVAL; goto drop; } error = fget_unlocked(td, out, &cap_no_rights, &ofp); if (error != 0) goto drop; if (is_regular_file(fp) && is_regular_file(ofp)) { error = kern_copy_file_range(td, in, offset, out, NULL, count, 0); } else { sbytes = 0; if (is_sendfile(fp, ofp)) error = sendfile_sendfile(td, fp, out, offset, count, &sbytes); else error = sendfile_fallback(td, fp, out, offset, count, &sbytes); if (error == ENOBUFS && (ofp->f_flag & FNONBLOCK) != 0) error = EAGAIN; if (error == 0) td->td_retval[0] = sbytes; } fdrop(ofp, td); drop: fdrop(fp, td); return (error); } int linux_sendfile(struct thread *td, struct linux_sendfile_args *arg) { /* * Differences between FreeBSD and Linux sendfile: * - Linux doesn't send anything when count is 0 (FreeBSD uses 0 to * mean send the whole file). * - Linux can send to any fd whereas FreeBSD only supports sockets. * We therefore use FreeBSD sendfile where possible for performance, * but fall back on a manual copy (sendfile_fallback). * - Linux doesn't have an equivalent for FreeBSD's flags and sf_hdtr. * - Linux takes an offset pointer and updates it to the read location. * FreeBSD takes in an offset and a 'bytes read' parameter which is * only filled if it isn't NULL. We use this parameter to update the * offset pointer if it exists. * - Linux sendfile returns bytes read on success while FreeBSD * returns 0. We use the 'bytes read' parameter to get this value. */ off_t offset64; l_off_t offset; int error; if (arg->offset != NULL) { error = copyin(arg->offset, &offset, sizeof(offset)); if (error != 0) return (error); offset64 = offset; } error = linux_sendfile_common(td, arg->out, arg->in, arg->offset != NULL ? &offset64 : NULL, arg->count); if (error == 0 && arg->offset != NULL) { #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) if (offset64 > INT32_MAX) return (EOVERFLOW); #endif offset = (l_off_t)offset64; error = copyout(&offset, arg->offset, sizeof(offset)); } return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_sendfile64(struct thread *td, struct linux_sendfile64_args *arg) { off_t offset; int error; if (arg->offset != NULL) { error = copyin(arg->offset, &offset, sizeof(offset)); if (error != 0) return (error); } error = linux_sendfile_common(td, arg->out, arg->in, arg->offset != NULL ? 
&offset : NULL, arg->count); if (error == 0 && arg->offset != NULL) error = copyout(&offset, arg->offset, sizeof(offset)); return (error); } /* Argument list sizes for linux_socketcall */ static const unsigned char lxs_args_cnt[] = { 0 /* unused*/, 3 /* socket */, 3 /* bind */, 3 /* connect */, 2 /* listen */, 3 /* accept */, 3 /* getsockname */, 3 /* getpeername */, 4 /* socketpair */, 4 /* send */, 4 /* recv */, 6 /* sendto */, 6 /* recvfrom */, 2 /* shutdown */, 5 /* setsockopt */, 5 /* getsockopt */, 3 /* sendmsg */, 3 /* recvmsg */, 4 /* accept4 */, 5 /* recvmmsg */, 4 /* sendmmsg */, 4 /* sendfile */ }; #define LINUX_ARGS_CNT (nitems(lxs_args_cnt) - 1) #define LINUX_ARG_SIZE(x) (lxs_args_cnt[x] * sizeof(l_ulong)) int linux_socketcall(struct thread *td, struct linux_socketcall_args *args) { l_ulong a[6]; #if defined(__amd64__) && defined(COMPAT_LINUX32) register_t l_args[6]; #endif void *arg; int error; if (args->what < LINUX_SOCKET || args->what > LINUX_ARGS_CNT) return (EINVAL); error = copyin(PTRIN(args->args), a, LINUX_ARG_SIZE(args->what)); if (error != 0) return (error); #if defined(__amd64__) && defined(COMPAT_LINUX32) for (int i = 0; i < lxs_args_cnt[args->what]; ++i) l_args[i] = a[i]; arg = l_args; #else arg = a; #endif switch (args->what) { case LINUX_SOCKET: return (linux_socket(td, arg)); case LINUX_BIND: return (linux_bind(td, arg)); case LINUX_CONNECT: return (linux_connect(td, arg)); case LINUX_LISTEN: return (linux_listen(td, arg)); case LINUX_ACCEPT: return (linux_accept(td, arg)); case LINUX_GETSOCKNAME: return (linux_getsockname(td, arg)); case LINUX_GETPEERNAME: return (linux_getpeername(td, arg)); case LINUX_SOCKETPAIR: return (linux_socketpair(td, arg)); case LINUX_SEND: return (linux_send(td, arg)); case LINUX_RECV: return (linux_recv(td, arg)); case LINUX_SENDTO: return (linux_sendto(td, arg)); case LINUX_RECVFROM: return (linux_recvfrom(td, arg)); case LINUX_SHUTDOWN: return (linux_shutdown(td, arg)); case LINUX_SETSOCKOPT: return (linux_setsockopt(td, arg)); case LINUX_GETSOCKOPT: return (linux_getsockopt(td, arg)); case LINUX_SENDMSG: return (linux_sendmsg(td, arg)); case LINUX_RECVMSG: return (linux_recvmsg(td, arg)); case LINUX_ACCEPT4: return (linux_accept4(td, arg)); case LINUX_RECVMMSG: return (linux_recvmmsg(td, arg)); case LINUX_SENDMMSG: return (linux_sendmmsg(td, arg)); case LINUX_SENDFILE: return (linux_sendfile(td, arg)); } linux_msg(td, "socket type %d not implemented", args->what); return (ENOSYS); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ diff --git a/sys/dev/cxgbe/iw_cxgbe/cm.c b/sys/dev/cxgbe/iw_cxgbe/cm.c index 77158eb855df..84d6df3f2832 100644 --- a/sys/dev/cxgbe/iw_cxgbe/cm.c +++ b/sys/dev/cxgbe/iw_cxgbe/cm.c @@ -1,3068 +1,3065 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. 
* * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct sge_iq; struct rss_header; struct cpl_set_tcb_rpl; #include #include "offload.h" #include "tom/t4_tom.h" #define TOEPCB(so) ((struct toepcb *)(sototcpcb((so))->t_toe)) #include "iw_cxgbe.h" #include #include #include #include #include static spinlock_t req_lock; static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list; static struct work_struct c4iw_task; static struct workqueue_struct *c4iw_taskq; static LIST_HEAD(err_cqe_list); static spinlock_t err_cqe_lock; static LIST_HEAD(listen_port_list); static DEFINE_MUTEX(listen_port_mutex); static void process_req(struct work_struct *ctx); static void start_ep_timer(struct c4iw_ep *ep); static int stop_ep_timer(struct c4iw_ep *ep); static int set_tcpinfo(struct c4iw_ep *ep); static void process_timeout(struct c4iw_ep *ep); static void process_err_cqes(void); static void *alloc_ep(int size, gfp_t flags); static void close_socket(struct socket *so); static int send_mpa_req(struct c4iw_ep *ep); static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen); static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen); static void close_complete_upcall(struct c4iw_ep *ep, int status); static int send_abort(struct c4iw_ep *ep); static void peer_close_upcall(struct c4iw_ep *ep); static void peer_abort_upcall(struct c4iw_ep *ep); static void connect_reply_upcall(struct c4iw_ep *ep, int status); static int connect_request_upcall(struct c4iw_ep *ep); static void established_upcall(struct c4iw_ep *ep); static int process_mpa_reply(struct c4iw_ep *ep); static int process_mpa_request(struct c4iw_ep *ep); static void process_peer_close(struct c4iw_ep *ep); static void process_conn_error(struct c4iw_ep *ep); static void process_close_complete(struct c4iw_ep *ep); static void ep_timeout(unsigned long arg); static void setiwsockopt(struct socket *so); static void init_iwarp_socket(struct socket *so, void *arg); static void uninit_iwarp_socket(struct socket *so); static void process_data(struct c4iw_ep *ep); static void process_connected(struct c4iw_ep *ep); static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag); static void process_socket_event(struct c4iw_ep *ep); static void release_ep_resources(struct c4iw_ep *ep); static int process_terminate(struct c4iw_ep *ep); static int terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m); static int add_ep_to_req_list(struct c4iw_ep *ep, int ep_events); static struct listen_port_info * add_ep_to_listenlist(struct c4iw_listen_ep *lep); static int rem_ep_from_listenlist(struct c4iw_listen_ep *lep); static struct c4iw_listen_ep * 
find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so); static int get_ifnet_from_raddr(struct sockaddr_storage *raddr, if_t *ifp); static void process_newconn(struct c4iw_listen_ep *master_lep, struct socket *new_so); #define START_EP_TIMER(ep) \ do { \ CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \ __func__, __LINE__, (ep)); \ start_ep_timer(ep); \ } while (0) #define STOP_EP_TIMER(ep) \ ({ \ CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \ __func__, __LINE__, (ep)); \ stop_ep_timer(ep); \ }) #define GET_LOCAL_ADDR(pladdr, so) \ do { \ struct sockaddr_storage *__a = NULL; \ struct inpcb *__inp = sotoinpcb(so); \ KASSERT(__inp != NULL, \ ("GET_LOCAL_ADDR(%s):so:%p, inp = NULL", __func__, so)); \ if (__inp->inp_vflag & INP_IPV4) \ in_getsockaddr(so, (struct sockaddr **)&__a); \ else \ in6_getsockaddr(so, (struct sockaddr **)&__a); \ *(pladdr) = *__a; \ free(__a, M_SONAME); \ } while (0) #define GET_REMOTE_ADDR(praddr, so) \ do { \ struct sockaddr_storage *__a = NULL; \ struct inpcb *__inp = sotoinpcb(so); \ KASSERT(__inp != NULL, \ ("GET_REMOTE_ADDR(%s):so:%p, inp = NULL", __func__, so)); \ if (__inp->inp_vflag & INP_IPV4) \ in_getpeeraddr(so, (struct sockaddr **)&__a); \ else \ in6_getpeeraddr(so, (struct sockaddr **)&__a); \ *(praddr) = *__a; \ free(__a, M_SONAME); \ } while (0) static char *states[] = { "idle", "listen", "connecting", "mpa_wait_req", "mpa_req_sent", "mpa_req_rcvd", "mpa_rep_sent", "fpdu_mode", "aborting", "closing", "moribund", "dead", NULL, }; static void deref_cm_id(struct c4iw_ep_common *epc) { epc->cm_id->rem_ref(epc->cm_id); epc->cm_id = NULL; set_bit(CM_ID_DEREFED, &epc->history); } static void ref_cm_id(struct c4iw_ep_common *epc) { set_bit(CM_ID_REFED, &epc->history); epc->cm_id->add_ref(epc->cm_id); } static void deref_qp(struct c4iw_ep *ep) { c4iw_qp_rem_ref(&ep->com.qp->ibqp); clear_bit(QP_REFERENCED, &ep->com.flags); set_bit(QP_DEREFED, &ep->com.history); } static void ref_qp(struct c4iw_ep *ep) { set_bit(QP_REFERENCED, &ep->com.flags); set_bit(QP_REFED, &ep->com.history); c4iw_qp_add_ref(&ep->com.qp->ibqp); } /* allocated per TCP port while listening */ struct listen_port_info { uint16_t port_num; /* TCP port address */ struct list_head list; /* belongs to listen_port_list */ struct list_head lep_list; /* per port lep list */ uint32_t refcnt; /* number of lep's listening */ }; /* * Following two lists are used to manage INADDR_ANY listeners: * 1)listen_port_list * 2)lep_list * * Below is the INADDR_ANY listener lists overview on a system with a two port * adapter: * |------------------| * |listen_port_list | * |------------------| * | * | |-----------| |-----------| * | | port_num:X| | port_num:X| * |--------------|-list------|-------|-list------|-------.... * | lep_list----| | lep_list----| * | refcnt | | | refcnt | | * | | | | | | * | | | | | | * |-----------| | |-----------| | * | | * | | * | | * | | lep1 lep2 * | | |----------------| |----------------| * | |----| listen_ep_list |----| listen_ep_list | * | |----------------| |----------------| * | * | * | lep1 lep2 * | |----------------| |----------------| * |---| listen_ep_list |----| listen_ep_list | * |----------------| |----------------| * * Because of two port adapter, the number of lep's are two(lep1 & lep2) for * each TCP port number. * * Here 'lep1' is always marked as Master lep, because solisten() is always * called through first lep. 
* */ static struct listen_port_info * add_ep_to_listenlist(struct c4iw_listen_ep *lep) { uint16_t port; struct listen_port_info *port_info = NULL; struct sockaddr_storage *laddr = &lep->com.local_addr; port = (laddr->ss_family == AF_INET) ? ((struct sockaddr_in *)laddr)->sin_port : ((struct sockaddr_in6 *)laddr)->sin6_port; mutex_lock(&listen_port_mutex); list_for_each_entry(port_info, &listen_port_list, list) if (port_info->port_num == port) goto found_port; port_info = malloc(sizeof(*port_info), M_CXGBE, M_WAITOK); port_info->port_num = port; port_info->refcnt = 0; list_add_tail(&port_info->list, &listen_port_list); INIT_LIST_HEAD(&port_info->lep_list); found_port: port_info->refcnt++; list_add_tail(&lep->listen_ep_list, &port_info->lep_list); mutex_unlock(&listen_port_mutex); return port_info; } static int rem_ep_from_listenlist(struct c4iw_listen_ep *lep) { uint16_t port; struct listen_port_info *port_info = NULL; struct sockaddr_storage *laddr = &lep->com.local_addr; int refcnt = 0; port = (laddr->ss_family == AF_INET) ? ((struct sockaddr_in *)laddr)->sin_port : ((struct sockaddr_in6 *)laddr)->sin6_port; mutex_lock(&listen_port_mutex); /* get the port_info structure based on the lep's port address */ list_for_each_entry(port_info, &listen_port_list, list) { if (port_info->port_num == port) { port_info->refcnt--; refcnt = port_info->refcnt; /* remove the current lep from the listen list */ list_del(&lep->listen_ep_list); if (port_info->refcnt == 0) { /* Remove this entry from the list as there * are no more listeners for this port_num. */ list_del(&port_info->list); kfree(port_info); } break; } } mutex_unlock(&listen_port_mutex); return refcnt; } /* * Find the lep that belongs to the ifnet on which the SYN frame was received. */ struct c4iw_listen_ep * find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so) { struct adapter *adap = NULL; struct c4iw_listen_ep *lep = NULL; if_t ifp = NULL, hw_ifp = NULL; struct listen_port_info *port_info = NULL; int i = 0, found_portinfo = 0, found_lep = 0; uint16_t port; /* * STEP 1: Figure out 'ifp' of the physical interface, not pseudo * interfaces like vlan, lagg, etc.. * TBD: lagg support, lagg + vlan support. */ ifp = TOEPCB(so)->l2te->ifp; if (if_gettype(ifp) == IFT_L2VLAN) { hw_ifp = VLAN_TRUNKDEV(ifp); if (hw_ifp == NULL) { CTR4(KTR_IW_CXGBE, "%s: Failed to get parent ifnet of " "vlan ifnet %p, sock %p, master_lep %p", __func__, ifp, so, master_lep); return (NULL); } } else hw_ifp = ifp; /* STEP 2: Find 'port_info' with listener local port address. */ port = (master_lep->com.local_addr.ss_family == AF_INET) ? ((struct sockaddr_in *)&master_lep->com.local_addr)->sin_port : ((struct sockaddr_in6 *)&master_lep->com.local_addr)->sin6_port; mutex_lock(&listen_port_mutex); list_for_each_entry(port_info, &listen_port_list, list) if (port_info->port_num == port) { found_portinfo =1; break; } if (!found_portinfo) goto out; /* STEP 3: Traverse through list of lep's that are bound to the current * TCP port address and find the lep that belongs to the ifnet on which * the SYN frame was received. */ list_for_each_entry(lep, &port_info->lep_list, listen_ep_list) { adap = lep->com.dev->rdev.adap; for_each_port(adap, i) { if (hw_ifp == adap->port[i]->vi[0].ifp) { found_lep =1; goto out; } } } out: mutex_unlock(&listen_port_mutex); return found_lep ? 
lep : (NULL); } static void process_timeout(struct c4iw_ep *ep) { struct c4iw_qp_attributes attrs = {0}; int abort = 1; CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__, ep, ep->hwtid, ep->com.state); set_bit(TIMEDOUT, &ep->com.history); switch (ep->com.state) { case MPA_REQ_SENT: connect_reply_upcall(ep, -ETIMEDOUT); break; case MPA_REQ_WAIT: case MPA_REQ_RCVD: case MPA_REP_SENT: case FPDU_MODE: break; case CLOSING: case MORIBUND: if (ep->com.cm_id && ep->com.qp) { attrs.next_state = C4IW_QP_STATE_ERROR; c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } close_complete_upcall(ep, -ETIMEDOUT); break; case ABORTING: case DEAD: /* * These states are expected if the ep timed out at the same * time as another thread was calling stop_ep_timer(). * So we silently do nothing for these states. */ abort = 0; break; default: CTR4(KTR_IW_CXGBE, "%s unexpected state ep %p tid %u state %u" , __func__, ep, ep->hwtid, ep->com.state); abort = 0; } if (abort) c4iw_ep_disconnect(ep, 1, GFP_KERNEL); c4iw_put_ep(&ep->com); return; } struct cqe_list_entry { struct list_head entry; struct c4iw_dev *rhp; struct t4_cqe err_cqe; }; static void process_err_cqes(void) { unsigned long flag; struct cqe_list_entry *cle; spin_lock_irqsave(&err_cqe_lock, flag); while (!list_empty(&err_cqe_list)) { struct list_head *tmp; tmp = err_cqe_list.next; list_del(tmp); tmp->next = tmp->prev = NULL; spin_unlock_irqrestore(&err_cqe_lock, flag); cle = list_entry(tmp, struct cqe_list_entry, entry); c4iw_ev_dispatch(cle->rhp, &cle->err_cqe); free(cle, M_CXGBE); spin_lock_irqsave(&err_cqe_lock, flag); } spin_unlock_irqrestore(&err_cqe_lock, flag); return; } static void process_req(struct work_struct *ctx) { struct c4iw_ep_common *epc; unsigned long flag; int ep_events; process_err_cqes(); spin_lock_irqsave(&req_lock, flag); while (!TAILQ_EMPTY(&req_list)) { epc = TAILQ_FIRST(&req_list); TAILQ_REMOVE(&req_list, epc, entry); epc->entry.tqe_prev = NULL; ep_events = epc->ep_events; epc->ep_events = 0; spin_unlock_irqrestore(&req_lock, flag); mutex_lock(&epc->mutex); CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, ep_state %s events 0x%x", __func__, epc->so, epc, states[epc->state], ep_events); if (ep_events & C4IW_EVENT_TERM) process_terminate((struct c4iw_ep *)epc); if (ep_events & C4IW_EVENT_TIMEOUT) process_timeout((struct c4iw_ep *)epc); if (ep_events & C4IW_EVENT_SOCKET) process_socket_event((struct c4iw_ep *)epc); mutex_unlock(&epc->mutex); c4iw_put_ep(epc); process_err_cqes(); spin_lock_irqsave(&req_lock, flag); } spin_unlock_irqrestore(&req_lock, flag); } /* * XXX: doesn't belong here in the iWARP driver. * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is * set. Is this a valid assumption for active open? 
*/ static int set_tcpinfo(struct c4iw_ep *ep) { struct socket *so = ep->com.so; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; struct toepcb *toep; int rc = 0; INP_WLOCK(inp); tp = intotcpcb(inp); if ((tp->t_flags & TF_TOE) == 0) { rc = EINVAL; log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n", __func__, so, ep); goto done; } toep = TOEPCB(so); ep->hwtid = toep->tid; ep->snd_seq = tp->snd_nxt; ep->rcv_seq = tp->rcv_nxt; done: INP_WUNLOCK(inp); return (rc); } static int get_ifnet_from_raddr(struct sockaddr_storage *raddr, if_t *ifp) { int err = 0; struct nhop_object *nh; if (raddr->ss_family == AF_INET) { struct sockaddr_in *raddr4 = (struct sockaddr_in *)raddr; nh = fib4_lookup(RT_DEFAULT_FIB, raddr4->sin_addr, 0, NHR_NONE, 0); } else { struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)raddr; struct in6_addr addr6; uint32_t scopeid; memset(&addr6, 0, sizeof(addr6)); in6_splitscope((struct in6_addr *)&raddr6->sin6_addr, &addr6, &scopeid); nh = fib6_lookup(RT_DEFAULT_FIB, &addr6, scopeid, NHR_NONE, 0); } if (nh == NULL) err = EHOSTUNREACH; else *ifp = nh->nh_ifp; CTR2(KTR_IW_CXGBE, "%s: return: %d", __func__, err); return err; } static void close_socket(struct socket *so) { uninit_iwarp_socket(so); soclose(so); } static void process_peer_close(struct c4iw_ep *ep) { struct c4iw_qp_attributes attrs = {0}; int disconnect = 1; int release = 0; CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep, ep->com.so, states[ep->com.state]); switch (ep->com.state) { case MPA_REQ_WAIT: CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT DEAD", __func__, ep); /* Fallthrough */ case MPA_REQ_SENT: CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT DEAD", __func__, ep); ep->com.state = DEAD; connect_reply_upcall(ep, -ECONNABORTED); disconnect = 0; STOP_EP_TIMER(ep); close_socket(ep->com.so); deref_cm_id(&ep->com); release = 1; break; case MPA_REQ_RCVD: /* * We're gonna mark this puppy DEAD, but keep * the reference on it until the ULP accepts or * rejects the CR. 
*/ CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING", __func__, ep); ep->com.state = CLOSING; break; case MPA_REP_SENT: CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING", __func__, ep); ep->com.state = CLOSING; break; case FPDU_MODE: CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING", __func__, ep); START_EP_TIMER(ep); ep->com.state = CLOSING; attrs.next_state = C4IW_QP_STATE_CLOSING; c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); peer_close_upcall(ep); break; case ABORTING: CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)", __func__, ep); disconnect = 0; break; case CLOSING: CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND", __func__, ep); ep->com.state = MORIBUND; disconnect = 0; break; case MORIBUND: CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__, ep); STOP_EP_TIMER(ep); if (ep->com.cm_id && ep->com.qp) { attrs.next_state = C4IW_QP_STATE_IDLE; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } close_socket(ep->com.so); close_complete_upcall(ep, 0); ep->com.state = DEAD; release = 1; disconnect = 0; break; case DEAD: CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)", __func__, ep); disconnect = 0; break; default: panic("%s: ep %p state %d", __func__, ep, ep->com.state); break; } if (disconnect) { CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep); c4iw_ep_disconnect(ep, 0, M_NOWAIT); } if (release) { CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep); c4iw_put_ep(&ep->com); } CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep); return; } static void process_conn_error(struct c4iw_ep *ep) { struct c4iw_qp_attributes attrs = {0}; int ret; int state; state = ep->com.state; CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s", __func__, ep, ep->com.so, ep->com.so->so_error, states[ep->com.state]); switch (state) { case MPA_REQ_WAIT: STOP_EP_TIMER(ep); c4iw_put_ep(&ep->parent_ep->com); break; case MPA_REQ_SENT: STOP_EP_TIMER(ep); connect_reply_upcall(ep, -ECONNRESET); break; case MPA_REP_SENT: ep->com.rpl_err = ECONNRESET; CTR1(KTR_IW_CXGBE, "waking up ep %p", ep); break; case MPA_REQ_RCVD: break; case MORIBUND: case CLOSING: STOP_EP_TIMER(ep); /*FALLTHROUGH*/ case FPDU_MODE: if (ep->com.cm_id && ep->com.qp) { attrs.next_state = C4IW_QP_STATE_ERROR; ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); if (ret) log(LOG_ERR, "%s - qp <- error failed!\n", __func__); } peer_abort_upcall(ep); break; case ABORTING: break; case DEAD: CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!", __func__, ep->com.so->so_error); return; default: panic("%s: ep %p state %d", __func__, ep, state); break; } if (state != ABORTING) { close_socket(ep->com.so); ep->com.state = DEAD; c4iw_put_ep(&ep->com); } CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep); return; } static void process_close_complete(struct c4iw_ep *ep) { struct c4iw_qp_attributes attrs = {0}; int release = 0; CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep, ep->com.so, states[ep->com.state]); /* The cm_id may be null if we failed to connect */ set_bit(CLOSE_CON_RPL, &ep->com.history); switch (ep->com.state) { case CLOSING: CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND", __func__, ep); ep->com.state = MORIBUND; break; case MORIBUND: CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__, ep); STOP_EP_TIMER(ep); if ((ep->com.cm_id) && (ep->com.qp)) { CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE", __func__, ep); attrs.next_state = C4IW_QP_STATE_IDLE; c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } 
close_socket(ep->com.so); close_complete_upcall(ep, 0); ep->com.state = DEAD; release = 1; break; case ABORTING: CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep); break; case DEAD: CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep); break; default: CTR2(KTR_IW_CXGBE, "%s:pcc7 %p unknown ep state", __func__, ep); panic("%s:pcc6 %p unknown ep state", __func__, ep); break; } if (release) { CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep); release_ep_resources(ep); } CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep); return; } static void setiwsockopt(struct socket *so) { int rc; struct sockopt sopt; int on = 1; sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_TCP; sopt.sopt_name = TCP_NODELAY; sopt.sopt_val = (caddr_t)&on; sopt.sopt_valsize = sizeof on; sopt.sopt_td = NULL; rc = -sosetopt(so, &sopt); if (rc) { log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n", __func__, so, rc); } } static void init_iwarp_socket(struct socket *so, void *arg) { if (SOLISTENING(so)) { SOLISTEN_LOCK(so); solisten_upcall_set(so, c4iw_so_upcall, arg); so->so_state |= SS_NBIO; SOLISTEN_UNLOCK(so); } else { SOCKBUF_LOCK(&so->so_rcv); soupcall_set(so, SO_RCV, c4iw_so_upcall, arg); so->so_state |= SS_NBIO; SOCKBUF_UNLOCK(&so->so_rcv); } } static void uninit_iwarp_socket(struct socket *so) { if (SOLISTENING(so)) { SOLISTEN_LOCK(so); solisten_upcall_set(so, NULL, NULL); SOLISTEN_UNLOCK(so); } else { SOCKBUF_LOCK(&so->so_rcv); soupcall_clear(so, SO_RCV); SOCKBUF_UNLOCK(&so->so_rcv); } } static void process_data(struct c4iw_ep *ep) { int ret = 0; int disconnect = 0; struct c4iw_qp_attributes attrs = {0}; CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__, ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv)); switch (ep->com.state) { case MPA_REQ_SENT: disconnect = process_mpa_reply(ep); break; case MPA_REQ_WAIT: disconnect = process_mpa_request(ep); if (disconnect) /* Refered in process_newconn() */ c4iw_put_ep(&ep->parent_ep->com); break; case FPDU_MODE: MPASS(ep->com.qp != NULL); attrs.next_state = C4IW_QP_STATE_TERMINATE; ret = c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); if (ret != -EINPROGRESS) disconnect = 1; break; default: log(LOG_ERR, "%s: Unexpected streaming data. 
ep %p, " "state %d, so %p, so_state 0x%x, sbused %u\n", __func__, ep, ep->com.state, ep->com.so, ep->com.so->so_state, sbused(&ep->com.so->so_rcv)); break; } if (disconnect) c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL); } static void process_connected(struct c4iw_ep *ep) { struct socket *so = ep->com.so; if ((so->so_state & SS_ISCONNECTED) && !so->so_error) { if (send_mpa_req(ep)) goto err; } else { connect_reply_upcall(ep, -so->so_error); goto err; } return; err: close_socket(so); ep->com.state = DEAD; c4iw_put_ep(&ep->com); return; } static inline bool c4iw_zero_addr(struct sockaddr *addr) { struct in6_addr *ip6; if (addr->sa_family == AF_INET) return (((struct sockaddr_in *)addr)->sin_addr.s_addr == 0); else { ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0; } } #define _IN_LOOPBACK(i) (((in_addr_t)(i) & 0xff000000) == 0x7f000000) static inline bool c4iw_loopback_addr(struct sockaddr *addr, struct vnet *vnet) { bool ret; if (addr->sa_family == AF_INET) { if (vnet == NULL) ret = _IN_LOOPBACK(ntohl(((struct sockaddr_in *) addr)->sin_addr.s_addr)); else { CURVNET_SET_QUIET(vnet); ret = IN_LOOPBACK(ntohl(((struct sockaddr_in *) addr)->sin_addr.s_addr)); CURVNET_RESTORE(); } } else { ret = IN6_IS_ADDR_LOOPBACK(&((struct sockaddr_in6 *) addr)->sin6_addr); } return (ret); } #undef _IN_LOOPBACK static inline bool c4iw_any_addr(struct sockaddr *addr, struct vnet *vnet) { return c4iw_zero_addr(addr) || c4iw_loopback_addr(addr, vnet); } static void process_newconn(struct c4iw_listen_ep *master_lep, struct socket *new_so) { struct c4iw_listen_ep *real_lep = NULL; struct c4iw_ep *new_ep = NULL; - struct sockaddr_in *remote = NULL; + struct sockaddr_storage remote = { .ss_len = sizeof(remote) }; int ret = 0; MPASS(new_so != NULL); if (c4iw_any_addr((struct sockaddr *)&master_lep->com.local_addr, new_so->so_vnet)) { /* Here we need to find the 'real_lep' that belongs to the * incoming socket's network interface, such that the newly * created 'ep' can be attached to the real 'lep'. */ real_lep = find_real_listen_ep(master_lep, new_so); if (real_lep == NULL) { CTR2(KTR_IW_CXGBE, "%s: Could not find the real listen " "ep for sock: %p", __func__, new_so); log(LOG_ERR, "%s: Could not find the real listen ep for " "sock: %p\n", __func__, new_so); /* FIXME: properly free 'new_so' in the failure case. * Using soabort() or soclose() is not legal * here (before soaccept()).
*/ return; } } else /* for Non-Wildcard address, master_lep is always the real_lep */ real_lep = master_lep; new_ep = alloc_ep(sizeof(*new_ep), GFP_KERNEL); CTR6(KTR_IW_CXGBE, "%s: master_lep %p, real_lep: %p, new ep %p, " "listening so %p, new so %p", __func__, master_lep, real_lep, new_ep, master_lep->com.so, new_so); new_ep->com.dev = real_lep->com.dev; new_ep->com.so = new_so; new_ep->com.cm_id = NULL; new_ep->com.thread = real_lep->com.thread; new_ep->parent_ep = real_lep; GET_LOCAL_ADDR(&new_ep->com.local_addr, new_so); GET_REMOTE_ADDR(&new_ep->com.remote_addr, new_so); c4iw_get_ep(&real_lep->com); init_timer(&new_ep->timer); new_ep->com.state = MPA_REQ_WAIT; setiwsockopt(new_so); - ret = soaccept(new_so, (struct sockaddr **)&remote); + ret = soaccept(new_so, (struct sockaddr *)&remote); if (ret != 0) { CTR4(KTR_IW_CXGBE, "%s:listen sock:%p, new sock:%p, ret:%d", __func__, master_lep->com.so, new_so, ret); - if (remote != NULL) - free(remote, M_SONAME); soclose(new_so); c4iw_put_ep(&new_ep->com); c4iw_put_ep(&real_lep->com); return; } - free(remote, M_SONAME); START_EP_TIMER(new_ep); /* MPA request might have been queued up on the socket already, so we * initialize the socket/upcall_handler under lock to prevent processing * MPA request on another thread(via process_req()) simultaneously. */ c4iw_get_ep(&new_ep->com); /* Dereferenced at the end below, this is to avoid freeing of ep before ep unlock. */ mutex_lock(&new_ep->com.mutex); init_iwarp_socket(new_so, &new_ep->com); ret = process_mpa_request(new_ep); if (ret) { /* ABORT */ c4iw_ep_disconnect(new_ep, 1, GFP_KERNEL); c4iw_put_ep(&real_lep->com); } mutex_unlock(&new_ep->com.mutex); c4iw_put_ep(&new_ep->com); return; } static int add_ep_to_req_list(struct c4iw_ep *ep, int new_ep_event) { unsigned long flag; spin_lock_irqsave(&req_lock, flag); if (ep && ep->com.so) { ep->com.ep_events |= new_ep_event; if (!ep->com.entry.tqe_prev) { c4iw_get_ep(&ep->com); TAILQ_INSERT_TAIL(&req_list, &ep->com, entry); queue_work(c4iw_taskq, &c4iw_task); } } spin_unlock_irqrestore(&req_lock, flag); return (0); } static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag) { struct c4iw_ep *ep = arg; CTR6(KTR_IW_CXGBE, "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p", __func__, so, so->so_state, ep, states[ep->com.state], ep->com.entry.tqe_prev); MPASS(ep->com.so == so); /* * Wake up any threads waiting in rdma_init()/rdma_fini(), * with locks held. 
	if (so->so_error || (ep->com.dev->rdev.flags & T4_FATAL_ERROR))
		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);

	add_ep_to_req_list(ep, C4IW_EVENT_SOCKET);
	return (SU_OK);
}

static int
terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rdma_terminate *cpl = mtod(m, const void *);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct socket *so;
	struct c4iw_ep *ep;

	INP_WLOCK(toep->inp);
	so = inp_inpcbtosocket(toep->inp);
	ep = so->so_rcv.sb_upcallarg;
	INP_WUNLOCK(toep->inp);

	CTR3(KTR_IW_CXGBE, "%s: so %p, ep %p", __func__, so, ep);
	add_ep_to_req_list(ep, C4IW_EVENT_TERM);

	return 0;
}

static void
process_socket_event(struct c4iw_ep *ep)
{
	int state = ep->com.state;
	struct socket *so = ep->com.so;

	if (ep->com.state == DEAD) {
		CTR3(KTR_IW_CXGBE, "%s: Pending socket event discarded "
		    "ep %p ep_state %s", __func__, ep, states[state]);
		return;
	}

	CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, "
	    "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state,
	    so->so_error, so->so_rcv.sb_state, ep, states[state]);

	if (state == CONNECTING) {
		process_connected(ep);
		return;
	}

	if (state == LISTEN) {
		struct c4iw_listen_ep *lep = (struct c4iw_listen_ep *)ep;
		struct socket *listen_so = so, *new_so = NULL;
		int error = 0;

		SOLISTEN_LOCK(listen_so);
		do {
			error = solisten_dequeue(listen_so, &new_so,
			    SOCK_NONBLOCK);
			if (error) {
				CTR4(KTR_IW_CXGBE, "%s: lep %p listen_so %p "
				    "error %d", __func__, lep, listen_so,
				    error);
				return;
			}
			process_newconn(lep, new_so);

			/* solisten_dequeue() unlocks on return, so acquire
			 * the lock again for sol_qlen and also for the next
			 * iteration.
			 */
			SOLISTEN_LOCK(listen_so);
		} while (listen_so->sol_qlen);
		SOLISTEN_UNLOCK(listen_so);
		return;
	}

	/* connection error */
	if (so->so_error) {
		process_conn_error(ep);
		return;
	}

	/* peer close */
	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state <= CLOSING) {
		process_peer_close(ep);
		/*
		 * check whether socket disconnect event is pending before
		 * returning. Fallthrough if yes.
		 */
		if (!(so->so_state & SS_ISDISCONNECTED))
			return;
	}

	/* close complete */
	if (so->so_state & SS_ISDISCONNECTED) {
		process_close_complete(ep);
		return;
	}

	/* rx data */
	if (sbused(&ep->com.so->so_rcv)) {
		process_data(ep);
		return;
	}

	/* Socket events for 'MPA Request Received' and 'Close Complete'
	 * were already processed earlier in their previous event handlers.
	 * Hence, these socket events are skipped.
	 * And any other socket events must have been handled above.
*/ MPASS((ep->com.state == MPA_REQ_RCVD) || (ep->com.state == MORIBUND)); if ((ep->com.state != MPA_REQ_RCVD) && (ep->com.state != MORIBUND)) log(LOG_ERR, "%s: Unprocessed socket event so %p, " "so_state 0x%x, so_err %d, sb_state 0x%x, ep %p, ep_state %s\n", __func__, so, so->so_state, so->so_error, so->so_rcv.sb_state, ep, states[state]); } SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "iw_cxgbe driver parameters"); static int dack_mode = 0; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RWTUN, &dack_mode, 0, "Delayed ack mode (default = 0)"); int c4iw_max_read_depth = 8; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RWTUN, &c4iw_max_read_depth, 0, "Per-connection max ORD/IRD (default = 8)"); static int enable_tcp_timestamps; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RWTUN, &enable_tcp_timestamps, 0, "Enable tcp timestamps (default = 0)"); static int enable_tcp_sack; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RWTUN, &enable_tcp_sack, 0, "Enable tcp SACK (default = 0)"); static int enable_tcp_window_scaling = 1; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RWTUN, &enable_tcp_window_scaling, 0, "Enable tcp window scaling (default = 1)"); int c4iw_debug = 0; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RWTUN, &c4iw_debug, 0, "Enable debug logging (default = 0)"); static int peer2peer = 1; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RWTUN, &peer2peer, 0, "Support peer2peer ULPs (default = 1)"); static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RWTUN, &p2p_type, 0, "RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = RDMA_WRITE (default 1)"); static int ep_timeout_secs = 60; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0, "CM Endpoint operation timeout in seconds (default = 60)"); static int mpa_rev = 1; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0, "MPA Revision, 0 supports amso1100, 1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)"); static int markers_enabled; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0, "Enable MPA MARKERS (default(0) = disabled)"); static int crc_enabled = 1; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0, "Enable MPA CRC (default(1) = enabled)"); static int rcv_win = 256 * 1024; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0, "TCP receive window in bytes (default = 256KB)"); static int snd_win = 128 * 1024; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0, "TCP send window in bytes (default = 128KB)"); int use_dsgl = 1; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, use_dsgl, CTLFLAG_RWTUN, &use_dsgl, 0, "Use DSGL for PBL/FastReg (default=1)"); int inline_threshold = 128; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, inline_threshold, CTLFLAG_RWTUN, &inline_threshold, 0, "inline vs dsgl threshold (default=128)"); static int reuseaddr = 0; SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, reuseaddr, CTLFLAG_RWTUN, &reuseaddr, 0, "Enable SO_REUSEADDR & SO_REUSEPORT socket options on all iWARP client connections(default = 0)"); static void start_ep_timer(struct c4iw_ep *ep) { if (timer_pending(&ep->timer)) { CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep); printk(KERN_ERR "%s timer already started! 
ep %p\n", __func__, ep); return; } clear_bit(TIMEOUT, &ep->com.flags); c4iw_get_ep(&ep->com); ep->timer.expires = jiffies + ep_timeout_secs * HZ; ep->timer.data = (unsigned long)ep; ep->timer.function = ep_timeout; add_timer(&ep->timer); } static int stop_ep_timer(struct c4iw_ep *ep) { del_timer_sync(&ep->timer); if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { c4iw_put_ep(&ep->com); return 0; } return 1; } static void * alloc_ep(int size, gfp_t gfp) { struct c4iw_ep_common *epc; epc = kzalloc(size, gfp); if (epc == NULL) return (NULL); kref_init(&epc->kref); mutex_init(&epc->mutex); c4iw_init_wr_wait(&epc->wr_wait); return (epc); } void _c4iw_free_ep(struct kref *kref) { struct c4iw_ep *ep; #if defined(KTR) || defined(INVARIANTS) struct c4iw_ep_common *epc; #endif ep = container_of(kref, struct c4iw_ep, com.kref); #if defined(KTR) || defined(INVARIANTS) epc = &ep->com; #endif KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list", __func__, epc)); if (test_bit(QP_REFERENCED, &ep->com.flags)) deref_qp(ep); CTR4(KTR_IW_CXGBE, "%s: ep %p, history 0x%lx, flags 0x%lx", __func__, ep, epc->history, epc->flags); kfree(ep); } static void release_ep_resources(struct c4iw_ep *ep) { CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep); set_bit(RELEASE_RESOURCES, &ep->com.flags); c4iw_put_ep(&ep->com); CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep); } static int send_mpa_req(struct c4iw_ep *ep) { int mpalen; struct mpa_message *mpa; struct mpa_v2_conn_params mpa_v2_params; struct mbuf *m; char mpa_rev_to_use = mpa_rev; int err = 0; if (ep->retry_with_mpa_v1) mpa_rev_to_use = 1; mpalen = sizeof(*mpa) + ep->plen; if (mpa_rev_to_use == 2) mpalen += sizeof(struct mpa_v2_conn_params); mpa = malloc(mpalen, M_CXGBE, M_NOWAIT); if (mpa == NULL) { err = -ENOMEM; CTR3(KTR_IW_CXGBE, "%s:smr1 ep: %p , error: %d", __func__, ep, err); goto err; } memset(mpa, 0, mpalen); memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); mpa->flags = (crc_enabled ? MPA_CRC : 0) | (markers_enabled ? MPA_MARKERS : 0) | (mpa_rev_to_use == 2 ? 
MPA_ENHANCED_RDMA_CONN : 0); mpa->private_data_size = htons(ep->plen); mpa->revision = mpa_rev_to_use; if (mpa_rev_to_use == 1) { ep->tried_with_mpa_v1 = 1; ep->retry_with_mpa_v1 = 0; } if (mpa_rev_to_use == 2) { mpa->private_data_size = htons(ntohs(mpa->private_data_size) + sizeof(struct mpa_v2_conn_params)); mpa_v2_params.ird = htons((u16)ep->ird); mpa_v2_params.ord = htons((u16)ep->ord); if (peer2peer) { mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) { mpa_v2_params.ord |= htons(MPA_V2_RDMA_WRITE_RTR); } else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) { mpa_v2_params.ord |= htons(MPA_V2_RDMA_READ_RTR); } } memcpy(mpa->private_data, &mpa_v2_params, sizeof(struct mpa_v2_conn_params)); if (ep->plen) { memcpy(mpa->private_data + sizeof(struct mpa_v2_conn_params), ep->mpa_pkt + sizeof(*mpa), ep->plen); } } else { if (ep->plen) memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen); CTR2(KTR_IW_CXGBE, "%s:smr7 %p", __func__, ep); } m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA); if (m == NULL) { err = -ENOMEM; CTR3(KTR_IW_CXGBE, "%s:smr2 ep: %p , error: %d", __func__, ep, err); free(mpa, M_CXGBE); goto err; } m_copyback(m, 0, mpalen, (void *)mpa); free(mpa, M_CXGBE); err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); if (err) { CTR3(KTR_IW_CXGBE, "%s:smr3 ep: %p , error: %d", __func__, ep, err); goto err; } START_EP_TIMER(ep); ep->com.state = MPA_REQ_SENT; ep->mpa_attr.initiator = 1; CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err); return 0; err: connect_reply_upcall(ep, err); CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err); return err; } static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) { int mpalen ; struct mpa_message *mpa; struct mpa_v2_conn_params mpa_v2_params; struct mbuf *m; int err; CTR4(KTR_IW_CXGBE, "%s:smrejB %p %u %d", __func__, ep, ep->hwtid, ep->plen); mpalen = sizeof(*mpa) + plen; if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { mpalen += sizeof(struct mpa_v2_conn_params); CTR4(KTR_IW_CXGBE, "%s:smrej1 %p %u %d", __func__, ep, ep->mpa_attr.version, mpalen); } mpa = malloc(mpalen, M_CXGBE, M_NOWAIT); if (mpa == NULL) return (-ENOMEM); memset(mpa, 0, mpalen); memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); mpa->flags = MPA_REJECT; mpa->revision = mpa_rev; mpa->private_data_size = htons(plen); if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { mpa->flags |= MPA_ENHANCED_RDMA_CONN; mpa->private_data_size = htons(ntohs(mpa->private_data_size) + sizeof(struct mpa_v2_conn_params)); mpa_v2_params.ird = htons(((u16)ep->ird) | (peer2peer ? MPA_V2_PEER2PEER_MODEL : 0)); mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ? (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE ? MPA_V2_RDMA_WRITE_RTR : p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ ? 
MPA_V2_RDMA_READ_RTR : 0) : 0));
		memcpy(mpa->private_data, &mpa_v2_params,
		    sizeof(struct mpa_v2_conn_params));

		if (ep->plen)
			memcpy(mpa->private_data +
			    sizeof(struct mpa_v2_conn_params), pdata, plen);
		CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep,
		    mpa_v2_params.ird, mpa_v2_params.ord, ep->plen);
	} else if (plen)
		memcpy(mpa->private_data, pdata, plen);

	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
	if (m == NULL) {
		free(mpa, M_CXGBE);
		return (-ENOMEM);
	}
	m_copyback(m, 0, mpalen, (void *)mpa);
	free(mpa, M_CXGBE);

	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
	    ep->com.thread);
	if (!err)
		ep->snd_seq += mpalen;
	CTR4(KTR_IW_CXGBE, "%s:smrejE %p %u %d", __func__, ep, ep->hwtid, err);
	return err;
}

static int
send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	struct mpa_v2_conn_params mpa_v2_params;
	int err;

	CTR2(KTR_IW_CXGBE, "%s:smrepB %p", __func__, ep);

	mpalen = sizeof(*mpa) + plen;

	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
		CTR3(KTR_IW_CXGBE, "%s:smrep1 %p %d", __func__, ep,
		    ep->mpa_attr.version);
		mpalen += sizeof(struct mpa_v2_conn_params);
	}

	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
	if (mpa == NULL)
		return (-ENOMEM);

	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
	    (markers_enabled ? MPA_MARKERS : 0);
	mpa->revision = ep->mpa_attr.version;
	mpa->private_data_size = htons(plen);

	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
		mpa->private_data_size +=
		    htons(sizeof(struct mpa_v2_conn_params));
		mpa_v2_params.ird = htons((u16)ep->ird);
		mpa_v2_params.ord = htons((u16)ep->ord);
		CTR5(KTR_IW_CXGBE, "%s:smrep3 %p %d %d %d", __func__, ep,
		    ep->mpa_attr.version, mpa_v2_params.ird,
		    mpa_v2_params.ord);

		if (peer2peer && (ep->mpa_attr.p2p_type !=
		    FW_RI_INIT_P2PTYPE_DISABLED)) {
			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);

			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
				mpa_v2_params.ord |=
				    htons(MPA_V2_RDMA_WRITE_RTR);
				CTR5(KTR_IW_CXGBE, "%s:smrep4 %p %d %d %d",
				    __func__, ep, p2p_type, mpa_v2_params.ird,
				    mpa_v2_params.ord);
			} else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
				mpa_v2_params.ord |=
				    htons(MPA_V2_RDMA_READ_RTR);
				CTR5(KTR_IW_CXGBE, "%s:smrep5 %p %d %d %d",
				    __func__, ep, p2p_type, mpa_v2_params.ird,
				    mpa_v2_params.ord);
			}
		}

		memcpy(mpa->private_data, &mpa_v2_params,
		    sizeof(struct mpa_v2_conn_params));

		if (ep->plen)
			memcpy(mpa->private_data +
			    sizeof(struct mpa_v2_conn_params), pdata, plen);
	} else if (plen)
		memcpy(mpa->private_data, pdata, plen);

	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
	if (m == NULL) {
		free(mpa, M_CXGBE);
		return (-ENOMEM);
	}
	m_copyback(m, 0, mpalen, (void *)mpa);
	free(mpa, M_CXGBE);

	ep->com.state = MPA_REP_SENT;
	ep->snd_seq += mpalen;
	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
	    ep->com.thread);
	CTR3(KTR_IW_CXGBE, "%s:smrepE %p %d", __func__, ep, err);
	return err;
}

static void
close_complete_upcall(struct c4iw_ep *ep, int status)
{
	struct iw_cm_event event;

	CTR2(KTR_IW_CXGBE, "%s:ccuB %p", __func__, ep);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	event.status = status;

	if (ep->com.cm_id) {
		CTR2(KTR_IW_CXGBE, "%s:ccu1 %p", __func__, ep);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		deref_cm_id(&ep->com);
		set_bit(CLOSE_UPCALL, &ep->com.history);
	}
	CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep);
}

static int
send_abort(struct c4iw_ep *ep)
{
	struct socket *so = ep->com.so;
	struct sockopt sopt;
	int rc;
	struct linger l;

	CTR5(KTR_IW_CXGBE, "%s ep %p so %p state %s tid %d", __func__, ep, so,
	    states[ep->com.state], ep->hwtid);
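	/*
	 * There is no direct abort primitive on this path: a zero linger
	 * time set below makes the soclose() that follows drop the
	 * connection with a RST instead of a normal FIN close.
	 */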
	l.l_onoff = 1;
	l.l_linger = 0; /* linger_time of 0 forces RST to be sent */

	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_LINGER;
	sopt.sopt_val = (caddr_t)&l;
	sopt.sopt_valsize = sizeof l;
	sopt.sopt_td = NULL;
	rc = -sosetopt(so, &sopt);
	if (rc != 0) {
		log(LOG_ERR, "%s: sosetopt(%p, linger = 0) failed with %d.\n",
		    __func__, so, rc);
	}

	uninit_iwarp_socket(so);
	soclose(so);
	set_bit(ABORT_CONN, &ep->com.history);

	/*
	 * TBD: iw_cxgbe driver should receive ABORT reply for every ABORT
	 * request it has sent. But the current TOE driver is not propagating
	 * this ABORT reply event (via do_abort_rpl) to iw_cxgbe. So as a
	 * work-around, de-reference 'ep' here instead of doing it in the
	 * abort_rpl() handler (not yet implemented) of the iw_cxgbe driver.
	 */
	release_ep_resources(ep);
	ep->com.state = DEAD;

	return (0);
}

static void
peer_close_upcall(struct c4iw_ep *ep)
{
	struct iw_cm_event event;

	CTR2(KTR_IW_CXGBE, "%s:pcuB %p", __func__, ep);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_DISCONNECT;

	if (ep->com.cm_id) {
		CTR2(KTR_IW_CXGBE, "%s:pcu1 %p", __func__, ep);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		set_bit(DISCONN_UPCALL, &ep->com.history);
	}
	CTR2(KTR_IW_CXGBE, "%s:pcuE %p", __func__, ep);
}

static void
peer_abort_upcall(struct c4iw_ep *ep)
{
	struct iw_cm_event event;

	CTR2(KTR_IW_CXGBE, "%s:pauB %p", __func__, ep);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	event.status = -ECONNRESET;

	if (ep->com.cm_id) {
		CTR2(KTR_IW_CXGBE, "%s:pau1 %p", __func__, ep);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		deref_cm_id(&ep->com);
		set_bit(ABORT_UPCALL, &ep->com.history);
	}
	CTR2(KTR_IW_CXGBE, "%s:pauE %p", __func__, ep);
}

static void
connect_reply_upcall(struct c4iw_ep *ep, int status)
{
	struct iw_cm_event event;

	CTR3(KTR_IW_CXGBE, "%s:cruB %p, status: %d", __func__, ep, status);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REPLY;
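	/*
	 * An abort (-ECONNABORTED) or a broken pipe (-EPIPE) is reported
	 * to the CM as -ECONNRESET; any other status is passed through
	 * unchanged.
	 */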
	event.status = ((status == -ECONNABORTED) || (status == -EPIPE)) ?
	    -ECONNRESET : status;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;

	if ((status == 0) || (status == -ECONNREFUSED)) {
		if (!ep->tried_with_mpa_v1) {
			CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep);
			/* this means MPA_v2 is used */
			event.ord = ep->ird;
			event.ird = ep->ord;
			event.private_data_len = ep->plen -
			    sizeof(struct mpa_v2_conn_params);
			event.private_data = ep->mpa_pkt +
			    sizeof(struct mpa_message) +
			    sizeof(struct mpa_v2_conn_params);
		} else {
			CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep);
			/* this means MPA_v1 is used */
			event.ord = c4iw_max_read_depth;
			event.ird = c4iw_max_read_depth;
			event.private_data_len = ep->plen;
			event.private_data = ep->mpa_pkt +
			    sizeof(struct mpa_message);
		}
	}

	if (ep->com.cm_id) {
		CTR2(KTR_IW_CXGBE, "%s:cru3 %p", __func__, ep);
		set_bit(CONN_RPL_UPCALL, &ep->com.history);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}

	if (status == -ECONNABORTED) {
		CTR3(KTR_IW_CXGBE, "%s:cruE %p %d", __func__, ep, status);
		return;
	}

	if (status < 0) {
		CTR3(KTR_IW_CXGBE, "%s:cru4 %p %d", __func__, ep, status);
		deref_cm_id(&ep->com);
	}

	CTR2(KTR_IW_CXGBE, "%s:cruE %p", __func__, ep);
}

static int
connect_request_upcall(struct c4iw_ep *ep)
{
	struct iw_cm_event event;
	int ret;

	CTR3(KTR_IW_CXGBE, "%s: ep %p, mpa_v1 %d", __func__, ep,
	    ep->tried_with_mpa_v1);

	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REQUEST;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;
	event.provider_data = ep;

	if (!ep->tried_with_mpa_v1) {
		/* this means MPA_v2 is used */
		event.ord = ep->ord;
		event.ird = ep->ird;
		event.private_data_len = ep->plen -
		    sizeof(struct mpa_v2_conn_params);
		event.private_data = ep->mpa_pkt +
		    sizeof(struct mpa_message) +
		    sizeof(struct mpa_v2_conn_params);
	} else {
		/* this means MPA_v1 is used. Send max supported */
		event.ord = c4iw_max_read_depth;
		event.ird = c4iw_max_read_depth;
		event.private_data_len = ep->plen;
		event.private_data = ep->mpa_pkt +
		    sizeof(struct mpa_message);
	}

	c4iw_get_ep(&ep->com);
	ret = ep->parent_ep->com.cm_id->event_handler(
	    ep->parent_ep->com.cm_id, &event);
	if (ret) {
		CTR3(KTR_IW_CXGBE, "%s: ep %p, Failure while notifying event to"
		    " IWCM, err:%d", __func__, ep, ret);
		c4iw_put_ep(&ep->com);
	} else
		/* Dereference parent_ep only in success case.
		 * In case of failure, parent_ep is dereferenced by the caller
		 * of process_mpa_request().
		 */
		c4iw_put_ep(&ep->parent_ep->com);

	set_bit(CONNREQ_UPCALL, &ep->com.history);
	return ret;
}

static void
established_upcall(struct c4iw_ep *ep)
{
	struct iw_cm_event event;

	CTR2(KTR_IW_CXGBE, "%s:euB %p", __func__, ep);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_ESTABLISHED;
	event.ird = ep->ord;
	event.ord = ep->ird;

	if (ep->com.cm_id) {
		CTR2(KTR_IW_CXGBE, "%s:eu1 %p", __func__, ep);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		set_bit(ESTAB_UPCALL, &ep->com.history);
	}
	CTR2(KTR_IW_CXGBE, "%s:euE %p", __func__, ep);
}

#define RELAXED_IRD_NEGOTIATION 1

/*
 * process_mpa_reply - process streaming mode MPA reply
 *
 * Returns:
 *
 * 0 upon success indicating a connect request was delivered to the ULP
 * or the mpa request is incomplete but valid so far.
 *
 * 1 if a failure requires the caller to close the connection.
 *
 * 2 if a failure requires the caller to abort the connection.
*/ static int process_mpa_reply(struct c4iw_ep *ep) { struct mpa_message *mpa; struct mpa_v2_conn_params *mpa_v2_params; u16 plen; u16 resp_ird, resp_ord; u8 rtr_mismatch = 0, insuff_ird = 0; struct c4iw_qp_attributes attrs = {0}; enum c4iw_qp_attr_mask mask; int err; struct mbuf *top, *m; int flags = MSG_DONTWAIT; struct uio uio; int disconnect = 0; CTR2(KTR_IW_CXGBE, "%s:pmrB %p", __func__, ep); /* * Stop mpa timer. If it expired, then * we ignore the MPA reply. process_timeout() * will abort the connection. */ if (STOP_EP_TIMER(ep)) return 0; uio.uio_resid = 1000000; uio.uio_td = ep->com.thread; err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); if (err) { if (err == EWOULDBLOCK) { CTR2(KTR_IW_CXGBE, "%s:pmr1 %p", __func__, ep); START_EP_TIMER(ep); return 0; } err = -err; CTR2(KTR_IW_CXGBE, "%s:pmr2 %p", __func__, ep); goto err; } if (ep->com.so->so_rcv.sb_mb) { CTR2(KTR_IW_CXGBE, "%s:pmr3 %p", __func__, ep); printf("%s data after soreceive called! so %p sb_mb %p top %p\n", __func__, ep->com.so, ep->com.so->so_rcv.sb_mb, top); } m = top; do { CTR2(KTR_IW_CXGBE, "%s:pmr4 %p", __func__, ep); /* * If we get more than the supported amount of private data * then we must fail this connection. */ if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { CTR3(KTR_IW_CXGBE, "%s:pmr5 %p %d", __func__, ep, ep->mpa_pkt_len + m->m_len); err = (-EINVAL); goto err_stop_timer; } /* * copy the new data into our accumulation buffer. */ m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); ep->mpa_pkt_len += m->m_len; if (!m->m_next) m = m->m_nextpkt; else m = m->m_next; } while (m); m_freem(top); /* * if we don't even have the mpa message, then bail. */ if (ep->mpa_pkt_len < sizeof(*mpa)) { return 0; } mpa = (struct mpa_message *) ep->mpa_pkt; /* Validate MPA header. */ if (mpa->revision > mpa_rev) { CTR4(KTR_IW_CXGBE, "%s:pmr6 %p %d %d", __func__, ep, mpa->revision, mpa_rev); printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d, " " Received = %d\n", __func__, mpa_rev, mpa->revision); err = -EPROTO; goto err_stop_timer; } if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { CTR2(KTR_IW_CXGBE, "%s:pmr7 %p", __func__, ep); err = -EPROTO; goto err_stop_timer; } plen = ntohs(mpa->private_data_size); /* * Fail if there's too much private data. */ if (plen > MPA_MAX_PRIVATE_DATA) { CTR2(KTR_IW_CXGBE, "%s:pmr8 %p", __func__, ep); err = -EPROTO; goto err_stop_timer; } /* * If plen does not account for pkt size */ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { CTR2(KTR_IW_CXGBE, "%s:pmr9 %p", __func__, ep); STOP_EP_TIMER(ep); err = -EPROTO; goto err_stop_timer; } ep->plen = (u8) plen; /* * If we don't have all the pdata yet, then bail. * We'll continue process when more data arrives. */ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) { CTR2(KTR_IW_CXGBE, "%s:pmra %p", __func__, ep); return 0; } if (mpa->flags & MPA_REJECT) { CTR2(KTR_IW_CXGBE, "%s:pmrb %p", __func__, ep); err = -ECONNREFUSED; goto err_stop_timer; } /* * If we get here we have accumulated the entire mpa * start reply message including private data. And * the MPA header is valid. */ ep->com.state = FPDU_MODE; ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 
1 : 0; ep->mpa_attr.version = mpa->revision; ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; if (mpa->revision == 2) { CTR2(KTR_IW_CXGBE, "%s:pmrc %p", __func__, ep); ep->mpa_attr.enhanced_rdma_conn = mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; if (ep->mpa_attr.enhanced_rdma_conn) { CTR2(KTR_IW_CXGBE, "%s:pmrd %p", __func__, ep); mpa_v2_params = (struct mpa_v2_conn_params *) (ep->mpa_pkt + sizeof(*mpa)); resp_ird = ntohs(mpa_v2_params->ird) & MPA_V2_IRD_ORD_MASK; resp_ord = ntohs(mpa_v2_params->ord) & MPA_V2_IRD_ORD_MASK; /* * This is a double-check. Ideally, below checks are * not required since ird/ord stuff has been taken * care of in c4iw_accept_cr */ if (ep->ird < resp_ord) { if (RELAXED_IRD_NEGOTIATION && resp_ord <= ep->com.dev->rdev.adap->params.max_ordird_qp) ep->ird = resp_ord; else insuff_ird = 1; } else if (ep->ird > resp_ord) { ep->ird = resp_ord; } if (ep->ord > resp_ird) { if (RELAXED_IRD_NEGOTIATION) ep->ord = resp_ird; else insuff_ird = 1; } if (insuff_ird) { err = -ENOMEM; ep->ird = resp_ord; ep->ord = resp_ird; } if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) { CTR2(KTR_IW_CXGBE, "%s:pmrf %p", __func__, ep); if (ntohs(mpa_v2_params->ord) & MPA_V2_RDMA_WRITE_RTR) { CTR2(KTR_IW_CXGBE, "%s:pmrg %p", __func__, ep); ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_RDMA_WRITE; } else if (ntohs(mpa_v2_params->ord) & MPA_V2_RDMA_READ_RTR) { CTR2(KTR_IW_CXGBE, "%s:pmrh %p", __func__, ep); ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; } } } } else { CTR2(KTR_IW_CXGBE, "%s:pmri %p", __func__, ep); if (mpa->revision == 1) { CTR2(KTR_IW_CXGBE, "%s:pmrj %p", __func__, ep); if (peer2peer) { CTR2(KTR_IW_CXGBE, "%s:pmrk %p", __func__, ep); ep->mpa_attr.p2p_type = p2p_type; } } } if (set_tcpinfo(ep)) { CTR2(KTR_IW_CXGBE, "%s:pmrl %p", __func__, ep); printf("%s set_tcpinfo error\n", __func__); err = -ECONNRESET; goto err; } CTR6(KTR_IW_CXGBE, "%s - crc_enabled = %d, recv_marker_enabled = %d, " "xmit_marker_enabled = %d, version = %d p2p_type = %d", __func__, ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, ep->mpa_attr.p2p_type); /* * If responder's RTR does not match with that of initiator, assign * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not * generated when moving QP to RTS state. 
 * A TERM message will be sent after QP has moved to RTS state
	 */
	if ((ep->mpa_attr.version == 2) && peer2peer &&
	    (ep->mpa_attr.p2p_type != p2p_type)) {
		CTR2(KTR_IW_CXGBE, "%s:pmrm %p", __func__, ep);
		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
		rtr_mismatch = 1;
	}

	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = C4IW_QP_STATE_RTS;

	mask = C4IW_QP_ATTR_NEXT_STATE | C4IW_QP_ATTR_LLP_STREAM_HANDLE |
	    C4IW_QP_ATTR_MPA_ATTR | C4IW_QP_ATTR_MAX_IRD |
	    C4IW_QP_ATTR_MAX_ORD;

	/* bind QP and TID with INIT_WR */
	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
	if (err) {
		CTR2(KTR_IW_CXGBE, "%s:pmrn %p", __func__, ep);
		goto err;
	}

	/*
	 * If responder's RTR requirement did not match with what initiator
	 * supports, generate TERM message
	 */
	if (rtr_mismatch) {
		CTR2(KTR_IW_CXGBE, "%s:pmro %p", __func__, ep);
		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
		attrs.layer_etype = LAYER_MPA | DDP_LLP;
		attrs.ecode = MPA_NOMATCH_RTR;
		attrs.next_state = C4IW_QP_STATE_TERMINATE;
		attrs.send_term = 1;
		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
		    C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
		err = -ENOMEM;
		disconnect = 1;
		goto out;
	}

	/*
	 * Generate TERM if initiator IRD is not sufficient for responder
	 * provided ORD. Currently, we do the same behaviour even when
	 * responder provided IRD is also not sufficient with regard to
	 * initiator ORD.
	 */
	if (insuff_ird) {
		CTR2(KTR_IW_CXGBE, "%s:pmrp %p", __func__, ep);
		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
		    __func__);
		attrs.layer_etype = LAYER_MPA | DDP_LLP;
		attrs.ecode = MPA_INSUFF_IRD;
		attrs.next_state = C4IW_QP_STATE_TERMINATE;
		attrs.send_term = 1;
		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
		    C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
		err = -ENOMEM;
		disconnect = 1;
		goto out;
	}
	goto out;
err_stop_timer:
	STOP_EP_TIMER(ep);
err:
	disconnect = 2;
out:
	connect_reply_upcall(ep, err);
	CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep);
	return disconnect;
}

/*
 * process_mpa_request - process streaming mode MPA request
 *
 * Returns:
 *
 * 0 upon success indicating a connect request was delivered to the ULP
 * or the mpa request is incomplete but valid so far.
 *
 * 1 if a failure requires the caller to close the connection.
 *
 * 2 if a failure requires the caller to abort the connection.
 */
static int
process_mpa_request(struct c4iw_ep *ep)
{
	struct mpa_message *mpa;
	struct mpa_v2_conn_params *mpa_v2_params;
	u16 plen;
	int flags = MSG_DONTWAIT;
	int rc;
	struct iovec iov;
	struct uio uio;
	enum c4iw_ep_state state = ep->com.state;

	CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]);

	if (state != MPA_REQ_WAIT)
		return 0;

	iov.iov_base = &ep->mpa_pkt[ep->mpa_pkt_len];
	iov.iov_len = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = NULL; /* uio.uio_td = ep->com.thread; */

	rc = soreceive(ep->com.so, NULL, &uio, NULL, NULL, &flags);
	if (rc == EAGAIN)
		return 0;
	else if (rc)
		goto err_stop_timer;

	KASSERT(uio.uio_offset > 0, ("%s: soreceive on so %p read no data",
	    __func__, ep->com.so));
	ep->mpa_pkt_len += uio.uio_offset;

	/*
	 * If we get more than the supported amount of private data then we
	 * must fail this connection.  XXX: check so_rcv->sb_cc, or peek with
	 * another soreceive, or increase the size of mpa_pkt by 1 and abort
	 * if the last byte is filled by the soreceive above.
	 */
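	/*
	 * What has accumulated in ep->mpa_pkt is parsed below as an MPA
	 * start frame, laid out roughly as:
	 *
	 *	struct mpa_message		(key, flags, revision, length)
	 *	struct mpa_v2_conn_params	(ird/ord, MPA revision 2 only)
	 *	private data			(up to MPA_MAX_PRIVATE_DATA)
	 *
	 * Each check below simply returns 0 ("valid so far") until the
	 * whole frame, including private data, has arrived.
	 */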
	/* Don't even have the MPA message. Wait for more data to arrive. */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return 0;
	mpa = (struct mpa_message *) ep->mpa_pkt;

	/*
	 * Validate MPA Header.
	 */
	if (mpa->revision > mpa_rev) {
		log(LOG_ERR, "%s: MPA version mismatch. Local = %d,"
		    " Received = %d\n", __func__, mpa_rev, mpa->revision);
		goto err_stop_timer;
	}

	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
		goto err_stop_timer;

	/*
	 * Fail if there's too much private data.
	 */
	plen = ntohs(mpa->private_data_size);
	if (plen > MPA_MAX_PRIVATE_DATA)
		goto err_stop_timer;

	/*
	 * If plen does not account for pkt size
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
		goto err_stop_timer;

	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return 0;

	/*
	 * If we get here we have accumulated the entire mpa
	 * start request message including private data.
	 */
	ep->mpa_attr.initiator = 0;
	ep->mpa_attr.crc_enabled =
	    (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa->revision;
	if (mpa->revision == 1)
		ep->tried_with_mpa_v1 = 1;
	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;

	if (mpa->revision == 2) {
		ep->mpa_attr.enhanced_rdma_conn =
		    mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
		if (ep->mpa_attr.enhanced_rdma_conn) {
			mpa_v2_params = (struct mpa_v2_conn_params *)
			    (ep->mpa_pkt + sizeof(*mpa));
			ep->ird = ntohs(mpa_v2_params->ird) &
			    MPA_V2_IRD_ORD_MASK;
			ep->ird = min_t(u32, ep->ird,
			    cur_max_read_depth(ep->com.dev));
			ep->ord = ntohs(mpa_v2_params->ord) &
			    MPA_V2_IRD_ORD_MASK;
			ep->ord = min_t(u32, ep->ord,
			    cur_max_read_depth(ep->com.dev));
			CTR3(KTR_IW_CXGBE, "%s initiator ird %u ord %u",
			    __func__, ep->ird, ep->ord);
			if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
				if (peer2peer) {
					if (ntohs(mpa_v2_params->ord) &
					    MPA_V2_RDMA_WRITE_RTR)
						ep->mpa_attr.p2p_type =
						    FW_RI_INIT_P2PTYPE_RDMA_WRITE;
					else if (ntohs(mpa_v2_params->ord) &
					    MPA_V2_RDMA_READ_RTR)
						ep->mpa_attr.p2p_type =
						    FW_RI_INIT_P2PTYPE_READ_REQ;
				}
		}
	} else if (mpa->revision == 1 && peer2peer)
		ep->mpa_attr.p2p_type = p2p_type;

	if (set_tcpinfo(ep))
		goto err_stop_timer;

	CTR5(KTR_IW_CXGBE, "%s: crc_enabled = %d, recv_marker_enabled = %d, "
	    "xmit_marker_enabled = %d, version = %d", __func__,
	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	ep->com.state = MPA_REQ_RCVD;
	STOP_EP_TIMER(ep);

	/* drive upcall */
	if (ep->parent_ep->com.state != DEAD)
		if (connect_request_upcall(ep))
			goto err_out;
	return 0;

err_stop_timer:
	STOP_EP_TIMER(ep);
err_out:
	return 2;
}

/*
 * Upcall from the adapter indicating data has been transmitted.
 * For us it's just the single MPA request or reply. We can now free
 * the skb holding the mpa message.
*/ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { #ifdef KTR int err; #endif struct c4iw_ep *ep = to_ep(cm_id); int abort = 0; mutex_lock(&ep->com.mutex); CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep); if ((ep->com.state == DEAD) || (ep->com.state != MPA_REQ_RCVD)) { CTR2(KTR_IW_CXGBE, "%s:crc1 %p", __func__, ep); mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return -ECONNRESET; } set_bit(ULP_REJECT, &ep->com.history); if (mpa_rev == 0) { CTR2(KTR_IW_CXGBE, "%s:crc2 %p", __func__, ep); abort = 1; } else { CTR2(KTR_IW_CXGBE, "%s:crc3 %p", __func__, ep); abort = send_mpa_reject(ep, pdata, pdata_len); } STOP_EP_TIMER(ep); #ifdef KTR err = c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL); #else c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL); #endif mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); CTR3(KTR_IW_CXGBE, "%s:crc4 %p, err: %d", __func__, ep, err); return 0; } int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { int err; struct c4iw_qp_attributes attrs = {0}; enum c4iw_qp_attr_mask mask; struct c4iw_ep *ep = to_ep(cm_id); struct c4iw_dev *h = to_c4iw_dev(cm_id->device); struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); int abort = 0; mutex_lock(&ep->com.mutex); CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep); if ((ep->com.state == DEAD) || (ep->com.state != MPA_REQ_RCVD)) { CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep); err = -ECONNRESET; goto err_out; } BUG_ON(!qp); set_bit(ULP_ACCEPT, &ep->com.history); if ((conn_param->ord > c4iw_max_read_depth) || (conn_param->ird > c4iw_max_read_depth)) { CTR2(KTR_IW_CXGBE, "%s:cac2 %p", __func__, ep); err = -EINVAL; goto err_abort; } if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { CTR2(KTR_IW_CXGBE, "%s:cac3 %p", __func__, ep); if (conn_param->ord > ep->ird) { if (RELAXED_IRD_NEGOTIATION) { conn_param->ord = ep->ird; } else { ep->ird = conn_param->ird; ep->ord = conn_param->ord; send_mpa_reject(ep, conn_param->private_data, conn_param->private_data_len); err = -ENOMEM; goto err_abort; } } if (conn_param->ird < ep->ord) { if (RELAXED_IRD_NEGOTIATION && ep->ord <= h->rdev.adap->params.max_ordird_qp) { conn_param->ird = ep->ord; } else { err = -ENOMEM; goto err_abort; } } } ep->ird = conn_param->ird; ep->ord = conn_param->ord; if (ep->mpa_attr.version == 1) { if (peer2peer && ep->ird == 0) ep->ird = 1; } else { if (peer2peer && (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) && (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0) ep->ird = 1; } CTR4(KTR_IW_CXGBE, "%s %d ird %d ord %d", __func__, __LINE__, ep->ird, ep->ord); ep->com.cm_id = cm_id; ref_cm_id(&ep->com); ep->com.qp = qp; ref_qp(ep); //ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq; /* bind QP to EP and move to RTS */ attrs.mpa_attr = ep->mpa_attr; attrs.max_ird = ep->ird; attrs.max_ord = ep->ord; attrs.llp_stream_handle = ep; attrs.next_state = C4IW_QP_STATE_RTS; /* bind QP and TID with INIT_WR */ mask = C4IW_QP_ATTR_NEXT_STATE | C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR | C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); if (err) { CTR3(KTR_IW_CXGBE, "%s:caca %p, err: %d", __func__, ep, err); goto err_defef_cm_id; } err = send_mpa_reply(ep, conn_param->private_data, conn_param->private_data_len); if (err) { CTR3(KTR_IW_CXGBE, "%s:cacb %p, err: %d", __func__, ep, err); goto err_defef_cm_id; } ep->com.state = FPDU_MODE; established_upcall(ep); mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); CTR2(KTR_IW_CXGBE, 
"%s:cacE %p", __func__, ep); return 0; err_defef_cm_id: deref_cm_id(&ep->com); err_abort: abort = 1; err_out: if (abort) c4iw_ep_disconnect(ep, 1, GFP_KERNEL); mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep); return err; } static int c4iw_sock_create(struct sockaddr_storage *laddr, struct socket **so) { int ret; int size, on; struct socket *sock = NULL; struct sockopt sopt; ret = sock_create_kern(laddr->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) { CTR2(KTR_IW_CXGBE, "%s:Failed to create TCP socket. err %d", __func__, ret); return ret; } if (reuseaddr) { bzero(&sopt, sizeof(struct sockopt)); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_REUSEADDR; on = 1; sopt.sopt_val = &on; sopt.sopt_valsize = sizeof(on); ret = -sosetopt(sock, &sopt); if (ret != 0) { log(LOG_ERR, "%s: sosetopt(%p, SO_REUSEADDR) " "failed with %d.\n", __func__, sock, ret); } bzero(&sopt, sizeof(struct sockopt)); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_REUSEPORT; on = 1; sopt.sopt_val = &on; sopt.sopt_valsize = sizeof(on); ret = -sosetopt(sock, &sopt); if (ret != 0) { log(LOG_ERR, "%s: sosetopt(%p, SO_REUSEPORT) " "failed with %d.\n", __func__, sock, ret); } } ret = -sobind(sock, (struct sockaddr *)laddr, curthread); if (ret) { CTR2(KTR_IW_CXGBE, "%s:Failed to bind socket. err %p", __func__, ret); sock_release(sock); return ret; } size = laddr->ss_family == AF_INET6 ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in); ret = sock_getname(sock, (struct sockaddr *)laddr, &size, 0); if (ret) { CTR2(KTR_IW_CXGBE, "%s:sock_getname failed. err %p", __func__, ret); sock_release(sock); return ret; } *so = sock; return 0; } int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { int err = 0; struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_ep *ep = NULL; if_t nh_ifp; /* Logical egress interface */ struct epoch_tracker et; #ifdef VIMAGE struct rdma_cm_id *rdma_id = (struct rdma_cm_id*)cm_id->context; struct vnet *vnet = rdma_id->route.addr.dev_addr.net; #endif CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id); if ((conn_param->ord > c4iw_max_read_depth) || (conn_param->ird > c4iw_max_read_depth)) { CTR2(KTR_IW_CXGBE, "%s:cc1 %p", __func__, cm_id); err = -EINVAL; goto out; } ep = alloc_ep(sizeof(*ep), GFP_KERNEL); cm_id->provider_data = ep; init_timer(&ep->timer); ep->plen = conn_param->private_data_len; if (ep->plen) { CTR2(KTR_IW_CXGBE, "%s:cc3 %p", __func__, ep); memcpy(ep->mpa_pkt + sizeof(struct mpa_message), conn_param->private_data, ep->plen); } ep->ird = conn_param->ird; ep->ord = conn_param->ord; if (peer2peer && ep->ord == 0) { CTR2(KTR_IW_CXGBE, "%s:cc4 %p", __func__, ep); ep->ord = 1; } ep->com.dev = dev; ep->com.cm_id = cm_id; ref_cm_id(&ep->com); ep->com.qp = get_qhp(dev, conn_param->qpn); if (!ep->com.qp) { CTR2(KTR_IW_CXGBE, "%s:cc5 %p", __func__, ep); err = -EINVAL; goto fail; } ref_qp(ep); ep->com.thread = curthread; NET_EPOCH_ENTER(et); CURVNET_SET(vnet); err = get_ifnet_from_raddr(&cm_id->remote_addr, &nh_ifp); CURVNET_RESTORE(); NET_EPOCH_EXIT(et); if (err) { CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep); printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); err = EHOSTUNREACH; return err; } if (!(if_getcapenable(nh_ifp) & IFCAP_TOE) || TOEDEV(nh_ifp) == NULL) { err = -ENOPROTOOPT; goto fail; } ep->com.state = CONNECTING; ep->tos = 0; ep->com.local_addr = cm_id->local_addr; ep->com.remote_addr = cm_id->remote_addr; err = 
	setiwsockopt(ep->com.so);
	init_iwarp_socket(ep->com.so, &ep->com);
	err = -soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
	    ep->com.thread);
	if (err)
		goto fail_free_so;
	CTR2(KTR_IW_CXGBE, "%s:ccE, ep %p", __func__, ep);
	return 0;

fail_free_so:
	uninit_iwarp_socket(ep->com.so);
	ep->com.state = DEAD;
	sock_release(ep->com.so);
fail:
	deref_cm_id(&ep->com);
	c4iw_put_ep(&ep->com);
	ep = NULL;
out:
	CTR2(KTR_IW_CXGBE, "%s:ccE Error %d", __func__, err);
	return err;
}

/*
 * iwcm->create_listen. Returns -errno on failure.
 */
int
c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
{
	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
	struct c4iw_listen_ep *lep = NULL;
	struct listen_port_info *port_info = NULL;
	int rc = 0;

	CTR3(KTR_IW_CXGBE, "%s: cm_id %p, backlog %d", __func__, cm_id,
	    backlog);
	if (c4iw_fatal_error(&dev->rdev)) {
		CTR2(KTR_IW_CXGBE, "%s: cm_id %p, fatal error", __func__,
		    cm_id);
		return -EIO;
	}
	lep = alloc_ep(sizeof(*lep), GFP_KERNEL);
	lep->com.cm_id = cm_id;
	ref_cm_id(&lep->com);
	lep->com.dev = dev;
	lep->backlog = backlog;
	lep->com.local_addr = cm_id->local_addr;
	lep->com.thread = curthread;
	cm_id->provider_data = lep;
	lep->com.state = LISTEN;

	/* In case of INADDR_ANY, ibcore creates cmid for each device and
	 * invokes iw_cxgbe listener callbacks assuming that iw_cxgbe creates
	 * HW listeners for each device separately. But toecore expects a
	 * single solisten() call with the INADDR_ANY address to create HW
	 * listeners on all devices for a given port number. So the iw_cxgbe
	 * driver calls solisten() only once for INADDR_ANY (usually done on
	 * the first listener callback from ibcore). And all the subsequent
	 * INADDR_ANY listener callbacks from ibcore (for the same port
	 * address) do not invoke solisten(), as the first listener callback
	 * has already created listeners for all other devices (via
	 * solisten).
	 */
	if (c4iw_any_addr((struct sockaddr *)&lep->com.local_addr, NULL)) {
		port_info = add_ep_to_listenlist(lep);
		/* skip solisten() if refcnt > 1, as the listeners were
		 * already created by the 'Master lep'
		 */
		if (port_info->refcnt > 1) {
			/* As there will be only one listener socket for a TCP
			 * port, copy the Master lep's socket pointer to the
			 * other lep's that belong to the same TCP port.
			 */
			struct c4iw_listen_ep *head_lep =
			    container_of(port_info->lep_list.next,
			    struct c4iw_listen_ep, listen_ep_list);
			lep->com.so = head_lep->com.so;
			goto out;
		}
	}
	rc = c4iw_sock_create(&cm_id->local_addr, &lep->com.so);
	if (rc) {
		CTR2(KTR_IW_CXGBE, "%s:Failed to create socket. err %d",
		    __func__, rc);
		goto fail;
	}

	rc = -solisten(lep->com.so, backlog, curthread);
	if (rc) {
		CTR3(KTR_IW_CXGBE, "%s:Failed to listen on sock:%p. 
err %d", __func__, lep->com.so, rc); goto fail_free_so; } init_iwarp_socket(lep->com.so, &lep->com); out: return 0; fail_free_so: sock_release(lep->com.so); fail: if (port_info) rem_ep_from_listenlist(lep); deref_cm_id(&lep->com); c4iw_put_ep(&lep->com); return rc; } int c4iw_destroy_listen(struct iw_cm_id *cm_id) { struct c4iw_listen_ep *lep = to_listen_ep(cm_id); mutex_lock(&lep->com.mutex); CTR3(KTR_IW_CXGBE, "%s: cm_id %p, state %s", __func__, cm_id, states[lep->com.state]); lep->com.state = DEAD; if (c4iw_any_addr((struct sockaddr *)&lep->com.local_addr, lep->com.so->so_vnet)) { /* if no refcount then close listen socket */ if (!rem_ep_from_listenlist(lep)) close_socket(lep->com.so); } else close_socket(lep->com.so); deref_cm_id(&lep->com); mutex_unlock(&lep->com.mutex); c4iw_put_ep(&lep->com); return 0; } int __c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) { int ret; mutex_lock(&ep->com.mutex); ret = c4iw_ep_disconnect(ep, abrupt, gfp); mutex_unlock(&ep->com.mutex); return ret; } int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) { int ret = 0; int close = 0; struct c4iw_rdev *rdev; CTR2(KTR_IW_CXGBE, "%s:cedB %p", __func__, ep); rdev = &ep->com.dev->rdev; if (c4iw_fatal_error(rdev)) { CTR3(KTR_IW_CXGBE, "%s:ced1 fatal error %p %s", __func__, ep, states[ep->com.state]); if (ep->com.state != DEAD) { send_abort(ep); ep->com.state = DEAD; } close_complete_upcall(ep, -ECONNRESET); return ECONNRESET; } CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep, states[ep->com.state]); /* * Ref the ep here in case we have fatal errors causing the * ep to be released and freed. */ c4iw_get_ep(&ep->com); switch (ep->com.state) { case MPA_REQ_WAIT: case MPA_REQ_SENT: case MPA_REQ_RCVD: case MPA_REP_SENT: case FPDU_MODE: close = 1; if (abrupt) ep->com.state = ABORTING; else { ep->com.state = CLOSING; START_EP_TIMER(ep); } set_bit(CLOSE_SENT, &ep->com.flags); break; case CLOSING: if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { close = 1; if (abrupt) { STOP_EP_TIMER(ep); ep->com.state = ABORTING; } else ep->com.state = MORIBUND; } break; case MORIBUND: case ABORTING: case DEAD: CTR3(KTR_IW_CXGBE, "%s ignoring disconnect ep %p state %u", __func__, ep, ep->com.state); break; default: BUG(); break; } if (close) { CTR2(KTR_IW_CXGBE, "%s:ced3 %p", __func__, ep); if (abrupt) { CTR2(KTR_IW_CXGBE, "%s:ced4 %p", __func__, ep); set_bit(EP_DISC_ABORT, &ep->com.history); close_complete_upcall(ep, -ECONNRESET); send_abort(ep); } else { CTR2(KTR_IW_CXGBE, "%s:ced5 %p", __func__, ep); set_bit(EP_DISC_CLOSE, &ep->com.history); if (!ep->parent_ep) ep->com.state = MORIBUND; CURVNET_SET(ep->com.so->so_vnet); ret = sodisconnect(ep->com.so); CURVNET_RESTORE(); if (ret) { CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep); STOP_EP_TIMER(ep); send_abort(ep); ep->com.state = DEAD; close_complete_upcall(ep, -ECONNRESET); set_bit(EP_DISC_FAIL, &ep->com.history); if (ep->com.qp) { struct c4iw_qp_attributes attrs = {0}; attrs.next_state = C4IW_QP_STATE_ERROR; ret = c4iw_modify_qp( ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); CTR3(KTR_IW_CXGBE, "%s:ced7 %p ret %d", __func__, ep, ret); } } } } c4iw_put_ep(&ep->com); CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep); return ret; } #ifdef C4IW_EP_REDIRECT int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t) { struct c4iw_ep *ep = ctx; if (ep->dst != old) return 0; PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new, l2t); dst_hold(new); cxgb4_l2t_release(ep->l2t); ep->l2t = l2t; 
	dst_release(old);
	ep->dst = new;
	return 1;
}
#endif

static void
ep_timeout(unsigned long arg)
{
	struct c4iw_ep *ep = (struct c4iw_ep *)arg;

	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
		/*
		 * Only insert if it is not already on the list.
		 */
		if (!(ep->com.ep_events & C4IW_EVENT_TIMEOUT)) {
			CTR2(KTR_IW_CXGBE, "%s:et1 %p", __func__, ep);
			add_ep_to_req_list(ep, C4IW_EVENT_TIMEOUT);
		}
	}
}

static int
fw6_wr_rpl(struct adapter *sc, const __be64 *rpl)
{
	uint64_t val = be64toh(*rpl);
	int ret;
	struct c4iw_wr_wait *wr_waitp;

	ret = (int)((val >> 8) & 0xff);
	wr_waitp = (struct c4iw_wr_wait *)rpl[1];
	CTR3(KTR_IW_CXGBE, "%s wr_waitp %p ret %u", __func__, wr_waitp, ret);
	if (wr_waitp)
		c4iw_wake_up(wr_waitp, ret ? -ret : 0);

	return (0);
}

static int
fw6_cqe_handler(struct adapter *sc, const __be64 *rpl)
{
	struct cqe_list_entry *cle;
	unsigned long flag;

	cle = malloc(sizeof(*cle), M_CXGBE, M_NOWAIT);
	cle->rhp = sc->iwarp_softc;
	cle->err_cqe = *(const struct t4_cqe *)(&rpl[0]);

	spin_lock_irqsave(&err_cqe_lock, flag);
	list_add_tail(&cle->entry, &err_cqe_list);
	queue_work(c4iw_taskq, &c4iw_task);
	spin_unlock_irqrestore(&err_cqe_lock, flag);

	return (0);
}

static int
process_terminate(struct c4iw_ep *ep)
{
	struct c4iw_qp_attributes attrs = {0};

	CTR2(KTR_IW_CXGBE, "%s:tB %p", __func__, ep);

	if (ep && ep->com.qp) {
		printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n",
		    ep->hwtid, ep->com.qp->wq.sq.qid);
		attrs.next_state = C4IW_QP_STATE_TERMINATE;
		c4iw_modify_qp(ep->com.dev, ep->com.qp,
		    C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
	} else
		printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n",
		    ep->hwtid);
	CTR2(KTR_IW_CXGBE, "%s:tE %p", __func__, ep);

	return 0;
}

int __init c4iw_cm_init(void)
{
	t4_register_cpl_handler(CPL_RDMA_TERMINATE, terminate);
	t4_register_fw_msg_handler(FW6_TYPE_WR_RPL, fw6_wr_rpl);
	t4_register_fw_msg_handler(FW6_TYPE_CQE, fw6_cqe_handler);
	t4_register_an_handler(c4iw_ev_handler);

	TAILQ_INIT(&req_list);
	spin_lock_init(&req_lock);
	INIT_LIST_HEAD(&err_cqe_list);
	spin_lock_init(&err_cqe_lock);

	INIT_WORK(&c4iw_task, process_req);

	c4iw_taskq = create_singlethread_workqueue("iw_cxgbe");
	if (!c4iw_taskq)
		return -ENOMEM;

	return 0;
}

void __exit c4iw_cm_term(void)
{
	WARN_ON(!TAILQ_EMPTY(&req_list));
	WARN_ON(!list_empty(&err_cqe_list));
	flush_workqueue(c4iw_taskq);
	destroy_workqueue(c4iw_taskq);

	t4_register_cpl_handler(CPL_RDMA_TERMINATE, NULL);
	t4_register_fw_msg_handler(FW6_TYPE_WR_RPL, NULL);
	t4_register_fw_msg_handler(FW6_TYPE_CQE, NULL);
	t4_register_an_handler(NULL);
}
#endif
diff --git a/sys/dev/hyperv/hvsock/hv_sock.c b/sys/dev/hyperv/hvsock/hv_sock.c
index 60cdfecf3bee..655cc990876e 100644
--- a/sys/dev/hyperv/hvsock/hv_sock.c
+++ b/sys/dev/hyperv/hvsock/hv_sock.c
@@ -1,1742 +1,1741 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2020 Microsoft Corp.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_sock.h" #define HVSOCK_DBG_NONE 0x0 #define HVSOCK_DBG_INFO 0x1 #define HVSOCK_DBG_ERR 0x2 #define HVSOCK_DBG_VERBOSE 0x3 SYSCTL_NODE(_net, OID_AUTO, hvsock, CTLFLAG_RD, 0, "HyperV socket"); static int hvs_dbg_level; SYSCTL_INT(_net_hvsock, OID_AUTO, hvs_dbg_level, CTLFLAG_RWTUN, &hvs_dbg_level, 0, "hyperv socket debug level: 0 = none, 1 = info, 2 = error, 3 = verbose"); #define HVSOCK_DBG(level, ...) do { \ if (hvs_dbg_level >= (level)) \ printf(__VA_ARGS__); \ } while (0) MALLOC_DEFINE(M_HVSOCK, "hyperv_socket", "hyperv socket control structures"); static int hvs_dom_probe(void); /* The MTU is 16KB per host side's design */ #define HVSOCK_MTU_SIZE (1024 * 16) #define HVSOCK_SEND_BUF_SZ (PAGE_SIZE - sizeof(struct vmpipe_proto_header)) #define HVSOCK_HEADER_LEN (sizeof(struct hvs_pkt_header)) #define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \ roundup2(payload_len, 8) + \ sizeof(uint64_t)) /* * HyperV Transport sockets */ static struct protosw hv_socket_protosw = { .pr_type = SOCK_STREAM, .pr_protocol = HYPERV_SOCK_PROTO_TRANS, .pr_flags = PR_CONNREQUIRED, .pr_attach = hvs_trans_attach, .pr_bind = hvs_trans_bind, .pr_listen = hvs_trans_listen, .pr_accept = hvs_trans_accept, .pr_connect = hvs_trans_connect, .pr_peeraddr = hvs_trans_peeraddr, .pr_sockaddr = hvs_trans_sockaddr, .pr_soreceive = hvs_trans_soreceive, .pr_sosend = hvs_trans_sosend, .pr_disconnect = hvs_trans_disconnect, .pr_close = hvs_trans_close, .pr_detach = hvs_trans_detach, .pr_shutdown = hvs_trans_shutdown, .pr_abort = hvs_trans_abort, }; static struct domain hv_socket_domain = { .dom_family = AF_HYPERV, .dom_name = "hyperv", .dom_probe = hvs_dom_probe, .dom_nprotosw = 1, .dom_protosw = { &hv_socket_protosw }, }; DOMAIN_SET(hv_socket_); #define MAX_PORT ((uint32_t)0xFFFFFFFF) #define MIN_PORT ((uint32_t)0x0) /* 00000000-facb-11e6-bd58-64006a7986d3 */ static const struct hyperv_guid srv_id_template = { .hv_guid = { 0x00, 0x00, 0x00, 0x00, 0xcb, 0xfa, 0xe6, 0x11, 0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3 } }; static int hvsock_br_callback(void *, int, void *); static uint32_t hvsock_canread_check(struct hvs_pcb *); static uint32_t hvsock_canwrite_check(struct hvs_pcb *); static int hvsock_send_data(struct vmbus_channel *chan, struct uio *uio, uint32_t to_write, struct sockbuf *sb); /* Globals */ static struct sx hvs_trans_socks_sx; static struct mtx hvs_trans_socks_mtx; static LIST_HEAD(, hvs_pcb) hvs_trans_bound_socks; static LIST_HEAD(, hvs_pcb) hvs_trans_connected_socks; static uint32_t previous_auto_bound_port; static void hvsock_print_guid(struct hyperv_guid *guid) { unsigned char *p = (unsigned char 
*)guid; HVSOCK_DBG(HVSOCK_DBG_INFO, "0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x\n", *(unsigned int *)p, *((unsigned short *) &p[4]), *((unsigned short *) &p[6]), p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); } static bool is_valid_srv_id(const struct hyperv_guid *id) { return !memcmp(&id->hv_guid[4], &srv_id_template.hv_guid[4], sizeof(struct hyperv_guid) - 4); } static unsigned int get_port_by_srv_id(const struct hyperv_guid *srv_id) { return *((const unsigned int *)srv_id); } static void set_port_by_srv_id(struct hyperv_guid *srv_id, unsigned int port) { *((unsigned int *)srv_id) = port; } static void __hvs_remove_pcb_from_list(struct hvs_pcb *pcb, unsigned char list) { struct hvs_pcb *p = NULL; HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb); if (!pcb) return; if (list & HVS_LIST_BOUND) { LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next) if (p == pcb) LIST_REMOVE(p, bound_next); } if (list & HVS_LIST_CONNECTED) { LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next) if (p == pcb) LIST_REMOVE(pcb, connected_next); } } static void __hvs_remove_socket_from_list(struct socket *so, unsigned char list) { struct hvs_pcb *pcb = so2hvspcb(so); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb); __hvs_remove_pcb_from_list(pcb, list); } static void __hvs_insert_socket_on_list(struct socket *so, unsigned char list) { struct hvs_pcb *pcb = so2hvspcb(so); if (list & HVS_LIST_BOUND) LIST_INSERT_HEAD(&hvs_trans_bound_socks, pcb, bound_next); if (list & HVS_LIST_CONNECTED) LIST_INSERT_HEAD(&hvs_trans_connected_socks, pcb, connected_next); } void hvs_remove_socket_from_list(struct socket *so, unsigned char list) { if (!so || !so->so_pcb) { HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: socket or so_pcb is null\n", __func__); return; } mtx_lock(&hvs_trans_socks_mtx); __hvs_remove_socket_from_list(so, list); mtx_unlock(&hvs_trans_socks_mtx); } static void hvs_insert_socket_on_list(struct socket *so, unsigned char list) { if (!so || !so->so_pcb) { HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: socket or so_pcb is null\n", __func__); return; } mtx_lock(&hvs_trans_socks_mtx); __hvs_insert_socket_on_list(so, list); mtx_unlock(&hvs_trans_socks_mtx); } static struct socket * __hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list) { struct hvs_pcb *p = NULL; if (list & HVS_LIST_BOUND) LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next) if (p->so != NULL && addr->hvs_port == p->local_addr.hvs_port) return p->so; if (list & HVS_LIST_CONNECTED) LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next) if (p->so != NULL && addr->hvs_port == p->local_addr.hvs_port) return p->so; return NULL; } static struct socket * hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list) { struct socket *s = NULL; mtx_lock(&hvs_trans_socks_mtx); s = __hvs_find_socket_on_list(addr, list); mtx_unlock(&hvs_trans_socks_mtx); return s; } static inline void hvs_addr_set(struct sockaddr_hvs *addr, unsigned int port) { memset(addr, 0, sizeof(*addr)); addr->sa_family = AF_HYPERV; addr->sa_len = sizeof(*addr); addr->hvs_port = port; } void hvs_addr_init(struct sockaddr_hvs *addr, const struct hyperv_guid *svr_id) { hvs_addr_set(addr, get_port_by_srv_id(svr_id)); } int hvs_trans_lock(void) { sx_xlock(&hvs_trans_socks_sx); return (0); } void hvs_trans_unlock(void) { sx_xunlock(&hvs_trans_socks_sx); } static int hvs_dom_probe(void) { /* Don't even give us a chance to attach on non-HyperV. 
 */
	if (vm_guest != VM_GUEST_HV)
		return (ENXIO);
	return (0);
}

static void
hvs_trans_init(void *arg __unused)
{
	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
	    "%s: HyperV Socket hvs_trans_init called\n", __func__);

	/* Initialize Globals */
	previous_auto_bound_port = MAX_PORT;
	sx_init(&hvs_trans_socks_sx, "hvs_trans_sock_sx");
	mtx_init(&hvs_trans_socks_mtx,
	    "hvs_trans_socks_mtx", NULL, MTX_DEF);
	LIST_INIT(&hvs_trans_bound_socks);
	LIST_INIT(&hvs_trans_connected_socks);
}
SYSINIT(hvs_trans_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
    hvs_trans_init, NULL);

/*
 * Called in two cases:
 * 1) When user calls socket();
 * 2) When we accept new incoming connection and call sonewconn().
 */
int
hvs_trans_attach(struct socket *so, int proto, struct thread *td)
{
	struct hvs_pcb *pcb = so2hvspcb(so);

	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
	    "%s: HyperV Socket hvs_trans_attach called\n", __func__);

	if (so->so_type != SOCK_STREAM)
		return (ESOCKTNOSUPPORT);

	if (proto != 0 && proto != HYPERV_SOCK_PROTO_TRANS)
		return (EPROTONOSUPPORT);

	if (pcb != NULL)
		return (EISCONN);
	pcb = malloc(sizeof(struct hvs_pcb), M_HVSOCK, M_NOWAIT | M_ZERO);
	if (pcb == NULL)
		return (ENOMEM);

	pcb->so = so;
	so->so_pcb = (void *)pcb;

	return (0);
}

void
hvs_trans_detach(struct socket *so)
{
	struct hvs_pcb *pcb;

	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
	    "%s: HyperV Socket hvs_trans_detach called\n", __func__);

	(void) hvs_trans_lock();
	pcb = so2hvspcb(so);
	if (pcb == NULL) {
		hvs_trans_unlock();
		return;
	}

	if (SOLISTENING(so)) {
		bzero(pcb, sizeof(*pcb));
		free(pcb, M_HVSOCK);
	}

	so->so_pcb = NULL;

	hvs_trans_unlock();
}

int
hvs_trans_bind(struct socket *so, struct sockaddr *addr, struct thread *td)
{
	struct hvs_pcb *pcb = so2hvspcb(so);
	struct sockaddr_hvs *sa = (struct sockaddr_hvs *) addr;
	int error = 0;

	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
	    "%s: HyperV Socket hvs_trans_bind called\n", __func__);

	if (sa == NULL) {
		return (EINVAL);
	}

	if (pcb == NULL) {
		return (EINVAL);
	}

	if (sa->sa_family != AF_HYPERV) {
		HVSOCK_DBG(HVSOCK_DBG_ERR,
		    "%s: Not supported, sa_family is %u\n",
		    __func__, sa->sa_family);
		return (EAFNOSUPPORT);
	}
	if (sa->sa_len != sizeof(*sa)) {
		HVSOCK_DBG(HVSOCK_DBG_ERR,
		    "%s: Not supported, sa_len is %u\n",
		    __func__, sa->sa_len);
		return (EINVAL);
	}

	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
	    "%s: binding port = 0x%x\n", __func__, sa->hvs_port);

	mtx_lock(&hvs_trans_socks_mtx);
	if (__hvs_find_socket_on_list(sa,
	    HVS_LIST_BOUND | HVS_LIST_CONNECTED)) {
		error = EADDRINUSE;
	} else {
		/*
		 * The address is available for us to bind.
		 * Add socket to the bound list.
		 */
		hvs_addr_set(&pcb->local_addr, sa->hvs_port);
		hvs_addr_set(&pcb->remote_addr, HVADDR_PORT_ANY);
		__hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
	}
	mtx_unlock(&hvs_trans_socks_mtx);

	return (error);
}

int
hvs_trans_listen(struct socket *so, int backlog, struct thread *td)
{
	struct hvs_pcb *pcb = so2hvspcb(so);
	struct socket *bound_so;
	int error;

	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
	    "%s: HyperV Socket hvs_trans_listen called\n", __func__);

	if (pcb == NULL)
		return (EINVAL);

	/* Check if the address is already bound and it was by us.
*/ bound_so = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND); if (bound_so == NULL || bound_so != so) { HVSOCK_DBG(HVSOCK_DBG_ERR, "%s: Address not bound or not by us.\n", __func__); return (EADDRNOTAVAIL); } SOCK_LOCK(so); error = solisten_proto_check(so); if (error == 0) solisten_proto(so, backlog); SOCK_UNLOCK(so); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: HyperV Socket listen error = %d\n", __func__, error); return (error); } int -hvs_trans_accept(struct socket *so, struct sockaddr **nam) +hvs_trans_accept(struct socket *so, struct sockaddr *sa) { struct hvs_pcb *pcb = so2hvspcb(so); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: HyperV Socket hvs_trans_accept called\n", __func__); if (pcb == NULL) return (EINVAL); - *nam = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, - M_NOWAIT); + memcpy(sa, &pcb->remote_addr, pcb->remote_addr.sa_len); - return ((*nam == NULL) ? ENOMEM : 0); + return (0); } int hvs_trans_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct hvs_pcb *pcb = so2hvspcb(so); struct sockaddr_hvs *raddr = (struct sockaddr_hvs *)nam; bool found_auto_bound_port = false; int i, error = 0; HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: HyperV Socket hvs_trans_connect called, remote port is %x\n", __func__, raddr->hvs_port); if (pcb == NULL) return (EINVAL); /* Verify the remote address */ if (raddr == NULL) return (EINVAL); if (raddr->sa_family != AF_HYPERV) return (EAFNOSUPPORT); if (raddr->sa_len != sizeof(*raddr)) return (EINVAL); mtx_lock(&hvs_trans_socks_mtx); if (so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTING|SS_ISCONNECTING)) { HVSOCK_DBG(HVSOCK_DBG_ERR, "%s: socket connect in progress\n", __func__); error = EINPROGRESS; goto out; } /* * Find an available port for us to auto bind the local * address. */ hvs_addr_set(&pcb->local_addr, 0); for (i = previous_auto_bound_port - 1; i != previous_auto_bound_port; i --) { if (i == MIN_PORT) i = MAX_PORT; pcb->local_addr.hvs_port = i; if (__hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) { found_auto_bound_port = true; previous_auto_bound_port = i; HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: found local bound port is %x\n", __func__, pcb->local_addr.hvs_port); break; } } if (found_auto_bound_port == true) { /* Found available port for auto bound, put on list */ __hvs_insert_socket_on_list(so, HVS_LIST_BOUND); /* Set VM service ID */ pcb->vm_srv_id = srv_id_template; set_port_by_srv_id(&pcb->vm_srv_id, pcb->local_addr.hvs_port); /* Set host service ID and remote port */ pcb->host_srv_id = srv_id_template; set_port_by_srv_id(&pcb->host_srv_id, raddr->hvs_port); hvs_addr_set(&pcb->remote_addr, raddr->hvs_port); /* Change the socket state to SS_ISCONNECTING */ soisconnecting(so); } else { HVSOCK_DBG(HVSOCK_DBG_ERR, "%s: No local port available for auto bound\n", __func__); error = EADDRINUSE; } HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect vm_srv_id is "); hvsock_print_guid(&pcb->vm_srv_id); HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect host_srv_id is "); hvsock_print_guid(&pcb->host_srv_id); out: mtx_unlock(&hvs_trans_socks_mtx); if (found_auto_bound_port == true) vmbus_req_tl_connect(&pcb->vm_srv_id, &pcb->host_srv_id); return (error); } int hvs_trans_disconnect(struct socket *so) { struct hvs_pcb *pcb; HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: HyperV Socket hvs_trans_disconnect called\n", __func__); (void) hvs_trans_lock(); pcb = so2hvspcb(so); if (pcb == NULL) { hvs_trans_unlock(); return (EINVAL); } /* If socket is already disconnected, skip this */ if ((so->so_state & SS_ISDISCONNECTED) == 0) 
soisdisconnecting(so); hvs_trans_unlock(); return (0); }
struct hvs_callback_arg { struct uio *uio; struct sockbuf *sb; };
int hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct hvs_pcb *pcb = so2hvspcb(so); struct sockbuf *sb; ssize_t orig_resid; uint32_t canread, to_read; int flags, error = 0; struct hvs_callback_arg cbarg; HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: HyperV Socket hvs_trans_soreceive called\n", __func__); if (so->so_type != SOCK_STREAM) return (EINVAL); if (pcb == NULL) return (EINVAL); if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_PEEK) return (EOPNOTSUPP); /* If no space to copy out anything */ if (uio->uio_resid == 0 || uio->uio_rw != UIO_READ) return (EINVAL); orig_resid = uio->uio_resid; /* Prevent other readers from entering the socket. */ error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags)); if (error) { HVSOCK_DBG(HVSOCK_DBG_ERR, "%s: soiolock returned error = %d\n", __func__, error); return (error); } sb = &so->so_rcv; SOCKBUF_LOCK(sb); cbarg.uio = uio; cbarg.sb = sb; /* * If the socket is closing, there might still be some data * in the rx br to read. However, we need to make sure * the channel is still open. */ if ((sb->sb_state & SBS_CANTRCVMORE) && (so->so_state & SS_ISDISCONNECTED)) { /* Other thread already closed the channel */ error = EPIPE; goto out; } while (true) { while (uio->uio_resid > 0 && (canread = hvsock_canread_check(pcb)) > 0) { to_read = MIN(canread, uio->uio_resid); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: to_read = %u, skip = %u\n", __func__, to_read, (unsigned int)(sizeof(struct hvs_pkt_header) + pcb->recv_data_off)); error = vmbus_chan_recv_peek_call(pcb->chan, to_read, sizeof(struct hvs_pkt_header) + pcb->recv_data_off, hvsock_br_callback, (void *)&cbarg); /* * It is possible the socket is disconnected because * we released the lock in hvsock_br_callback. So we * need to check the state to make sure it is not * disconnected. */ if (error || so->so_state & SS_ISDISCONNECTED) { break; } pcb->recv_data_len -= to_read; pcb->recv_data_off += to_read; } if (error) break; /* Abort if socket has reported problems. */ if (so->so_error) { if (so->so_error == ESHUTDOWN && orig_resid > uio->uio_resid) { /* * Although we got a FIN, we also received * some data in this round. Deliver it * to the user. */ error = 0; } else { if (so->so_error != ESHUTDOWN) error = so->so_error; } break; } /* Cannot receive more. */ if (sb->sb_state & SBS_CANTRCVMORE) break; /* We are done if buffer has been filled */ if (uio->uio_resid == 0) break; if (!(flags & MSG_WAITALL) && orig_resid > uio->uio_resid) break; /* Buffer ring is empty and we shall not block */ if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { if (orig_resid == uio->uio_resid) { /* We have not read anything */ error = EAGAIN; } HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: non-blocking read return, error %d.\n", __func__, error); break; } /* * Wait and block until (more) data comes in. * Note: Drops the sockbuf lock during wait.
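 *
 * A condensed sketch of this receive loop's bookkeeping, for
 * orientation only: payload is peeked straight out of the VMBus ring
 * without consuming the packet, and recv_data_off tracks how far into
 * the current packet's payload we are:
 *
 *	while ((canread = hvsock_canread_check(pcb)) > 0) {
 *		to_read = MIN(canread, uio->uio_resid);
 *		vmbus_chan_recv_peek_call(pcb->chan, to_read,
 *		    sizeof(struct hvs_pkt_header) + pcb->recv_data_off,
 *		    hvsock_br_callback, &cbarg);
 *		pcb->recv_data_len -= to_read;
 *		pcb->recv_data_off += to_read;
 *	}
 *
 * hvsock_canread_check() consumes the packet header and resets both
 * offsets once the previous packet is fully drained.
 *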
*/ error = sbwait(so, SO_RCV); if (error) break; HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: wake up from sbwait, read available is %u\n", __func__, vmbus_chan_read_available(pcb->chan)); } out: SOCKBUF_UNLOCK(sb); SOCK_IO_RECV_UNLOCK(so); /* We received a FIN in this call */ if (so->so_error == ESHUTDOWN) { if (so->so_snd.sb_state & SBS_CANTSENDMORE) { /* Send side has already closed */ soisdisconnecting(so); } else { /* Just close the receive side */ socantrcvmore(so); } } HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: returning error = %d, so_error = %d\n", __func__, error, so->so_error); return (error); }
int hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *controlp, int flags, struct thread *td) { struct hvs_pcb *pcb = so2hvspcb(so); struct sockbuf *sb; ssize_t orig_resid; uint32_t canwrite, to_write; int error = 0; HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: HyperV Socket hvs_trans_sosend called, uio_resid = %zd\n", __func__, uio->uio_resid); if (so->so_type != SOCK_STREAM) return (EINVAL); if (pcb == NULL) return (EINVAL); /* If nothing to send */ if (uio->uio_resid == 0 || uio->uio_rw != UIO_WRITE) return (EINVAL); orig_resid = uio->uio_resid; /* Prevent other writers from entering the socket. */ error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags)); if (error) { HVSOCK_DBG(HVSOCK_DBG_ERR, "%s: soiolock returned error = %d\n", __func__, error); return (error); } sb = &so->so_snd; SOCKBUF_LOCK(sb); if ((sb->sb_state & SBS_CANTSENDMORE) || so->so_error == ESHUTDOWN) { error = EPIPE; goto out; } while (uio->uio_resid > 0) { canwrite = hvsock_canwrite_check(pcb); if (canwrite == 0) { /* We have sent some data */ if (orig_resid > uio->uio_resid) break; /* * We have not sent any data and it is * non-blocking I/O */ if (so->so_state & SS_NBIO || (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) { error = EWOULDBLOCK; break; } else { /* * We are here because there is no space on * the send buffer ring. Signal the other side * to read and free more space. * Sleep and wait until space is available to send. * Note: Drops the sockbuf lock during wait. */ error = sbwait(so, SO_SND); if (error) break; HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: wake up from sbwait, space avail on " "tx ring is %u\n", __func__, vmbus_chan_write_available(pcb->chan)); continue; } } to_write = MIN(canwrite, uio->uio_resid); to_write = MIN(to_write, HVSOCK_SEND_BUF_SZ); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: canwrite is %u, to_write = %u\n", __func__, canwrite, to_write); error = hvsock_send_data(pcb->chan, uio, to_write, sb); if (error) break; } out: SOCKBUF_UNLOCK(sb); SOCK_IO_SEND_UNLOCK(so); return (error); }
int hvs_trans_peeraddr(struct socket *so, struct sockaddr **nam) { struct hvs_pcb *pcb = so2hvspcb(so); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: HyperV Socket hvs_trans_peeraddr called\n", __func__); if (pcb == NULL) return (EINVAL); *nam = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, M_NOWAIT); return ((*nam == NULL)? ENOMEM : 0); }
int hvs_trans_sockaddr(struct socket *so, struct sockaddr **nam) { struct hvs_pcb *pcb = so2hvspcb(so); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: HyperV Socket hvs_trans_sockaddr called\n", __func__); if (pcb == NULL) return (EINVAL); *nam = sodupsockaddr((struct sockaddr *) &pcb->local_addr, M_NOWAIT); return ((*nam == NULL)?
ENOMEM : 0); }
void hvs_trans_close(struct socket *so) { struct hvs_pcb *pcb; HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: HyperV Socket hvs_trans_close called\n", __func__); (void) hvs_trans_lock(); pcb = so2hvspcb(so); if (!pcb) { hvs_trans_unlock(); return; } if (so->so_state & SS_ISCONNECTED) { /* Send a FIN to peer */ HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: hvs_trans_close sending a FIN to host\n", __func__); (void) hvsock_send_data(pcb->chan, NULL, 0, NULL); } if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) soisdisconnected(so); pcb->chan = NULL; pcb->so = NULL; if (SOLISTENING(so)) { mtx_lock(&hvs_trans_socks_mtx); /* Remove from bound list */ __hvs_remove_socket_from_list(so, HVS_LIST_BOUND); mtx_unlock(&hvs_trans_socks_mtx); } hvs_trans_unlock(); return; }
void hvs_trans_abort(struct socket *so) { struct hvs_pcb *pcb = so2hvspcb(so); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: HyperV Socket hvs_trans_abort called\n", __func__); (void) hvs_trans_lock(); if (pcb == NULL) { hvs_trans_unlock(); return; } if (SOLISTENING(so)) { mtx_lock(&hvs_trans_socks_mtx); /* Remove from bound list */ __hvs_remove_socket_from_list(so, HVS_LIST_BOUND); mtx_unlock(&hvs_trans_socks_mtx); } if (so->so_state & SS_ISCONNECTED) { (void) sodisconnect(so); } hvs_trans_unlock(); return; }
int hvs_trans_shutdown(struct socket *so) { struct hvs_pcb *pcb = so2hvspcb(so); struct sockbuf *sb; HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: HyperV Socket hvs_trans_shutdown called\n", __func__); if (pcb == NULL) return (EINVAL); /* * Only gets called when the shutdown method is SHUT_WR or * SHUT_RDWR. * When the method is SHUT_RD or SHUT_RDWR, the caller has * already set SBS_CANTRCVMORE on the receive side socket * buffer. */ if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) { /* * SHUT_WR only case. * Receive side is still open. Just close * the send side. */ socantsendmore(so); } else { /* SHUT_RDWR case */ if (so->so_state & SS_ISCONNECTED) { /* Send a FIN to peer */ sb = &so->so_snd; SOCKBUF_LOCK(sb); (void) hvsock_send_data(pcb->chan, NULL, 0, sb); SOCKBUF_UNLOCK(sb); soisdisconnecting(so); } } return (0); }
/* In the VM, we support Hyper-V Sockets with AF_HYPERV, and the endpoint is * <port> (see struct sockaddr_hvs). * * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV: * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user- * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with * the below sockaddr: * * struct SOCKADDR_HV * { * ADDRESS_FAMILY Family; * USHORT Reserved; * GUID VmId; * GUID ServiceId; * }; * Note: VmID is not used by the FreeBSD VM and actually isn't transmitted via * VMBus, because here it's obvious the host and the VM can easily identify * each other. Though the VmID is useful on the host, especially in the case * of Windows containers, the FreeBSD VM doesn't need it at all. * * To be compatible with similar infrastructure in Linux VMs, we have * to limit the available GUID space of SOCKADDR_HV so that we can create * a mapping between a FreeBSD AF_HYPERV port and a SOCKADDR_HV Service GUID. * The rule for writing Hyper-V Sockets apps on the host and in the FreeBSD VM is: * **************************************************************************** * The only valid Service GUIDs, from the perspectives of both the host and * * FreeBSD VM, that can be connected by the other end, must conform to this * * format: <port>-facb-11e6-bd58-64006a7986d3.
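 * (For example, under this rule AF_HYPERV port 0x808 corresponds to the
 * Service GUID 00000808-facb-11e6-bd58-64006a7986d3: the first 32-bit
 * field carries the port, printed as eight hex digits, and the rest of
 * the GUID is fixed.)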
* **************************************************************************** * * When we write apps on the host to connect(), the GUID ServiceID is used. * When we write apps in FreeBSD VM to connect(), we only need to specify the * port and the driver will form the GUID and use that to request the host. * * From the perspective of FreeBSD VM, the remote ephemeral port (i.e. the * auto-generated remote port for a connect request initiated by the host's * connect()) is set to HVADDR_PORT_UNKNOWN, which is not really used on the * FreeBSD guest. */
/* * Older HyperV hosts (vmbus version 'VMBUS_VERSION_WIN10' or before) * restrict the HyperV socket ring buffer size to six 4K pages. Newer * HyperV hosts don't have this limit. */ #define HVS_RINGBUF_RCV_SIZE (PAGE_SIZE * 6) #define HVS_RINGBUF_SND_SIZE (PAGE_SIZE * 6) #define HVS_RINGBUF_MAX_SIZE (PAGE_SIZE * 64)
struct hvsock_sc { device_t dev; struct hvs_pcb *pcb; struct vmbus_channel *channel; };
static bool hvsock_chan_readable(struct vmbus_channel *chan) { uint32_t readable = vmbus_chan_read_available(chan); return (readable >= HVSOCK_PKT_LEN(0)); }
static void hvsock_chan_cb(struct vmbus_channel *chan, void *context) { struct hvs_pcb *pcb = (struct hvs_pcb *) context; struct socket *so; uint32_t canwrite; HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: host sent us a wakeup on rb data, pcb = %p\n", __func__, pcb); /* * Check if the socket is still attached and valid. * Here we know the channel is still open. Need to make * sure the socket has not been closed or freed. */ (void) hvs_trans_lock(); so = hsvpcb2so(pcb); if (pcb->chan != NULL && so != NULL) { /* * Wake up the reader if there is data to read. */ SOCKBUF_LOCK(&(so)->so_rcv); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: read available = %u\n", __func__, vmbus_chan_read_available(pcb->chan)); if (hvsock_chan_readable(pcb->chan)) sorwakeup_locked(so); else SOCKBUF_UNLOCK(&(so)->so_rcv); /* * Wake up the sender if space becomes available to write. */ SOCKBUF_LOCK(&(so)->so_snd); canwrite = hvsock_canwrite_check(pcb); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: canwrite = %u\n", __func__, canwrite); if (canwrite > 0) { sowwakeup_locked(so); } else { SOCKBUF_UNLOCK(&(so)->so_snd); } } hvs_trans_unlock(); return; }
static int hvsock_br_callback(void *datap, int cplen, void *cbarg) { struct hvs_callback_arg *arg = (struct hvs_callback_arg *)cbarg; struct uio *uio = arg->uio; struct sockbuf *sb = arg->sb; int error = 0; if (cbarg == NULL || datap == NULL) return (EINVAL); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: called, uio_rw = %s, uio_resid = %zd, cplen = %u, " "datap = %p\n", __func__, (uio->uio_rw == UIO_READ) ?
"read from br":"write to br", uio->uio_resid, cplen, datap); if (sb) SOCKBUF_UNLOCK(sb); error = uiomove(datap, cplen, uio); if (sb) SOCKBUF_LOCK(sb); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: after uiomove, uio_resid = %zd, error = %d\n", __func__, uio->uio_resid, error); return (error); } static int hvsock_send_data(struct vmbus_channel *chan, struct uio *uio, uint32_t to_write, struct sockbuf *sb) { struct hvs_pkt_header hvs_pkt; int hvs_pkthlen, hvs_pktlen, pad_pktlen, hlen, error = 0; uint64_t pad = 0; struct iovec iov[3]; struct hvs_callback_arg cbarg; if (chan == NULL) return (ENOTCONN); hlen = sizeof(struct vmbus_chanpkt_hdr); hvs_pkthlen = sizeof(struct hvs_pkt_header); hvs_pktlen = hvs_pkthlen + to_write; pad_pktlen = VMBUS_CHANPKT_TOTLEN(hvs_pktlen); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: hlen = %u, hvs_pkthlen = %u, hvs_pktlen = %u, " "pad_pktlen = %u, data_len = %u\n", __func__, hlen, hvs_pkthlen, hvs_pktlen, pad_pktlen, to_write); hvs_pkt.chan_pkt_hdr.cph_type = VMBUS_CHANPKT_TYPE_INBAND; hvs_pkt.chan_pkt_hdr.cph_flags = 0; VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_tlen, pad_pktlen); hvs_pkt.chan_pkt_hdr.cph_xactid = 0; hvs_pkt.vmpipe_pkt_hdr.vmpipe_pkt_type = 1; hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size = to_write; cbarg.uio = uio; cbarg.sb = sb; if (uio && to_write > 0) { iov[0].iov_base = &hvs_pkt; iov[0].iov_len = hvs_pkthlen; iov[1].iov_base = NULL; iov[1].iov_len = to_write; iov[2].iov_base = &pad; iov[2].iov_len = pad_pktlen - hvs_pktlen; error = vmbus_chan_iov_send(chan, iov, 3, hvsock_br_callback, &cbarg); } else { if (to_write == 0) { iov[0].iov_base = &hvs_pkt; iov[0].iov_len = hvs_pkthlen; iov[1].iov_base = &pad; iov[1].iov_len = pad_pktlen - hvs_pktlen; error = vmbus_chan_iov_send(chan, iov, 2, NULL, NULL); } } if (error) { HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: error = %d\n", __func__, error); } return (error); } /* * Check if we have data on current ring buffer to read * or not. If not, advance the ring buffer read index to * next packet. Update the recev_data_len and recev_data_off * to new value. * Return the number of bytes can read. */ static uint32_t hvsock_canread_check(struct hvs_pcb *pcb) { uint32_t advance; uint32_t tlen, hlen, dlen; uint32_t bytes_canread = 0; int error; if (pcb == NULL || pcb->chan == NULL) { pcb->so->so_error = EIO; return (0); } /* Still have data not read yet on current packet */ if (pcb->recv_data_len > 0) return (pcb->recv_data_len); if (pcb->rb_init) advance = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen); else advance = 0; bytes_canread = vmbus_chan_read_available(pcb->chan); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: bytes_canread on br = %u, advance = %u\n", __func__, bytes_canread, advance); if (pcb->rb_init && bytes_canread == (advance + sizeof(uint64_t))) { /* * Nothing to read. Need to advance the rindex before * calling sbwait, so host knows to wake us up when data * is available to read on rb. 
*/ error = vmbus_chan_recv_idxadv(pcb->chan, advance); if (error) { HVSOCK_DBG(HVSOCK_DBG_ERR, "%s: after calling vmbus_chan_recv_idxadv, " "got error = %d\n", __func__, error); return (0); } else { pcb->rb_init = false; pcb->recv_data_len = 0; pcb->recv_data_off = 0; bytes_canread = vmbus_chan_read_available(pcb->chan); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: advanced %u bytes, " "bytes_canread on br now = %u\n", __func__, advance, bytes_canread); if (bytes_canread == 0) return (0); else advance = 0; } } if (bytes_canread < advance + (sizeof(struct hvs_pkt_header) + sizeof(uint64_t))) return (0); error = vmbus_chan_recv_peek(pcb->chan, &pcb->hvs_pkt, sizeof(struct hvs_pkt_header), advance); /* Don't have anything to read */ if (error) { HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: after calling vmbus_chan_recv_peek, got error = %d\n", __func__, error); return (0); } /* * We just read in a new packet header. Do some sanity checks. */ tlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen); hlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_hlen); dlen = pcb->hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size; if (__predict_false(hlen < sizeof(struct vmbus_chanpkt_hdr)) || __predict_false(hlen > tlen) || __predict_false(tlen < dlen + sizeof(struct hvs_pkt_header))) { HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "invalid tlen(%u), hlen(%u) or dlen(%u)\n", tlen, hlen, dlen); pcb->so->so_error = EIO; return (0); } if (pcb->rb_init == false) pcb->rb_init = true; HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "Got new pkt tlen(%u), hlen(%u) and dlen(%u)\n", tlen, hlen, dlen); /* The other side has sent a close FIN */ if (dlen == 0) { HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: Received FIN from other side\n", __func__); /* inform the caller by setting so_error to ESHUTDOWN */ pcb->so->so_error = ESHUTDOWN; } HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: canread on receive ring is %u\n", __func__, dlen); pcb->recv_data_len = dlen; pcb->recv_data_off = 0; return (pcb->recv_data_len); }
static uint32_t hvsock_canwrite_check(struct hvs_pcb *pcb) { uint32_t writeable; uint32_t ret; if (pcb == NULL || pcb->chan == NULL) return (0); writeable = vmbus_chan_write_available(pcb->chan); /* * We must always reserve a 0-length-payload packet for the FIN. */ HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: writeable is %u, should be greater than %ju\n", __func__, writeable, (uintmax_t)(HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0))); if (writeable < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)) { /* * The Tx ring seems full. */ return (0); } ret = writeable - HVSOCK_PKT_LEN(0) - HVSOCK_PKT_LEN(0); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: available size is %u\n", __func__, rounddown2(ret, 8)); return (rounddown2(ret, 8)); }
static void hvsock_set_chan_pending_send_size(struct vmbus_channel *chan) { vmbus_chan_set_pending_send_size(chan, HVSOCK_PKT_LEN(HVSOCK_SEND_BUF_SZ)); }
static int hvsock_open_channel(struct vmbus_channel *chan, struct socket *so) { unsigned int rcvbuf, sndbuf; struct hvs_pcb *pcb = so2hvspcb(so); int ret; if (vmbus_current_version < VMBUS_VERSION_WIN10_V5) { sndbuf = HVS_RINGBUF_SND_SIZE; rcvbuf = HVS_RINGBUF_RCV_SIZE; } else { sndbuf = MAX(so->so_snd.sb_hiwat, HVS_RINGBUF_SND_SIZE); sndbuf = MIN(sndbuf, HVS_RINGBUF_MAX_SIZE); sndbuf = rounddown2(sndbuf, PAGE_SIZE); rcvbuf = MAX(so->so_rcv.sb_hiwat, HVS_RINGBUF_RCV_SIZE); rcvbuf = MIN(rcvbuf, HVS_RINGBUF_MAX_SIZE); rcvbuf = rounddown2(rcvbuf, PAGE_SIZE); } /* * We can only read the user-provided size of data * from the ring buffer. Turn off batched reading.
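 *
 * (Worked example for hvsock_canwrite_check() above, with made-up
 * numbers: once `writeable' clears the HVSOCK_PKT_LEN(1) +
 * HVSOCK_PKT_LEN(0) threshold, the usable space reported is
 *
 *	usable = rounddown2(writeable - 2 * HVSOCK_PKT_LEN(0), 8);
 *
 * keeping a zero-payload packet's worth of ring space in reserve so
 * that a FIN can always still be queued.)
 *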
*/ vmbus_chan_set_readbatch(chan, false); ret = vmbus_chan_open(chan, sndbuf, rcvbuf, NULL, 0, hvsock_chan_cb, pcb); if (ret != 0) { HVSOCK_DBG(HVSOCK_DBG_ERR, "%s: failed to open hvsock channel, sndbuf = %u, " "rcvbuf = %u\n", __func__, sndbuf, rcvbuf); } else { HVSOCK_DBG(HVSOCK_DBG_INFO, "%s: hvsock channel opened, sndbuf = %u, " "rcvbuf = %u\n", __func__, sndbuf, rcvbuf); /* * Set the pending send size so as to receive wakeup * signals from the host when there is enough space on * the rx buffer ring to write. */ hvsock_set_chan_pending_send_size(chan); } return (ret); }
/* * Guest is listening passively on the socket. Open the channel and * create a new socket for the connection. */ static void hvsock_open_conn_passive(struct vmbus_channel *chan, struct socket *so, struct hvsock_sc *sc) { struct socket *new_so; struct hvs_pcb *new_pcb, *pcb; int error; /* Do nothing if socket is not listening */ if (!SOLISTENING(so)) { HVSOCK_DBG(HVSOCK_DBG_ERR, "%s: socket is not a listening one\n", __func__); return; } /* * Create a new socket. This will call pru_attach to complete * the socket initialization and put the new socket onto * listening socket's sol_incomp list, waiting to be promoted * to sol_comp list. * The new socket created has ref count 0. There is no other * thread that changes the state of this new one at the * moment, so we don't need to hold its lock while opening * channel and filling out its pcb information. */ new_so = sonewconn(so, 0); if (!new_so) HVSOCK_DBG(HVSOCK_DBG_ERR, "%s: creating new socket failed\n", __func__); /* * Now open the vmbus channel. If it fails, the socket will be * on the listening socket's sol_incomp queue until it is * replaced and aborted. */ error = hvsock_open_channel(chan, new_so); if (error) { new_so->so_error = error; return; } pcb = so->so_pcb; new_pcb = new_so->so_pcb; hvs_addr_set(&(new_pcb->local_addr), pcb->local_addr.hvs_port); /* Remote port is unknown to guest in this type of connection */ hvs_addr_set(&(new_pcb->remote_addr), HVADDR_PORT_UNKNOWN); new_pcb->chan = chan; new_pcb->recv_data_len = 0; new_pcb->recv_data_off = 0; new_pcb->rb_init = false; new_pcb->vm_srv_id = *vmbus_chan_guid_type(chan); new_pcb->host_srv_id = *vmbus_chan_guid_inst(chan); hvs_insert_socket_on_list(new_so, HVS_LIST_CONNECTED); sc->pcb = new_pcb; /* * Change the socket state to SS_ISCONNECTED. This will promote * the socket to sol_comp queue and wake up the thread which * is accepting connection. */ soisconnected(new_so); }
/* * Guest is actively connecting to the host. */ static void hvsock_open_conn_active(struct vmbus_channel *chan, struct socket *so) { struct hvs_pcb *pcb; int error; error = hvsock_open_channel(chan, so); if (error) { so->so_error = error; return; } pcb = so->so_pcb; pcb->chan = chan; pcb->recv_data_len = 0; pcb->recv_data_off = 0; pcb->rb_init = false; mtx_lock(&hvs_trans_socks_mtx); __hvs_remove_socket_from_list(so, HVS_LIST_BOUND); __hvs_insert_socket_on_list(so, HVS_LIST_CONNECTED); mtx_unlock(&hvs_trans_socks_mtx); /* * Change the socket state to SS_ISCONNECTED. This will wake up * the thread sleeping in the connect call.
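 *
 * (Sizing note for hvsock_open_channel() above: with VMBus protocol
 * VMBUS_VERSION_WIN10_V5 or newer, the ring sizes follow the socket
 * buffer high watermarks, clamped and page-aligned. E.g. with a
 * hypothetical sb_hiwat of 70000 bytes and 4 KB pages:
 *
 *	sndbuf = rounddown2(MIN(MAX(70000, HVS_RINGBUF_SND_SIZE),
 *	    HVS_RINGBUF_MAX_SIZE), PAGE_SIZE);	// 69632, i.e. 17 pages
 *
 * Older hosts are pinned to the fixed six-page rings.)
 *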
*/ soisconnected(so); }
static void hvsock_open_connection(struct vmbus_channel *chan, struct hvsock_sc *sc) { struct hyperv_guid *inst_guid, *type_guid; bool conn_from_host; struct sockaddr_hvs addr; struct socket *so; struct hvs_pcb *pcb; type_guid = (struct hyperv_guid *) vmbus_chan_guid_type(chan); inst_guid = (struct hyperv_guid *) vmbus_chan_guid_inst(chan); conn_from_host = vmbus_chan_is_hvs_conn_from_host(chan); HVSOCK_DBG(HVSOCK_DBG_INFO, "type_guid is "); hvsock_print_guid(type_guid); HVSOCK_DBG(HVSOCK_DBG_INFO, "inst_guid is "); hvsock_print_guid(inst_guid); HVSOCK_DBG(HVSOCK_DBG_INFO, "connection %s host\n", (conn_from_host == true) ? "from" : "to"); /* * The listening port should be in [0, MAX_LISTEN_PORT] */ if (!is_valid_srv_id(type_guid)) return; /* * There should be a bound socket already created, whether * it is a passive or an active connection. * For a host-initiated connection (passive on the guest side), * the type_guid contains the port on which the guest is bound * and listening. * For a guest-initiated connection (active on the guest side), * the inst_guid contains the port that the guest has auto-bound * to. */ hvs_addr_init(&addr, conn_from_host ? type_guid : inst_guid); so = hvs_find_socket_on_list(&addr, HVS_LIST_BOUND); if (!so) { HVSOCK_DBG(HVSOCK_DBG_ERR, "%s: no bound socket found for port %u\n", __func__, addr.hvs_port); return; } if (conn_from_host) { hvsock_open_conn_passive(chan, so, sc); } else { (void) hvs_trans_lock(); pcb = so->so_pcb; if (pcb && pcb->so) { sc->pcb = so2hvspcb(so); hvsock_open_conn_active(chan, so); } else { HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: channel detached before open\n", __func__); } hvs_trans_unlock(); } }
static int hvsock_probe(device_t dev) { struct vmbus_channel *channel = vmbus_get_channel(dev); if (!channel || !vmbus_chan_is_hvs(channel)) { HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_probe called but not a hvsock channel id %u\n", vmbus_chan_id(channel)); return ENXIO; } else { HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_probe got a hvsock channel id %u\n", vmbus_chan_id(channel)); return BUS_PROBE_DEFAULT; } }
static int hvsock_attach(device_t dev) { struct vmbus_channel *channel = vmbus_get_channel(dev); struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_attach called.\n"); hvsock_open_connection(channel, sc); /* * Always return success. On error the host will rescind the device * in 30 seconds and we can do cleanup at that time in * vmbus_chan_msgproc_chrescind(). */ return (0); }
static int hvsock_detach(device_t dev) { struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev); struct socket *so; int retry; if (bootverbose) device_printf(dev, "hvsock_detach called.\n"); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_detach called.\n"); if (sc->pcb != NULL) { (void) hvs_trans_lock(); so = hsvpcb2so(sc->pcb); if (so) { /* Close the connection */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) soisdisconnected(so); } mtx_lock(&hvs_trans_socks_mtx); __hvs_remove_pcb_from_list(sc->pcb, HVS_LIST_BOUND | HVS_LIST_CONNECTED); mtx_unlock(&hvs_trans_socks_mtx); /* * Close channel while no reader and sender are working * on the buffer rings.
*/ if (so) { retry = 0; while (SOCK_IO_RECV_LOCK(so, 0) == EWOULDBLOCK) { /* * Someone is reading, rx br is busy */ soisdisconnected(so); DELAY(500); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "waiting for rx reader to exit, " "retry = %d\n", retry++); } retry = 0; while (SOCK_IO_SEND_LOCK(so, 0) == EWOULDBLOCK) { /* * Someone is sending, tx br is busy */ soisdisconnected(so); DELAY(500); HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "waiting for tx sender to exit, " "retry = %d\n", retry++); } } bzero(sc->pcb, sizeof(struct hvs_pcb)); free(sc->pcb, M_HVSOCK); sc->pcb = NULL; if (so) { SOCK_IO_RECV_UNLOCK(so); SOCK_IO_SEND_UNLOCK(so); so->so_pcb = NULL; } hvs_trans_unlock(); } vmbus_chan_close(vmbus_get_channel(dev)); return (0); } static device_method_t hvsock_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hvsock_probe), DEVMETHOD(device_attach, hvsock_attach), DEVMETHOD(device_detach, hvsock_detach), DEVMETHOD_END }; static driver_t hvsock_driver = { "hv_sock", hvsock_methods, sizeof(struct hvsock_sc) }; DRIVER_MODULE(hvsock, vmbus, hvsock_driver, NULL, NULL); MODULE_VERSION(hvsock, 1); MODULE_DEPEND(hvsock, vmbus, 1, 1, 1); diff --git a/sys/dev/hyperv/hvsock/hv_sock.h b/sys/dev/hyperv/hvsock/hv_sock.h index 98a9afb747bf..ee6416a29662 100644 --- a/sys/dev/hyperv/hvsock/hv_sock.h +++ b/sys/dev/hyperv/hvsock/hv_sock.h @@ -1,119 +1,119 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2020 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef _HVSOCK_H #define _HVSOCK_H #include #include #include #include #include /* * HyperV Socket Protocols */ #define HYPERV_SOCK_PROTO_TRANS 1 /* Transport protocol */ #define HVADDR_PORT_ANY -1U #define HVADDR_PORT_UNKNOWN -1U #define HVS_LIST_BOUND 0x01 #define HVS_LIST_CONNECTED 0x02 #define HVS_LIST_ALL (HVS_LIST_BOUND | HVS_LIST_CONNECTED) struct sockaddr_hvs { unsigned char sa_len; sa_family_t sa_family; unsigned int hvs_port; unsigned char hvs_zero[sizeof(struct sockaddr) - sizeof(sa_family_t) - sizeof(unsigned char) - sizeof(unsigned int)]; }; struct vmpipe_proto_header { uint32_t vmpipe_pkt_type; uint32_t vmpipe_data_size; } __packed; struct hvs_pkt_header { struct vmbus_chanpkt_hdr chan_pkt_hdr; struct vmpipe_proto_header vmpipe_pkt_hdr; } __packed; struct hvs_pcb { struct socket *so; /* Pointer to socket */ struct sockaddr_hvs local_addr; struct sockaddr_hvs remote_addr; struct hyperv_guid vm_srv_id; struct hyperv_guid host_srv_id; struct vmbus_channel *chan; /* Current packet header on rx ring */ struct hvs_pkt_header hvs_pkt; /* Available data in receive br in current packet */ uint32_t recv_data_len; /* offset in the packet */ uint32_t recv_data_off; bool rb_init; /* Link lists for global bound and connected sockets */ LIST_ENTRY(hvs_pcb) bound_next; LIST_ENTRY(hvs_pcb) connected_next; }; #define so2hvspcb(so) \ ((struct hvs_pcb *)((so)->so_pcb)) #define hsvpcb2so(hvspcb) \ ((struct socket *)((hvspcb)->so)) void hvs_addr_init(struct sockaddr_hvs *, const struct hyperv_guid *); void hvs_trans_close(struct socket *); void hvs_trans_detach(struct socket *); void hvs_trans_abort(struct socket *); int hvs_trans_attach(struct socket *, int, struct thread *); int hvs_trans_bind(struct socket *, struct sockaddr *, struct thread *); int hvs_trans_listen(struct socket *, int, struct thread *); -int hvs_trans_accept(struct socket *, struct sockaddr **); +int hvs_trans_accept(struct socket *, struct sockaddr *); int hvs_trans_connect(struct socket *, struct sockaddr *, struct thread *); int hvs_trans_peeraddr(struct socket *, struct sockaddr **); int hvs_trans_sockaddr(struct socket *, struct sockaddr **); int hvs_trans_soreceive(struct socket *, struct sockaddr **, struct uio *, struct mbuf **, struct mbuf **, int *); int hvs_trans_sosend(struct socket *, struct sockaddr *, struct uio *, struct mbuf *, struct mbuf *, int, struct thread *); int hvs_trans_disconnect(struct socket *); int hvs_trans_shutdown(struct socket *); int hvs_trans_lock(void); void hvs_trans_unlock(void); void hvs_remove_socket_from_list(struct socket *, unsigned char); #endif /* _HVSOCK_H */ diff --git a/sys/dev/iscsi/icl_soft_proxy.c b/sys/dev/iscsi/icl_soft_proxy.c index ee448116b0e9..db9bf12a688c 100644 --- a/sys/dev/iscsi/icl_soft_proxy.c +++ b/sys/dev/iscsi/icl_soft_proxy.c @@ -1,337 +1,335 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /*- * Copyright (c) 1982, 1986, 1989, 1990, 1993 * The Regents of the University of California. All rights reserved. * * sendfile(2) and related extensions: * Copyright (c) 1998, David Greenman. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * iSCSI Common Layer, kernel proxy part. 
*/ #ifdef ICL_KERNEL_PROXY #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct icl_listen_sock { TAILQ_ENTRY(icl_listen_sock) ils_next; struct icl_listen *ils_listen; struct socket *ils_socket; bool ils_running; int ils_id; }; struct icl_listen { TAILQ_HEAD(, icl_listen_sock) il_sockets; struct sx il_lock; void (*il_accept)(struct socket *, struct sockaddr *, int); }; static MALLOC_DEFINE(M_ICL_PROXY, "ICL_PROXY", "iSCSI common layer proxy"); int icl_soft_proxy_connect(struct icl_conn *ic, int domain, int socktype, int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) { struct socket *so; int error; error = socreate(domain, &so, socktype, protocol, curthread->td_ucred, curthread); if (error != 0) return (error); if (from_sa != NULL) { error = sobind(so, from_sa, curthread); if (error != 0) { soclose(so); return (error); } } error = soconnect(so, to_sa, curthread); if (error != 0) { soclose(so); return (error); } SOCK_LOCK(so); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, "icl_connect", 0); if (error) break; } if (error == 0) { error = so->so_error; so->so_error = 0; } SOCK_UNLOCK(so); if (error != 0) { soclose(so); return (error); } error = icl_soft_handoff_sock(ic, so); if (error != 0) soclose(so); return (error); } struct icl_listen * icl_listen_new(void (*accept_cb)(struct socket *, struct sockaddr *, int)) { struct icl_listen *il; il = malloc(sizeof(*il), M_ICL_PROXY, M_ZERO | M_WAITOK); TAILQ_INIT(&il->il_sockets); sx_init(&il->il_lock, "icl_listen"); il->il_accept = accept_cb; return (il); } void icl_listen_free(struct icl_listen *il) { struct icl_listen_sock *ils; sbintime_t sbt, pr; sx_xlock(&il->il_lock); while (!TAILQ_EMPTY(&il->il_sockets)) { ils = TAILQ_FIRST(&il->il_sockets); while (ils->ils_running) { ICL_DEBUG("waiting for accept thread to terminate"); sx_xunlock(&il->il_lock); SOLISTEN_LOCK(ils->ils_socket); ils->ils_socket->so_error = ENOTCONN; SOLISTEN_UNLOCK(ils->ils_socket); wakeup(&ils->ils_socket->so_timeo); sbt = mstosbt(995); pr = mstosbt(10); pause_sbt("icl_unlisten", sbt, pr, 0); sx_xlock(&il->il_lock); } TAILQ_REMOVE(&il->il_sockets, ils, ils_next); soclose(ils->ils_socket); free(ils, M_ICL_PROXY); } sx_xunlock(&il->il_lock); free(il, M_ICL_PROXY); } /* * XXX: Doing accept in a separate thread in each socket might not be the * best way to do stuff, but it's pretty clean and debuggable - and you * probably won't have hundreds of listening sockets anyway. */ static void icl_accept_thread(void *arg) { struct icl_listen_sock *ils; struct socket *head, *so; - struct sockaddr *sa; + struct sockaddr_storage ss = { .ss_len = sizeof(ss) }; int error; ils = arg; head = ils->ils_socket; ils->ils_running = true; for (;;) { SOLISTEN_LOCK(head); error = solisten_dequeue(head, &so, 0); if (error == ENOTCONN) { /* * XXXGL: ENOTCONN is our mark from icl_listen_free(). * Neither socket code, nor msleep(9) may return it. 
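 *
 * (Side note on the hunk just below: it reflects this change's new
 * accept contract -- soaccept() now copies the peer address into
 * caller-provided storage instead of allocating one with M_SONAME.
 * The new calling convention, sketched:
 *
 *	struct sockaddr_storage ss = { .ss_len = sizeof(ss) };
 *
 *	error = soaccept(so, (struct sockaddr *)&ss);
 *	// on success, ss holds the peer address; there is nothing
 *	// to free() afterwards
 *
 * which is why the old free(sa, M_SONAME) error path disappears.)
 *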
*/ ICL_DEBUG("terminating"); ils->ils_running = false; kthread_exit(); return; } if (error) { ICL_WARN("solisten_dequeue error %d", error); continue; } - sa = NULL; - error = soaccept(so, &sa); + error = soaccept(so, (struct sockaddr *)&ss); if (error != 0) { ICL_WARN("soaccept error %d", error); - if (sa != NULL) - free(sa, M_SONAME); soclose(so); continue; } - (ils->ils_listen->il_accept)(so, sa, ils->ils_id); + (ils->ils_listen->il_accept)(so, (struct sockaddr *)&ss, + ils->ils_id); } } static int icl_listen_add_tcp(struct icl_listen *il, int domain, int socktype, int protocol, struct sockaddr *sa, int portal_id) { struct icl_listen_sock *ils; struct socket *so; struct sockopt sopt; int error, one = 1; error = socreate(domain, &so, socktype, protocol, curthread->td_ucred, curthread); if (error != 0) { ICL_WARN("socreate failed with error %d", error); return (error); } sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_REUSEADDR; sopt.sopt_val = &one; sopt.sopt_valsize = sizeof(one); sopt.sopt_td = NULL; error = sosetopt(so, &sopt); if (error != 0) { ICL_WARN("failed to set SO_REUSEADDR with error %d", error); soclose(so); return (error); } error = sobind(so, sa, curthread); if (error != 0) { ICL_WARN("sobind failed with error %d", error); soclose(so); return (error); } error = solisten(so, -1, curthread); if (error != 0) { ICL_WARN("solisten failed with error %d", error); soclose(so); return (error); } ils = malloc(sizeof(*ils), M_ICL_PROXY, M_ZERO | M_WAITOK); ils->ils_listen = il; ils->ils_socket = so; ils->ils_id = portal_id; error = kthread_add(icl_accept_thread, ils, NULL, NULL, 0, 0, "iclacc"); if (error != 0) { ICL_WARN("kthread_add failed with error %d", error); soclose(so); free(ils, M_ICL_PROXY); return (error); } sx_xlock(&il->il_lock); TAILQ_INSERT_TAIL(&il->il_sockets, ils, ils_next); sx_xunlock(&il->il_lock); return (0); } int icl_listen_add(struct icl_listen *il, bool rdma, int domain, int socktype, int protocol, struct sockaddr *sa, int portal_id) { if (rdma) { ICL_DEBUG("RDMA not supported"); return (EOPNOTSUPP); } return (icl_listen_add_tcp(il, domain, socktype, protocol, sa, portal_id)); } int icl_listen_remove(struct icl_listen *il, struct sockaddr *sa) { /* * XXX */ return (EOPNOTSUPP); } #endif /* ICL_KERNEL_PROXY */ diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c index 03e6856b7750..cf9b91511574 100644 --- a/sys/kern/uipc_domain.c +++ b/sys/kern/uipc_domain.c @@ -1,394 +1,394 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct domainhead domains = SLIST_HEAD_INITIALIZER(&domains); int domain_init_status = 1; static struct mtx dom_mtx; /* domain list lock */ MTX_SYSINIT(domain, &dom_mtx, "domain list", MTX_DEF); static int -pr_accept_notsupp(struct socket *so, struct sockaddr **nam) +pr_accept_notsupp(struct socket *so, struct sockaddr *sa) { return (EOPNOTSUPP); } static int pr_aio_queue_notsupp(struct socket *so, struct kaiocb *job) { return (EOPNOTSUPP); } static int pr_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) { return (EOPNOTSUPP); } static int pr_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { return (EOPNOTSUPP); } static int pr_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) { return (EOPNOTSUPP); } static int pr_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { return (EOPNOTSUPP); } static int pr_connect2_notsupp(struct socket *so1, struct socket *so2) { return (EOPNOTSUPP); } static int pr_control_notsupp(struct socket *so, u_long cmd, void *data, struct ifnet *ifp, struct thread *td) { return (EOPNOTSUPP); } static int pr_disconnect_notsupp(struct socket *so) { return (EOPNOTSUPP); } static int pr_listen_notsupp(struct socket *so, int backlog, struct thread *td) { return (EOPNOTSUPP); } static int pr_peeraddr_notsupp(struct socket *so, struct sockaddr **nam) { return (EOPNOTSUPP); } static int pr_rcvd_notsupp(struct socket *so, int flags) { return (EOPNOTSUPP); } static int pr_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) { return (EOPNOTSUPP); } static int pr_send_notsupp(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { if (control != NULL) m_freem(control); if ((flags & PRUS_NOTREADY) == 0) m_freem(m); return (EOPNOTSUPP); } static int pr_ready_notsupp(struct socket *so, struct mbuf *m, int count) { return (EOPNOTSUPP); } static int pr_shutdown_notsupp(struct socket *so) { return (EOPNOTSUPP); } static int pr_sockaddr_notsupp(struct socket *so, struct sockaddr **nam) { return (EOPNOTSUPP); } static int pr_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { return (EOPNOTSUPP); } static int pr_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { return (EOPNOTSUPP); } static int pr_sopoll_notsupp(struct socket *so, int events, struct ucred *cred, struct thread *td) { return (EOPNOTSUPP); } static void pr_init(struct domain 
*dom, struct protosw *pr) { KASSERT(pr->pr_attach != NULL, ("%s: protocol doesn't have pr_attach", __func__)); pr->pr_domain = dom; #define DEFAULT(foo, bar) if (pr->foo == NULL) pr->foo = bar DEFAULT(pr_sosend, sosend_generic); DEFAULT(pr_soreceive, soreceive_generic); DEFAULT(pr_sopoll, sopoll_generic); DEFAULT(pr_setsbopt, sbsetopt); #define NOTSUPP(foo) if (pr->foo == NULL) pr->foo = foo ## _notsupp NOTSUPP(pr_accept); NOTSUPP(pr_aio_queue); NOTSUPP(pr_bind); NOTSUPP(pr_bindat); NOTSUPP(pr_connect); NOTSUPP(pr_connect2); NOTSUPP(pr_connectat); NOTSUPP(pr_control); NOTSUPP(pr_disconnect); NOTSUPP(pr_listen); NOTSUPP(pr_peeraddr); NOTSUPP(pr_rcvd); NOTSUPP(pr_rcvoob); NOTSUPP(pr_send); NOTSUPP(pr_shutdown); NOTSUPP(pr_sockaddr); NOTSUPP(pr_sosend); NOTSUPP(pr_soreceive); NOTSUPP(pr_sopoll); NOTSUPP(pr_ready); }
/* * Add a new protocol domain to the list of supported domains. * Note: you can't unload it again because a socket may be using it. * XXX can't fail at this time. */ void domain_add(struct domain *dp) { struct protosw *pr; MPASS(IS_DEFAULT_VNET(curvnet)); if (dp->dom_probe != NULL && (*dp->dom_probe)() != 0) return; for (int i = 0; i < dp->dom_nprotosw; i++) if ((pr = dp->dom_protosw[i]) != NULL) pr_init(dp, pr); mtx_lock(&dom_mtx); #ifdef INVARIANTS struct domain *tmp; SLIST_FOREACH(tmp, &domains, dom_next) MPASS(tmp->dom_family != dp->dom_family); #endif SLIST_INSERT_HEAD(&domains, dp, dom_next); mtx_unlock(&dom_mtx); }
void domain_remove(struct domain *dp) { if ((dp->dom_flags & DOMF_UNLOADABLE) == 0) return; mtx_lock(&dom_mtx); SLIST_REMOVE(&domains, dp, domain, dom_next); mtx_unlock(&dom_mtx); }
static void domainfinalize(void *dummy) { mtx_lock(&dom_mtx); KASSERT(domain_init_status == 1, ("domainfinalize called too late!")); domain_init_status = 2; mtx_unlock(&dom_mtx); } SYSINIT(domainfin, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, domainfinalize, NULL);
struct domain * pffinddomain(int family) { struct domain *dp; SLIST_FOREACH(dp, &domains, dom_next) if (dp->dom_family == family) return (dp); return (NULL); }
struct protosw * pffindproto(int family, int type, int proto) { struct domain *dp; struct protosw *pr; dp = pffinddomain(family); if (dp == NULL) return (NULL); for (int i = 0; i < dp->dom_nprotosw; i++) if ((pr = dp->dom_protosw[i]) != NULL && pr->pr_type == type && (pr->pr_protocol == 0 || proto == 0 || pr->pr_protocol == proto)) return (pr); return (NULL); }
/* * The caller must make sure that the new protocol is fully set up and ready to * accept requests before it is registered. */ int protosw_register(struct domain *dp, struct protosw *npr) { struct protosw **prp; MPASS(dp); MPASS(npr && npr->pr_type > 0 && npr->pr_protocol > 0); prp = NULL; /* * Protect us against races when two protocol registrations for * the same protocol happen at the same time. */ mtx_lock(&dom_mtx); for (int i = 0; i < dp->dom_nprotosw; i++) { if (dp->dom_protosw[i] == NULL) { /* Remember the first free spacer. */ if (prp == NULL) prp = &dp->dom_protosw[i]; } else { /* * The new protocol must not yet exist. * XXXAO: Check only protocol? * XXXGL: Maybe assert that it doesn't exist? */ if ((dp->dom_protosw[i]->pr_type == npr->pr_type) && (dp->dom_protosw[i]->pr_protocol == npr->pr_protocol)) { mtx_unlock(&dom_mtx); return (EEXIST); } } } /* If no free spacer is found we can't add the new protocol.
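 *
 * (To illustrate what pr_init() above buys a registrant: a minimal
 * protosw only has to provide pr_attach; every handler left NULL is
 * backfilled with a _notsupp stub or a generic implementation. A
 * hypothetical skeleton -- names and protocol number made up:
 *
 *	static struct protosw foo_protosw = {
 *		.pr_type = SOCK_STREAM,
 *		.pr_protocol = 1,
 *		.pr_attach = foo_attach,
 *		// pr_bind, pr_accept, ... default to the *_notsupp
 *		// stubs and fail with EOPNOTSUPP; pr_sosend,
 *		// pr_soreceive and pr_sopoll default to the generic
 *		// implementations
 *	};
 *
 * so unimplemented operations fail cleanly rather than panic.)
 *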
*/ if (prp == NULL) { mtx_unlock(&dom_mtx); return (ENOMEM); } pr_init(dp, npr); *prp = npr; mtx_unlock(&dom_mtx); return (0); }
/* * The caller must make sure the protocol and its functions correctly shut down * all sockets and release all locks and memory references. */ int protosw_unregister(struct protosw *pr) { struct domain *dp; struct protosw **prp; dp = pr->pr_domain; prp = NULL; mtx_lock(&dom_mtx); /* The protocol must exist, and only once. */ for (int i = 0; i < dp->dom_nprotosw; i++) { if (dp->dom_protosw[i] == pr) { KASSERT(prp == NULL, ("%s: domain %p protocol %p registered twice\n", __func__, dp, pr)); prp = &dp->dom_protosw[i]; } } /* Protocol does not exist. XXXGL: assert that it does? */ if (prp == NULL) { mtx_unlock(&dom_mtx); return (EPROTONOSUPPORT); } /* De-orbit the protocol and make the slot available again. */ *prp = NULL; mtx_unlock(&dom_mtx); return (0); }
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index b59051ae3350..d16c0049dc43 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1,4399 +1,4404 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. * Copyright (c) 2004 The FreeBSD Foundation * Copyright (c) 2004-2008 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */
/* * Comments on the socket life cycle: * * soalloc() sets up socket layer state for a socket, called only by * socreate() and sonewconn(). Socket layer private. * * sodealloc() tears down socket layer state for a socket, called only by * sofree() and sonewconn(). Socket layer private. * * pru_attach() associates protocol layer state with an allocated socket; * called only once, may fail, aborting socket allocation. This is called * from socreate() and sonewconn(). Socket layer private. * * pru_detach() disassociates protocol layer state from an attached socket, * and will be called exactly once for sockets in which pru_attach() has * been successfully called. If pru_attach() returned an error, * pru_detach() will not be called. Socket layer private.
* * pru_abort() and pru_close() notify the protocol layer that the last * consumer of a socket is starting to tear down the socket, and that the * protocol should terminate the connection. Historically, pru_abort() also * detached protocol state from the socket state, but this is no longer the * case. * * socreate() creates a socket and attaches protocol state. This is a public * interface that may be used by socket layer consumers to create new * sockets. * * sonewconn() creates a socket and attaches protocol state. This is a * public interface that may be used by protocols to create new sockets when * a new connection is received and will be available for accept() on a * listen socket. * * soclose() destroys a socket after possibly waiting for it to disconnect. * This is a public interface that socket consumers should use to close and * release a socket when done with it. * * soabort() destroys a socket without waiting for it to disconnect (used * only for incoming connections that are already partially or fully * connected). This is used internally by the socket layer when clearing * listen socket queues (due to overflow or close on the listen socket), but * is also a public interface protocols may use to abort connections in * their incomplete listen queues should they no longer be required. Sockets * placed in completed connection listen queues should not be aborted for * reasons described in the comment above the soclose() implementation. This * is not a general purpose close routine, and except in the specific * circumstances described here, should not be used. * * sofree() will free a socket and its protocol state if all references on * the socket have been released, and is the public interface to attempt to * free a socket when a reference is removed. This is a socket layer private * interface. * * NOTE: In addition to socreate() and soclose(), which provide a single * socket reference to the consumer to be managed as required, there are two * calls to explicitly manage socket references, soref(), and sorele(). * Currently, these are generally required only when transitioning a socket * from a listen queue to a file descriptor, in order to prevent garbage * collection of the socket at an untimely moment. For a number of reasons, * these interfaces are not preferred, and should be avoided. * * NOTE: With regard to VNETs the general rule is that callers do not set * curvnet. Exceptions to this rule include soabort(), sodisconnect(), * sofree() (and with that sorele(), sotryfree()), as well as sonewconn() * and sorflush(), which are usually called from a pre-set VNET context. * sopoll() currently does not need a VNET context to be set. 
*/ #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_kern_tls.h" #include "opt_sctp.h" #include #include #include #include #include #include #include #include #include #include #include #include /* for struct knote */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #include #include #endif static int soreceive_rcvoob(struct socket *so, struct uio *uio, int flags); static void so_rdknl_lock(void *); static void so_rdknl_unlock(void *); static void so_rdknl_assert_lock(void *, int); static void so_wrknl_lock(void *); static void so_wrknl_unlock(void *); static void so_wrknl_assert_lock(void *, int); static void filt_sordetach(struct knote *kn); static int filt_soread(struct knote *kn, long hint); static void filt_sowdetach(struct knote *kn); static int filt_sowrite(struct knote *kn, long hint); static int filt_soempty(struct knote *kn, long hint); static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id); fo_kqfilter_t soo_kqfilter; static struct filterops soread_filtops = { .f_isfd = 1, .f_detach = filt_sordetach, .f_event = filt_soread, }; static struct filterops sowrite_filtops = { .f_isfd = 1, .f_detach = filt_sowdetach, .f_event = filt_sowrite, }; static struct filterops soempty_filtops = { .f_isfd = 1, .f_detach = filt_sowdetach, .f_event = filt_soempty, }; so_gen_t so_gencnt; /* generation count for sockets */ MALLOC_DEFINE(M_SONAME, "soname", "socket name"); MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); #define VNET_SO_ASSERT(so) \ VNET_ASSERT(curvnet != NULL, \ ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so))); VNET_DEFINE(struct hhook_head *, socket_hhh[HHOOK_SOCKET_LAST + 1]); #define V_socket_hhh VNET(socket_hhh) /* * Limit on the number of connections in the listen queue waiting * for accept(2). * NB: The original sysctl somaxconn is still available but hidden * to prevent confusion about the actual purpose of this number. */ static u_int somaxconn = SOMAXCONN; static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS) { int error; int val; val = somaxconn; error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr ) return (error); /* * The purpose of the UINT_MAX / 3 limit, is so that the formula * 3 * so_qlimit / 2 * below, will not overflow. */ if (val < 1 || val > UINT_MAX / 3) return (EINVAL); somaxconn = val; return (0); } SYSCTL_PROC(_kern_ipc, OID_AUTO, soacceptqueue, CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(int), sysctl_somaxconn, "I", "Maximum listen socket pending connection accept queue size"); SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_SKIP | CTLFLAG_MPSAFE, 0, sizeof(int), sysctl_somaxconn, "I", "Maximum listen socket pending connection accept queue size (compat)"); static int numopensockets; SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD, &numopensockets, 0, "Number of open sockets"); /* * so_global_mtx protects so_gencnt, numopensockets, and the per-socket * so_gencnt field. */ static struct mtx so_global_mtx; MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF); /* * General IPC sysctl name space, used by sockets and a variety of other IPC * types. 
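 *
 * To make the UINT_MAX / 3 clamp above concrete: solisten_clone() later
 * tests sol_qlen > 3 * sol_qlimit / 2, so an unclamped qlimit could make
 * the multiplication wrap.  A standalone sketch of the guarded check
 * (illustrative only):
 */

#include <limits.h>
#include <stdbool.h>

static bool
listen_queue_over(unsigned int qlen, unsigned int qlimit)
{
	/* Safe only because the sysctl rejects qlimit > UINT_MAX / 3. */
	return (qlen > 3 * qlimit / 2);
}

/*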
*/ SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "IPC"); /* * Initialize the socket subsystem and set up the socket * memory allocator. */ static uma_zone_t socket_zone; int maxsockets; static void socket_zone_change(void *tag) { maxsockets = uma_zone_set_max(socket_zone, maxsockets); } static void socket_hhook_register(int subtype) { if (hhook_head_register(HHOOK_TYPE_SOCKET, subtype, &V_socket_hhh[subtype], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register hook\n", __func__); } static void socket_hhook_deregister(int subtype) { if (hhook_head_deregister(V_socket_hhh[subtype]) != 0) printf("%s: WARNING: unable to deregister hook\n", __func__); } static void socket_init(void *tag) { socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); maxsockets = uma_zone_set_max(socket_zone, maxsockets); uma_zone_set_warning(socket_zone, "kern.ipc.maxsockets limit reached"); EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL, EVENTHANDLER_PRI_FIRST); } SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL); static void socket_vnet_init(const void *unused __unused) { int i; /* We expect a contiguous range */ for (i = 0; i <= HHOOK_SOCKET_LAST; i++) socket_hhook_register(i); } VNET_SYSINIT(socket_vnet_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_vnet_init, NULL); static void socket_vnet_uninit(const void *unused __unused) { int i; for (i = 0; i <= HHOOK_SOCKET_LAST; i++) socket_hhook_deregister(i); } VNET_SYSUNINIT(socket_vnet_uninit, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_vnet_uninit, NULL); /* * Initialise maxsockets. This SYSINIT must be run after * tunable_mbinit(). */ static void init_maxsockets(void *ignored) { TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); maxsockets = imax(maxsockets, maxfiles); } SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL); /* * Sysctl to get and set the maximum global sockets limit. Notify protocols * of the change so that they can update their dependent limits as required. */ static int sysctl_maxsockets(SYSCTL_HANDLER_ARGS) { int error, newmaxsockets; newmaxsockets = maxsockets; error = sysctl_handle_int(oidp, &newmaxsockets, 0, req); if (error == 0 && req->newptr && newmaxsockets != maxsockets) { if (newmaxsockets > maxsockets && newmaxsockets <= maxfiles) { maxsockets = newmaxsockets; EVENTHANDLER_INVOKE(maxsockets_change); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, &maxsockets, 0, sysctl_maxsockets, "IU", "Maximum number of sockets available"); /* * Socket operation routines. These routines are called by the routines in * sys_socket.c or from a system process, and implement the semantics of * socket operations by switching out to the protocol specific routines. */ /* * Get a socket structure from our zone, and initialize it. Note that it * would probably be better to allocate socket and PCB at the same time, but * I'm not convinced that all the protocols can be easily modified to do * this. * * soalloc() returns a socket with a ref count of 0. 
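 *
 * The limits above are visible from userland via sysctl(3); a small
 * illustrative program (not part of this file) reading the two knobs:
 */

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int val;
	size_t len;

	len = sizeof(val);
	if (sysctlbyname("kern.ipc.maxsockets", &val, &len, NULL, 0) == 0)
		printf("kern.ipc.maxsockets: %d\n", val);
	len = sizeof(val);
	if (sysctlbyname("kern.ipc.soacceptqueue", &val, &len, NULL, 0) == 0)
		printf("kern.ipc.soacceptqueue: %d\n", val);
	return (0);
}

/*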
*/ static struct socket * soalloc(struct vnet *vnet) { struct socket *so; so = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO); if (so == NULL) return (NULL); #ifdef MAC if (mac_socket_init(so, M_NOWAIT) != 0) { uma_zfree(socket_zone, so); return (NULL); } #endif if (khelp_init_osd(HELPER_CLASS_SOCKET, &so->osd)) { uma_zfree(socket_zone, so); return (NULL); } /* * The socket locking protocol allows to lock 2 sockets at a time, * however, the first one must be a listening socket. WITNESS lacks * a feature to change class of an existing lock, so we use DUPOK. */ mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&so->so_snd_mtx, "so_snd", NULL, MTX_DEF); mtx_init(&so->so_rcv_mtx, "so_rcv", NULL, MTX_DEF); so->so_rcv.sb_sel = &so->so_rdsel; so->so_snd.sb_sel = &so->so_wrsel; sx_init(&so->so_snd_sx, "so_snd_sx"); sx_init(&so->so_rcv_sx, "so_rcv_sx"); TAILQ_INIT(&so->so_snd.sb_aiojobq); TAILQ_INIT(&so->so_rcv.sb_aiojobq); TASK_INIT(&so->so_snd.sb_aiotask, 0, soaio_snd, so); TASK_INIT(&so->so_rcv.sb_aiotask, 0, soaio_rcv, so); #ifdef VIMAGE VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p", __func__, __LINE__, so)); so->so_vnet = vnet; #endif /* We shouldn't need the so_global_mtx */ if (hhook_run_socket(so, NULL, HHOOK_SOCKET_CREATE)) { /* Do we need more comprehensive error returns? */ uma_zfree(socket_zone, so); return (NULL); } mtx_lock(&so_global_mtx); so->so_gencnt = ++so_gencnt; ++numopensockets; #ifdef VIMAGE vnet->vnet_sockcnt++; #endif mtx_unlock(&so_global_mtx); return (so); } /* * Free the storage associated with a socket at the socket layer, tear down * locks, labels, etc. All protocol state is assumed already to have been * torn down (and possibly never set up) by the caller. */ void sodealloc(struct socket *so) { KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count)); KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL")); mtx_lock(&so_global_mtx); so->so_gencnt = ++so_gencnt; --numopensockets; /* Could be below, but faster here. */ #ifdef VIMAGE VNET_ASSERT(so->so_vnet != NULL, ("%s:%d so_vnet is NULL, so=%p", __func__, __LINE__, so)); so->so_vnet->vnet_sockcnt--; #endif mtx_unlock(&so_global_mtx); #ifdef MAC mac_socket_destroy(so); #endif hhook_run_socket(so, NULL, HHOOK_SOCKET_CLOSE); khelp_destroy_osd(&so->osd); if (SOLISTENING(so)) { if (so->sol_accept_filter != NULL) accept_filt_setopt(so, NULL); } else { if (so->so_rcv.sb_hiwat) (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY); if (so->so_snd.sb_hiwat) (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); sx_destroy(&so->so_snd_sx); sx_destroy(&so->so_rcv_sx); mtx_destroy(&so->so_snd_mtx); mtx_destroy(&so->so_rcv_mtx); } crfree(so->so_cred); mtx_destroy(&so->so_lock); uma_zfree(socket_zone, so); } /* * socreate returns a socket with a ref count of 1 and a file descriptor * reference. The socket should be closed with soclose(). */ int socreate(int dom, struct socket **aso, int type, int proto, struct ucred *cred, struct thread *td) { struct protosw *prp; struct socket *so; int error; /* * XXX: divert(4) historically abused PF_INET. Keep this compatibility * shim until all applications have been updated. */ if (__predict_false(dom == PF_INET && type == SOCK_RAW && proto == IPPROTO_DIVERT)) { dom = PF_DIVERT; printf("%s uses obsolete way to create divert(4) socket\n", td->td_proc->p_comm); } prp = pffindproto(dom, type, proto); if (prp == NULL) { /* No support for domain. 
 */
		if (pffinddomain(dom) == NULL)
			return (EAFNOSUPPORT);
		/* No support for socket type. */
		if (proto == 0 && type != 0)
			return (EPROTOTYPE);
		return (EPROTONOSUPPORT);
	}

	MPASS(prp->pr_attach);

	if (IN_CAPABILITY_MODE(td) && (prp->pr_flags & PR_CAPATTACH) == 0)
		return (ECAPMODE);

	if (prison_check_af(cred, prp->pr_domain->dom_family) != 0)
		return (EPROTONOSUPPORT);

	so = soalloc(CRED_TO_VNET(cred));
	if (so == NULL)
		return (ENOBUFS);

	so->so_type = type;
	so->so_cred = crhold(cred);
	if ((prp->pr_domain->dom_family == PF_INET) ||
	    (prp->pr_domain->dom_family == PF_INET6) ||
	    (prp->pr_domain->dom_family == PF_ROUTE))
		so->so_fibnum = td->td_proc->p_fibnum;
	else
		so->so_fibnum = 0;
	so->so_proto = prp;
#ifdef MAC
	mac_socket_create(cred, so);
#endif
	knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
	    so_rdknl_assert_lock);
	knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
	    so_wrknl_assert_lock);
	if ((prp->pr_flags & PR_SOCKBUF) == 0) {
		so->so_snd.sb_mtx = &so->so_snd_mtx;
		so->so_rcv.sb_mtx = &so->so_rcv_mtx;
	}
	/*
	 * Auto-sizing of socket buffers is managed by the protocols and
	 * the appropriate flags must be set in the pru_attach function.
	 */
	CURVNET_SET(so->so_vnet);
	error = prp->pr_attach(so, proto, td);
	CURVNET_RESTORE();
	if (error) {
		sodealloc(so);
		return (error);
	}
	soref(so);
	*aso = so;
	return (0);
}

#ifdef REGRESSION
static int regression_sonewconn_earlytest = 1;
SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW,
    &regression_sonewconn_earlytest, 0, "Perform early sonewconn limit test");
#endif

static int sooverprio = LOG_DEBUG;
SYSCTL_INT(_kern_ipc, OID_AUTO, sooverprio, CTLFLAG_RW,
    &sooverprio, 0,
    "Log priority for listen socket overflows: 0..7 or -1 to disable");

static struct timeval overinterval = { 60, 0 };
SYSCTL_TIMEVAL_SEC(_kern_ipc, OID_AUTO, sooverinterval, CTLFLAG_RW,
    &overinterval,
    "Delay in seconds between warnings for listen socket overflows");

/*
 * When an attempt at a new connection is noted on a socket which supports
 * accept(2), the protocol has two options:
 * 1) Call legacy sonewconn() function, which would call protocol attach
 *    method, same as used for socket(2).
 * 2) Call solisten_clone(), do attach that is specific to a cloned connection,
 *    and then call solisten_enqueue().
 *
 * Note: the ref count on the socket is 0 on return.
 */
struct socket *
solisten_clone(struct socket *head)
{
	struct sbuf descrsb;
	struct socket *so;
	int len, overcount;
	u_int qlen;
	const char localprefix[] = "local:";
	char descrbuf[SUNPATHLEN + sizeof(localprefix)];
#if defined(INET6)
	char addrbuf[INET6_ADDRSTRLEN];
#elif defined(INET)
	char addrbuf[INET_ADDRSTRLEN];
#endif
	bool dolog, over;

	SOLISTEN_LOCK(head);
	over = (head->sol_qlen > 3 * head->sol_qlimit / 2);
#ifdef REGRESSION
	if (regression_sonewconn_earlytest && over) {
#else
	if (over) {
#endif
		head->sol_overcount++;
		dolog = (sooverprio >= 0) &&
		    !!ratecheck(&head->sol_lastover, &overinterval);

		/*
		 * If we're going to log, copy the overflow count and queue
		 * length from the listen socket before dropping the lock.
		 * Also, reset the overflow count.
		 */
		if (dolog) {
			overcount = head->sol_overcount;
			head->sol_overcount = 0;
			qlen = head->sol_qlen;
		}
		SOLISTEN_UNLOCK(head);

		if (dolog) {
			/*
			 * Try to print something descriptive about the
			 * socket for the error message.
*/ sbuf_new(&descrsb, descrbuf, sizeof(descrbuf), SBUF_FIXEDLEN); switch (head->so_proto->pr_domain->dom_family) { #if defined(INET) || defined(INET6) #ifdef INET case AF_INET: #endif #ifdef INET6 case AF_INET6: if (head->so_proto->pr_domain->dom_family == AF_INET6 || (sotoinpcb(head)->inp_inc.inc_flags & INC_ISIPV6)) { ip6_sprintf(addrbuf, &sotoinpcb(head)->inp_inc.inc6_laddr); sbuf_printf(&descrsb, "[%s]", addrbuf); } else #endif { #ifdef INET inet_ntoa_r( sotoinpcb(head)->inp_inc.inc_laddr, addrbuf); sbuf_cat(&descrsb, addrbuf); #endif } sbuf_printf(&descrsb, ":%hu (proto %u)", ntohs(sotoinpcb(head)->inp_inc.inc_lport), head->so_proto->pr_protocol); break; #endif /* INET || INET6 */ case AF_UNIX: sbuf_cat(&descrsb, localprefix); if (sotounpcb(head)->unp_addr != NULL) len = sotounpcb(head)->unp_addr->sun_len - offsetof(struct sockaddr_un, sun_path); else len = 0; if (len > 0) sbuf_bcat(&descrsb, sotounpcb(head)->unp_addr->sun_path, len); else sbuf_cat(&descrsb, "(unknown)"); break; } /* * If we can't print something more specific, at least * print the domain name. */ if (sbuf_finish(&descrsb) != 0 || sbuf_len(&descrsb) <= 0) { sbuf_clear(&descrsb); sbuf_cat(&descrsb, head->so_proto->pr_domain->dom_name ?: "unknown"); sbuf_finish(&descrsb); } KASSERT(sbuf_len(&descrsb) > 0, ("%s: sbuf creation failed", __func__)); /* * Preserve the historic listen queue overflow log * message, that starts with "sonewconn:". It has * been known to sysadmins for years and also test * sys/kern/sonewconn_overflow checks for it. */ if (head->so_cred == 0) { log(LOG_PRI(sooverprio), "sonewconn: pcb %p (%s): " "Listen queue overflow: %i already in " "queue awaiting acceptance (%d " "occurrences)\n", head->so_pcb, sbuf_data(&descrsb), qlen, overcount); } else { log(LOG_PRI(sooverprio), "sonewconn: pcb %p (%s): " "Listen queue overflow: " "%i already in queue awaiting acceptance " "(%d occurrences), euid %d, rgid %d, jail %s\n", head->so_pcb, sbuf_data(&descrsb), qlen, overcount, head->so_cred->cr_uid, head->so_cred->cr_rgid, head->so_cred->cr_prison ? head->so_cred->cr_prison->pr_name : "not_jailed"); } sbuf_delete(&descrsb); overcount = 0; } return (NULL); } SOLISTEN_UNLOCK(head); VNET_ASSERT(head->so_vnet != NULL, ("%s: so %p vnet is NULL", __func__, head)); so = soalloc(head->so_vnet); if (so == NULL) { log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: " "limit reached or out of memory\n", __func__, head->so_pcb); return (NULL); } so->so_listen = head; so->so_type = head->so_type; /* * POSIX is ambiguous on what options an accept(2)ed socket should * inherit from the listener. Words "create a new socket" may be * interpreted as not inheriting anything. Best programming practice * for application developers is to not rely on such inheritance. * FreeBSD had historically inherited all so_options excluding * SO_ACCEPTCONN, which virtually means all SOL_SOCKET level options, * including those completely irrelevant to a new born socket. For * compatibility with older versions we will inherit a list of * meaningful options. 
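 *
 * The inherited subset can be observed from userland: an option such as
 * SO_KEEPALIVE set on the listener shows up on the accepted socket.  A
 * minimal sketch (error checking elided for brevity):
 */

#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct sockaddr_in sin;
	socklen_t slen = sizeof(sin);
	int lfd, cfd, afd, on = 1, val;

	lfd = socket(AF_INET, SOCK_STREAM, 0);
	setsockopt(lfd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	bind(lfd, (struct sockaddr *)&sin, sizeof(sin));
	getsockname(lfd, (struct sockaddr *)&sin, &slen);
	listen(lfd, 8);

	cfd = socket(AF_INET, SOCK_STREAM, 0);
	connect(cfd, (struct sockaddr *)&sin, sizeof(sin));
	afd = accept(lfd, NULL, NULL);

	slen = sizeof(val);
	getsockopt(afd, SOL_SOCKET, SO_KEEPALIVE, &val, &slen);
	printf("accepted socket inherited SO_KEEPALIVE: %d\n", val);

	close(afd);
	close(cfd);
	close(lfd);
	return (0);
}

/*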
*/ so->so_options = head->so_options & (SO_KEEPALIVE | SO_DONTROUTE | SO_LINGER | SO_OOBINLINE | SO_NOSIGPIPE); so->so_linger = head->so_linger; so->so_state = head->so_state; so->so_fibnum = head->so_fibnum; so->so_proto = head->so_proto; so->so_cred = crhold(head->so_cred); #ifdef MAC mac_socket_newconn(head, so); #endif knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock, so_rdknl_assert_lock); knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock, so_wrknl_assert_lock); VNET_SO_ASSERT(head); if (soreserve(so, head->sol_sbsnd_hiwat, head->sol_sbrcv_hiwat)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n", __func__, head->so_pcb); return (NULL); } so->so_rcv.sb_lowat = head->sol_sbrcv_lowat; so->so_snd.sb_lowat = head->sol_sbsnd_lowat; so->so_rcv.sb_timeo = head->sol_sbrcv_timeo; so->so_snd.sb_timeo = head->sol_sbsnd_timeo; so->so_rcv.sb_flags = head->sol_sbrcv_flags & SB_AUTOSIZE; so->so_snd.sb_flags = head->sol_sbsnd_flags & SB_AUTOSIZE; if ((so->so_proto->pr_flags & PR_SOCKBUF) == 0) { so->so_snd.sb_mtx = &so->so_snd_mtx; so->so_rcv.sb_mtx = &so->so_rcv_mtx; } return (so); } /* Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED. */ struct socket * sonewconn(struct socket *head, int connstatus) { struct socket *so; if ((so = solisten_clone(head)) == NULL) return (NULL); if (so->so_proto->pr_attach(so, 0, NULL) != 0) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: pr_attach() failed\n", __func__, head->so_pcb); return (NULL); } (void)solisten_enqueue(so, connstatus); return (so); } /* * Enqueue socket cloned by solisten_clone() to the listen queue of the * listener it has been cloned from. * * Return 'true' if socket landed on complete queue, otherwise 'false'. */ bool solisten_enqueue(struct socket *so, int connstatus) { struct socket *head = so->so_listen; MPASS(refcount_load(&so->so_count) == 0); refcount_init(&so->so_count, 1); SOLISTEN_LOCK(head); if (head->sol_accept_filter != NULL) connstatus = 0; so->so_state |= connstatus; soref(head); /* A socket on (in)complete queue refs head. */ if (connstatus) { TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list); so->so_qstate = SQ_COMP; head->sol_qlen++; solisten_wakeup(head); /* unlocks */ return (true); } else { /* * Keep removing sockets from the head until there's room for * us to insert on the tail. In pre-locking revisions, this * was a simple if(), but as we could be racing with other * threads and soabort() requires dropping locks, we must * loop waiting for the condition to be true. */ while (head->sol_incqlen > head->sol_qlimit) { struct socket *sp; sp = TAILQ_FIRST(&head->sol_incomp); TAILQ_REMOVE(&head->sol_incomp, sp, so_list); head->sol_incqlen--; SOCK_LOCK(sp); sp->so_qstate = SQ_NONE; sp->so_listen = NULL; SOCK_UNLOCK(sp); sorele_locked(head); /* does SOLISTEN_UNLOCK, head stays */ soabort(sp); SOLISTEN_LOCK(head); } TAILQ_INSERT_TAIL(&head->sol_incomp, so, so_list); so->so_qstate = SQ_INCOMP; head->sol_incqlen++; SOLISTEN_UNLOCK(head); return (false); } } #if defined(SCTP) || defined(SCTP_SUPPORT) /* * Socket part of sctp_peeloff(). Detach a new socket from an * association. The new socket is returned with a reference. * * XXXGL: reduce copy-paste with solisten_clone(). 
*/ struct socket * sopeeloff(struct socket *head) { struct socket *so; VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p", __func__, __LINE__, head)); so = soalloc(head->so_vnet); if (so == NULL) { log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: " "limit reached or out of memory\n", __func__, head->so_pcb); return (NULL); } so->so_type = head->so_type; so->so_options = head->so_options; so->so_linger = head->so_linger; so->so_state = (head->so_state & SS_NBIO) | SS_ISCONNECTED; so->so_fibnum = head->so_fibnum; so->so_proto = head->so_proto; so->so_cred = crhold(head->so_cred); #ifdef MAC mac_socket_newconn(head, so); #endif knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock, so_rdknl_assert_lock); knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock, so_wrknl_assert_lock); VNET_SO_ASSERT(head); if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n", __func__, head->so_pcb); return (NULL); } if ((*so->so_proto->pr_attach)(so, 0, NULL)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n", __func__, head->so_pcb); return (NULL); } so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; so->so_snd.sb_lowat = head->so_snd.sb_lowat; so->so_rcv.sb_timeo = head->so_rcv.sb_timeo; so->so_snd.sb_timeo = head->so_snd.sb_timeo; so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE; so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE; soref(so); return (so); } #endif /* SCTP */ int sobind(struct socket *so, struct sockaddr *nam, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_bind(so, nam, td); CURVNET_RESTORE(); return (error); } int sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_bindat(fd, so, nam, td); CURVNET_RESTORE(); return (error); } /* * solisten() transitions a socket from a non-listening state to a listening * state, but can also be used to update the listen queue depth on an * existing listen socket. The protocol will call back into the sockets * layer using solisten_proto_check() and solisten_proto() to check and set * socket-layer listen state. Call backs are used so that the protocol can * acquire both protocol and socket layer locks in whatever order is required * by the protocol. * * Protocol implementors are advised to hold the socket lock across the * socket-layer test and set to avoid races at the socket layer. */ int solisten(struct socket *so, int backlog, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_listen(so, backlog, td); CURVNET_RESTORE(); return (error); } /* * Prepare for a call to solisten_proto(). Acquire all socket buffer locks in * order to interlock with socket I/O. */ int solisten_proto_check(struct socket *so) { SOCK_LOCK_ASSERT(so); if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) != 0) return (EINVAL); /* * Sleeping is not permitted here, so simply fail if userspace is * attempting to transmit or receive on the socket. This kind of * transient failure is not ideal, but it should occur only if userspace * is misusing the socket interfaces. */ if (!sx_try_xlock(&so->so_snd_sx)) return (EAGAIN); if (!sx_try_xlock(&so->so_rcv_sx)) { sx_xunlock(&so->so_snd_sx); return (EAGAIN); } mtx_lock(&so->so_snd_mtx); mtx_lock(&so->so_rcv_mtx); /* Interlock with soo_aio_queue() and KTLS. 
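 *
 * The connected-state check above is what userland sees when calling
 * listen(2) on an already-connected socket; a sketch of the observable
 * behaviour (error checking elided):
 */

#include <sys/socket.h>
#include <netinet/in.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct sockaddr_in sin;
	socklen_t slen = sizeof(sin);
	int lfd, cfd;

	lfd = socket(AF_INET, SOCK_STREAM, 0);
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	bind(lfd, (struct sockaddr *)&sin, sizeof(sin));
	getsockname(lfd, (struct sockaddr *)&sin, &slen);
	listen(lfd, 8);

	cfd = socket(AF_INET, SOCK_STREAM, 0);
	connect(cfd, (struct sockaddr *)&sin, sizeof(sin));

	/* A connected socket cannot transition to listening state. */
	if (listen(cfd, 8) == -1 && errno == EINVAL)
		printf("listen() on a connected socket: EINVAL\n");

	close(cfd);
	close(lfd);
	return (0);
}

/*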
*/ if (!SOLISTENING(so)) { bool ktls; #ifdef KERN_TLS ktls = so->so_snd.sb_tls_info != NULL || so->so_rcv.sb_tls_info != NULL; #else ktls = false; #endif if (ktls || (so->so_snd.sb_flags & (SB_AIO | SB_AIO_RUNNING)) != 0 || (so->so_rcv.sb_flags & (SB_AIO | SB_AIO_RUNNING)) != 0) { solisten_proto_abort(so); return (EINVAL); } } return (0); } /* * Undo the setup done by solisten_proto_check(). */ void solisten_proto_abort(struct socket *so) { mtx_unlock(&so->so_snd_mtx); mtx_unlock(&so->so_rcv_mtx); sx_xunlock(&so->so_snd_sx); sx_xunlock(&so->so_rcv_sx); } void solisten_proto(struct socket *so, int backlog) { int sbrcv_lowat, sbsnd_lowat; u_int sbrcv_hiwat, sbsnd_hiwat; short sbrcv_flags, sbsnd_flags; sbintime_t sbrcv_timeo, sbsnd_timeo; SOCK_LOCK_ASSERT(so); KASSERT((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0, ("%s: bad socket state %p", __func__, so)); if (SOLISTENING(so)) goto listening; /* * Change this socket to listening state. */ sbrcv_lowat = so->so_rcv.sb_lowat; sbsnd_lowat = so->so_snd.sb_lowat; sbrcv_hiwat = so->so_rcv.sb_hiwat; sbsnd_hiwat = so->so_snd.sb_hiwat; sbrcv_flags = so->so_rcv.sb_flags; sbsnd_flags = so->so_snd.sb_flags; sbrcv_timeo = so->so_rcv.sb_timeo; sbsnd_timeo = so->so_snd.sb_timeo; sbdestroy(so, SO_SND); sbdestroy(so, SO_RCV); #ifdef INVARIANTS bzero(&so->so_rcv, sizeof(struct socket) - offsetof(struct socket, so_rcv)); #endif so->sol_sbrcv_lowat = sbrcv_lowat; so->sol_sbsnd_lowat = sbsnd_lowat; so->sol_sbrcv_hiwat = sbrcv_hiwat; so->sol_sbsnd_hiwat = sbsnd_hiwat; so->sol_sbrcv_flags = sbrcv_flags; so->sol_sbsnd_flags = sbsnd_flags; so->sol_sbrcv_timeo = sbrcv_timeo; so->sol_sbsnd_timeo = sbsnd_timeo; so->sol_qlen = so->sol_incqlen = 0; TAILQ_INIT(&so->sol_incomp); TAILQ_INIT(&so->sol_comp); so->sol_accept_filter = NULL; so->sol_accept_filter_arg = NULL; so->sol_accept_filter_str = NULL; so->sol_upcall = NULL; so->sol_upcallarg = NULL; so->so_options |= SO_ACCEPTCONN; listening: if (backlog < 0 || backlog > somaxconn) backlog = somaxconn; so->sol_qlimit = backlog; mtx_unlock(&so->so_snd_mtx); mtx_unlock(&so->so_rcv_mtx); sx_xunlock(&so->so_snd_sx); sx_xunlock(&so->so_rcv_sx); } /* * Wakeup listeners/subsystems once we have a complete connection. * Enters with lock, returns unlocked. */ void solisten_wakeup(struct socket *sol) { if (sol->sol_upcall != NULL) (void )sol->sol_upcall(sol, sol->sol_upcallarg, M_NOWAIT); else { selwakeuppri(&sol->so_rdsel, PSOCK); KNOTE_LOCKED(&sol->so_rdsel.si_note, 0); } SOLISTEN_UNLOCK(sol); wakeup_one(&sol->sol_comp); if ((sol->so_state & SS_ASYNC) && sol->so_sigio != NULL) pgsigio(&sol->so_sigio, SIGIO, 0); } /* * Return single connection off a listening socket queue. Main consumer of * the function is kern_accept4(). Some modules, that do their own accept * management also use the function. The socket reference held by the * listen queue is handed to the caller. * * Listening socket must be locked on entry and is returned unlocked on * return. * The flags argument is set of accept4(2) flags and ACCEPT4_INHERIT. 
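 *
 * From userland, the accept4(2) flags referenced above control the new
 * descriptor directly; a hypothetical helper (assumes "lfd" is a
 * listening socket with a completed connection queued):
 */

#include <sys/socket.h>
#include <fcntl.h>
#include <stdio.h>

static void
accept_nonblocking(int lfd)
{
	int afd;

	/* Request a non-blocking, close-on-exec connection socket. */
	afd = accept4(lfd, NULL, NULL, SOCK_NONBLOCK | SOCK_CLOEXEC);
	if (afd >= 0)
		printf("O_NONBLOCK set: %d\n",
		    (fcntl(afd, F_GETFL) & O_NONBLOCK) != 0);
}

/*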
*/ int solisten_dequeue(struct socket *head, struct socket **ret, int flags) { struct socket *so; int error; SOLISTEN_LOCK_ASSERT(head); while (!(head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp) && head->so_error == 0) { error = msleep(&head->sol_comp, SOCK_MTX(head), PSOCK | PCATCH, "accept", 0); if (error != 0) { SOLISTEN_UNLOCK(head); return (error); } } if (head->so_error) { error = head->so_error; head->so_error = 0; } else if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp)) error = EWOULDBLOCK; else error = 0; if (error) { SOLISTEN_UNLOCK(head); return (error); } so = TAILQ_FIRST(&head->sol_comp); SOCK_LOCK(so); KASSERT(so->so_qstate == SQ_COMP, ("%s: so %p not SQ_COMP", __func__, so)); head->sol_qlen--; so->so_qstate = SQ_NONE; so->so_listen = NULL; TAILQ_REMOVE(&head->sol_comp, so, so_list); if (flags & ACCEPT4_INHERIT) so->so_state |= (head->so_state & SS_NBIO); else so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0; SOCK_UNLOCK(so); sorele_locked(head); *ret = so; return (0); } /* * Free socket upon release of the very last reference. */ static void sofree(struct socket *so) { struct protosw *pr = so->so_proto; SOCK_LOCK_ASSERT(so); KASSERT(refcount_load(&so->so_count) == 0, ("%s: so %p has references", __func__, so)); KASSERT(SOLISTENING(so) || so->so_qstate == SQ_NONE, ("%s: so %p is on listen queue", __func__, so)); SOCK_UNLOCK(so); if (so->so_dtor != NULL) so->so_dtor(so); VNET_SO_ASSERT(so); if ((pr->pr_flags & PR_RIGHTS) && !SOLISTENING(so)) { MPASS(pr->pr_domain->dom_dispose != NULL); (*pr->pr_domain->dom_dispose)(so); } if (pr->pr_detach != NULL) pr->pr_detach(so); /* * From this point on, we assume that no other references to this * socket exist anywhere else in the stack. Therefore, no locks need * to be acquired or held. */ if (!(pr->pr_flags & PR_SOCKBUF) && !SOLISTENING(so)) { sbdestroy(so, SO_SND); sbdestroy(so, SO_RCV); } seldrain(&so->so_rdsel); seldrain(&so->so_wrsel); knlist_destroy(&so->so_rdsel.si_note); knlist_destroy(&so->so_wrsel.si_note); sodealloc(so); } /* * Release a reference on a socket while holding the socket lock. * Unlocks the socket lock before returning. */ void sorele_locked(struct socket *so) { SOCK_LOCK_ASSERT(so); if (refcount_release(&so->so_count)) sofree(so); else SOCK_UNLOCK(so); } /* * Close a socket on last file table reference removal. Initiate disconnect * if connected. Free socket when disconnect complete. * * This function will sorele() the socket. Note that soclose() may be called * prior to the ref count reaching zero. The actual socket structure will * not be freed until the ref count reaches zero. 
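 *
 * The so_count handling described here follows the standard refcount(9)
 * pattern; a minimal non-socket analogue (names invented, free() with the
 * stock M_TEMP type standing in for sofree()):
 */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/refcount.h>

struct obj {
	volatile u_int	ref;	/* refcount_init(&ref, 1) at creation */
};

static void
obj_hold(struct obj *o)
{
	refcount_acquire(&o->ref);		/* cf. soref() */
}

static void
obj_rele(struct obj *o)
{
	if (refcount_release(&o->ref))		/* cf. sorele() */
		free(o, M_TEMP);		/* cf. sofree() */
}

/*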
*/ int soclose(struct socket *so) { struct accept_queue lqueue; int error = 0; bool listening, last __diagused; CURVNET_SET(so->so_vnet); funsetown(&so->so_sigio); if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { error = sodisconnect(so); if (error) { if (error == ENOTCONN) error = 0; goto drop; } } if ((so->so_options & SO_LINGER) != 0 && so->so_linger != 0) { if ((so->so_state & SS_ISDISCONNECTING) && (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { error = tsleep(&so->so_timeo, PSOCK | PCATCH, "soclos", so->so_linger * hz); if (error) break; } } } drop: if (so->so_proto->pr_close != NULL) so->so_proto->pr_close(so); SOCK_LOCK(so); if ((listening = SOLISTENING(so))) { struct socket *sp; TAILQ_INIT(&lqueue); TAILQ_SWAP(&lqueue, &so->sol_incomp, socket, so_list); TAILQ_CONCAT(&lqueue, &so->sol_comp, so_list); so->sol_qlen = so->sol_incqlen = 0; TAILQ_FOREACH(sp, &lqueue, so_list) { SOCK_LOCK(sp); sp->so_qstate = SQ_NONE; sp->so_listen = NULL; SOCK_UNLOCK(sp); last = refcount_release(&so->so_count); KASSERT(!last, ("%s: released last reference for %p", __func__, so)); } } sorele_locked(so); if (listening) { struct socket *sp, *tsp; TAILQ_FOREACH_SAFE(sp, &lqueue, so_list, tsp) soabort(sp); } CURVNET_RESTORE(); return (error); } /* * soabort() is used to abruptly tear down a connection, such as when a * resource limit is reached (listen queue depth exceeded), or if a listen * socket is closed while there are sockets waiting to be accepted. * * This interface is tricky, because it is called on an unreferenced socket, * and must be called only by a thread that has actually removed the socket * from the listen queue it was on. Likely this thread holds the last * reference on the socket and soabort() will proceed with sofree(). But * it might be not the last, as the sockets on the listen queues are seen * from the protocol side. * * This interface will call into the protocol code, so must not be called * with any socket locks held. Protocols do call it while holding their own * recursible protocol mutexes, but this is something that should be subject * to review in the future. * * Usually socket should have a single reference left, but this is not a * requirement. In the past, when we have had named references for file * descriptor and protocol, we asserted that none of them are being held. */ void soabort(struct socket *so) { VNET_SO_ASSERT(so); if (so->so_proto->pr_abort != NULL) so->so_proto->pr_abort(so); SOCK_LOCK(so); sorele_locked(so); } int -soaccept(struct socket *so, struct sockaddr **nam) +soaccept(struct socket *so, struct sockaddr *sa) { +#ifdef INVARIANTS + u_char len = sa->sa_len; +#endif int error; CURVNET_SET(so->so_vnet); - error = so->so_proto->pr_accept(so, nam); + error = so->so_proto->pr_accept(so, sa); + KASSERT(sa->sa_len <= len, + ("%s: protocol %p sockaddr overflow", __func__, so->so_proto)); CURVNET_RESTORE(); return (error); } int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td) { return (soconnectat(AT_FDCWD, so, nam, td)); } int soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { int error; CURVNET_SET(so->so_vnet); /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. This allows * user to disconnect by connecting to, e.g., a null address. * * Note, this check is racy and may need to be re-evaluated at the * protocol layer. 
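 *
 * The linger loop in soclose() above is driven by SO_LINGER; from
 * userland (sketch only, "fd" is assumed to be a connected stream
 * socket):
 */

#include <sys/socket.h>

static void
set_linger(int fd)
{
	struct linger l = { .l_onoff = 1, .l_linger = 5 };

	(void)setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
	/* A later close(fd) may now sleep in "soclos" for up to 5 s. */
}

/*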
*/ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || (error = sodisconnect(so)))) { error = EISCONN; } else { /* * Prevent accumulated error from previous connection from * biting us. */ so->so_error = 0; if (fd == AT_FDCWD) { error = so->so_proto->pr_connect(so, nam, td); } else { error = so->so_proto->pr_connectat(fd, so, nam, td); } } CURVNET_RESTORE(); return (error); } int soconnect2(struct socket *so1, struct socket *so2) { int error; CURVNET_SET(so1->so_vnet); error = so1->so_proto->pr_connect2(so1, so2); CURVNET_RESTORE(); return (error); } int sodisconnect(struct socket *so) { int error; if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); if (so->so_state & SS_ISDISCONNECTING) return (EALREADY); VNET_SO_ASSERT(so); error = so->so_proto->pr_disconnect(so); return (error); } int sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { long space; ssize_t resid; int clen = 0, error, dontroute; KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM")); KASSERT(so->so_proto->pr_flags & PR_ATOMIC, ("sosend_dgram: !PR_ATOMIC")); if (uio != NULL) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. However, space must be * signed, as it might be less than 0 if we over-committed, and we * must use a signed comparison of space and resid. On the other * hand, a negative resid causes us to loop sending 0-length * segments to the protocol. */ if (resid < 0) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0; if (td != NULL) td->td_ru.ru_msgsnd++; if (control != NULL) clen = control->m_len; SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(&so->so_snd); error = EPIPE; goto out; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto out; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection-based * socket if it supports implied connect. Return ENOTCONN if * not connected and no address is supplied. */ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOTCONN; goto out; } } else if (addr == NULL) { if (so->so_proto->pr_flags & PR_CONNREQUIRED) error = ENOTCONN; else error = EDESTADDRREQ; SOCKBUF_UNLOCK(&so->so_snd); goto out; } } /* * Do we need MSG_OOB support in SOCK_DGRAM? Signs here may be a * problem and need fixing. */ space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; space -= clen; SOCKBUF_UNLOCK(&so->so_snd); if (resid > space) { error = EMSGSIZE; goto out; } if (uio == NULL) { resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else { /* * Copy the data from userland into a mbuf chain. * If no data is to be copied in, a single empty mbuf * is returned. */ top = m_uiotombuf(uio, M_WAITOK, space, max_hdr, (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0))); if (top == NULL) { error = EFAULT; /* only possible error */ goto out; } space -= resid - uio->uio_resid; resid = uio->uio_resid; } KASSERT(resid == 0, ("sosend_dgram: resid != 0")); /* * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock * than with. 
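 *
 * The address and space checks above map to familiar errno values for
 * datagram sockets; an illustrative userland program (error checking
 * elided, exact limits depend on socket buffer sizing):
 */

#include <sys/socket.h>
#include <netinet/in.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct sockaddr_in sin;
	static char big[70000];		/* exceeds the UDP send buffer */
	int fd;

	fd = socket(AF_INET, SOCK_DGRAM, 0);

	/* Unconnected and no destination supplied: EDESTADDRREQ. */
	if (send(fd, "x", 1, 0) == -1)
		printf("no address: %s\n", strerror(errno));

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(9);
	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

	/* Datagram larger than the send buffer: EMSGSIZE. */
	if (sendto(fd, big, sizeof(big), 0, (struct sockaddr *)&sin,
	    sizeof(sin)) == -1)
		printf("oversized: %s\n", strerror(errno));

	close(fd);
	return (0);
}

/*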
*/ if (dontroute) { SOCK_LOCK(so); so->so_options |= SO_DONTROUTE; SOCK_UNLOCK(so); } /* * XXX all the SBS_CANTSENDMORE checks previously done could be out * of date. We could have received a reset packet in an interrupt or * maybe we slept while doing page faults in uiomove() etc. We could * probably recheck again inside the locking protection here, but * there are probably other places that this also happens. We must * rethink this. */ VNET_SO_ASSERT(so); error = so->so_proto->pr_send(so, (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol understands this flag and * nothing left to send then use PRU_SEND_EOF instead of PRU_SEND. */ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME */ (flags & MSG_MORETOCOME) || (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, top, addr, control, td); if (dontroute) { SOCK_LOCK(so); so->so_options &= ~SO_DONTROUTE; SOCK_UNLOCK(so); } clen = 0; control = NULL; top = NULL; out: if (top != NULL) m_freem(top); if (control != NULL) m_freem(control); return (error); } /* * Send on a socket. If send must go all at once and message is larger than * send buffering, then hard error. Lock against other senders. If must go * all at once and not enough room now, then inform user that this would * block and do nothing. Otherwise, if nonblocking, send as much as * possible. The data to be sent is described by "uio" if nonzero, otherwise * by the mbuf chain "top" (which must be null if uio is not). Data provided * in mbuf chain must be small enough to send all at once. * * Returns nonzero on error, timeout or signal; callers must check for short * counts if EINTR/ERESTART are returned. Data and control buffers are freed * on return. */ int sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { long space; ssize_t resid; int clen = 0, error, dontroute; int atomic = sosendallatonce(so) || top; int pr_send_flag; #ifdef KERN_TLS struct ktls_session *tls; int tls_enq_cnt, tls_send_flag; uint8_t tls_rtype; tls = NULL; tls_rtype = TLS_RLTYPE_APP; #endif if (uio != NULL) resid = uio->uio_resid; else if ((top->m_flags & M_PKTHDR) != 0) resid = top->m_pkthdr.len; else resid = m_length(top, NULL); /* * In theory resid should be unsigned. However, space must be * signed, as it might be less than 0 if we over-committed, and we * must use a signed comparison of space and resid. On the other * hand, a negative resid causes us to loop sending 0-length * segments to the protocol. * * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM * type sockets since that's an error. 
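 *
 * The MSG_EOR restriction described here can be observed directly; a
 * sketch using a stream socketpair (assumes the pair is routed through
 * this generic send path, as it historically has been):
 */

#include <sys/socket.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fds[2];

	socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
	/* Record marks make no sense on a byte stream. */
	if (send(fds[0], "x", 1, MSG_EOR) == -1 && errno == EINVAL)
		printf("MSG_EOR on a stream socket: EINVAL\n");
	close(fds[0]);
	close(fds[1]);
	return (0);
}

/*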
*/ if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); if (td != NULL) td->td_ru.ru_msgsnd++; if (control != NULL) clen = control->m_len; error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags)); if (error) goto out; #ifdef KERN_TLS tls_send_flag = 0; tls = ktls_hold(so->so_snd.sb_tls_info); if (tls != NULL) { if (tls->mode == TCP_TLS_MODE_SW) tls_send_flag = PRUS_NOTREADY; if (control != NULL) { struct cmsghdr *cm = mtod(control, struct cmsghdr *); if (clen >= sizeof(*cm) && cm->cmsg_type == TLS_SET_RECORD_TYPE) { tls_rtype = *((uint8_t *)CMSG_DATA(cm)); clen = 0; m_freem(control); control = NULL; atomic = 1; } } if (resid == 0 && !ktls_permit_empty_frames(tls)) { error = EINVAL; goto release; } } #endif restart: do { SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(&so->so_snd); error = EPIPE; goto release; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto release; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection- * based socket if it supports implied connect. * Return ENOTCONN if not connected and no address is * supplied. */ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOTCONN; goto release; } } else if (addr == NULL) { SOCKBUF_UNLOCK(&so->so_snd); if (so->so_proto->pr_flags & PR_CONNREQUIRED) error = ENOTCONN; else error = EDESTADDRREQ; goto release; } } space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) { SOCKBUF_UNLOCK(&so->so_snd); error = EMSGSIZE; goto release; } if (space < resid + clen && (atomic || space < so->so_snd.sb_lowat || space < clen)) { if ((so->so_state & SS_NBIO) || (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) { SOCKBUF_UNLOCK(&so->so_snd); error = EWOULDBLOCK; goto release; } error = sbwait(so, SO_SND); SOCKBUF_UNLOCK(&so->so_snd); if (error) goto release; goto restart; } SOCKBUF_UNLOCK(&so->so_snd); space -= clen; do { if (uio == NULL) { resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; #ifdef KERN_TLS if (tls != NULL) { ktls_frame(top, tls, &tls_enq_cnt, tls_rtype); tls_rtype = TLS_RLTYPE_APP; } #endif } else { /* * Copy the data from userland into a mbuf * chain. If resid is 0, which can happen * only if we have control to send, then * a single empty mbuf is returned. This * is a workaround to prevent protocol send * methods to panic. */ #ifdef KERN_TLS if (tls != NULL) { top = m_uiotombuf(uio, M_WAITOK, space, tls->params.max_frame_len, M_EXTPG | ((flags & MSG_EOR) ? M_EOR : 0)); if (top != NULL) { ktls_frame(top, tls, &tls_enq_cnt, tls_rtype); } tls_rtype = TLS_RLTYPE_APP; } else #endif top = m_uiotombuf(uio, M_WAITOK, space, (atomic ? max_hdr : 0), (atomic ? M_PKTHDR : 0) | ((flags & MSG_EOR) ? M_EOR : 0)); if (top == NULL) { error = EFAULT; /* only possible error */ goto release; } space -= resid - uio->uio_resid; resid = uio->uio_resid; } if (dontroute) { SOCK_LOCK(so); so->so_options |= SO_DONTROUTE; SOCK_UNLOCK(so); } /* * XXX all the SBS_CANTSENDMORE checks previously * done could be out of date. We could have received * a reset packet in an interrupt or maybe we slept * while doing page faults in uiomove() etc. 
We * could probably recheck again inside the locking * protection here, but there are probably other * places that this also happens. We must rethink * this. */ VNET_SO_ASSERT(so); pr_send_flag = (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol understands * this flag and nothing left to send then use * PRU_SEND_EOF instead of PRU_SEND. */ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME. */ (flags & MSG_MORETOCOME) || (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0; #ifdef KERN_TLS pr_send_flag |= tls_send_flag; #endif error = so->so_proto->pr_send(so, pr_send_flag, top, addr, control, td); if (dontroute) { SOCK_LOCK(so); so->so_options &= ~SO_DONTROUTE; SOCK_UNLOCK(so); } #ifdef KERN_TLS if (tls != NULL && tls->mode == TCP_TLS_MODE_SW) { if (error != 0) { m_freem(top); top = NULL; } else { soref(so); ktls_enqueue(top, so, tls_enq_cnt); } } #endif clen = 0; control = NULL; top = NULL; if (error) goto release; } while (resid && space > 0); } while (resid); release: SOCK_IO_SEND_UNLOCK(so); out: #ifdef KERN_TLS if (tls != NULL) ktls_free(tls); #endif if (top != NULL) m_freem(top); if (control != NULL) m_freem(control); return (error); } /* * Send to a socket from a kernel thread. * * XXXGL: in almost all cases uio is NULL and the mbuf is supplied. * Exception is nfs/bootp_subr.c. It is arguable that the VNET context needs * to be set at all. This function should just boil down to a static inline * calling the protocol method. */ int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_sosend(so, addr, uio, top, control, flags, td); CURVNET_RESTORE(); return (error); } /* * send(2), write(2) or aio_write(2) on a socket. */ int sousrsend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *control, int flags, struct proc *userproc) { struct thread *td; ssize_t len; int error; td = uio->uio_td; len = uio->uio_resid; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_sosend(so, addr, uio, NULL, control, flags, td); CURVNET_RESTORE(); if (error != 0) { /* * Clear transient errors for stream protocols if they made * some progress. Make exclusion for aio(4) that would * schedule a new write in case of EWOULDBLOCK and clear * error itself. See soaio_process_job(). */ if (uio->uio_resid != len && (so->so_proto->pr_flags & PR_ATOMIC) == 0 && userproc == NULL && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; /* Generation of SIGPIPE can be controlled per socket. */ if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0 && (flags & MSG_NOSIGNAL) == 0) { if (userproc != NULL) { /* aio(4) job */ PROC_LOCK(userproc); kern_psignal(userproc, SIGPIPE); PROC_UNLOCK(userproc); } else { PROC_LOCK(td->td_proc); tdsignal(td, SIGPIPE); PROC_UNLOCK(td->td_proc); } } } return (error); } /* * The part of soreceive() that implements reading non-inline out-of-band * data from a socket. For more complete comments, see soreceive(), from * which this code originated. * * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is * unable to return an mbuf chain to the caller. 
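 *
 * A userland view of the non-inline OOB path (sketch only; "fd" is
 * assumed to be a connected TCP socket whose peer sent a byte with
 * MSG_OOB):
 */

#include <sys/socket.h>
#include <stdio.h>

static void
fetch_oob(int fd)
{
	char c;

	/* Typically fails if SO_OOBINLINE is set on the socket. */
	if (recv(fd, &c, 1, MSG_OOB) == 1)
		printf("urgent byte: %c\n", c);
}

/*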
*/ static int soreceive_rcvoob(struct socket *so, struct uio *uio, int flags) { struct protosw *pr = so->so_proto; struct mbuf *m; int error; KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0")); VNET_SO_ASSERT(so); m = m_get(M_WAITOK, MT_DATA); error = pr->pr_rcvoob(so, m, flags & MSG_PEEK); if (error) goto bad; do { error = uiomove(mtod(m, void *), (int) min(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid && error == 0 && m); bad: if (m != NULL) m_freem(m); return (error); } /* * Following replacement or removal of the first mbuf on the first mbuf chain * of a socket buffer, push necessary state changes back into the socket * buffer so that other consumers see the values consistently. 'nextrecord' * is the callers locally stored value of the original value of * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes. * NOTE: 'nextrecord' may be NULL. */ static __inline void sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord) { SOCKBUF_LOCK_ASSERT(sb); /* * First, update for the new value of nextrecord. If necessary, make * it the first record. */ if (sb->sb_mb != NULL) sb->sb_mb->m_nextpkt = nextrecord; else sb->sb_mb = nextrecord; /* * Now update any dependent socket buffer fields to reflect the new * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the * addition of a second clause that takes care of the case where * sb_mb has been updated, but remains the last record. */ if (sb->sb_mb == NULL) { sb->sb_mbtail = NULL; sb->sb_lastrecord = NULL; } else if (sb->sb_mb->m_nextpkt == NULL) sb->sb_lastrecord = sb->sb_mb; } /* * Implement receive operations on a socket. We depend on the way that * records are added to the sockbuf by sbappend. In particular, each record * (mbufs linked through m_next) must begin with an address if the protocol * so specifies, followed by an optional mbuf or mbufs containing ancillary * data, and then zero or more mbufs of data. In order to allow parallelism * between network receive and copying to user space, as well as avoid * sleeping with a mutex held, we release the socket buffer mutex during the * user space copy. Although the sockbuf is locked, new data may still be * appended, and thus we must maintain consistency of the sockbuf during that * time. * * The caller may receive the data as a single mbuf chain by supplying an * mbuf **mp0 for use in returning the chain. The uio is then used only for * the count in uio_resid. */ int soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct mbuf *m, **mp; int flags, error, offset; ssize_t len; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; int moff, type = 0; ssize_t orig_resid = uio->uio_resid; bool report_real_len = false; mp = mp0; if (psa != NULL) *psa = NULL; if (controlp != NULL) *controlp = NULL; if (flagsp != NULL) { report_real_len = *flagsp & MSG_TRUNC; *flagsp &= ~MSG_TRUNC; flags = *flagsp &~ MSG_EOR; } else flags = 0; if (flags & MSG_OOB) return (soreceive_rcvoob(so, uio, flags)); if (mp != NULL) *mp = NULL; if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING) && uio->uio_resid) { VNET_SO_ASSERT(so); pr->pr_rcvd(so, 0); } error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags)); if (error) return (error); restart: SOCKBUF_LOCK(&so->so_rcv); m = so->so_rcv.sb_mb; /* * If we have less data than requested, block awaiting more (subject * to any timeout) if: * 1. the current count is less than the low water mark, or * 2. 
MSG_DONTWAIT is not set */ if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && sbavail(&so->so_rcv) < uio->uio_resid) && sbavail(&so->so_rcv) < so->so_rcv.sb_lowat && m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { KASSERT(m != NULL || !sbavail(&so->so_rcv), ("receive: m == %p sbavail == %u", m, sbavail(&so->so_rcv))); if (so->so_error || so->so_rerror) { if (m != NULL) goto dontblock; if (so->so_error) error = so->so_error; else error = so->so_rerror; if ((flags & MSG_PEEK) == 0) { if (so->so_error) so->so_error = 0; else so->so_rerror = 0; } SOCKBUF_UNLOCK(&so->so_rcv); goto release; } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { if (m != NULL) goto dontblock; #ifdef KERN_TLS else if (so->so_rcv.sb_tlsdcc == 0 && so->so_rcv.sb_tlscc == 0) { #else else { #endif SOCKBUF_UNLOCK(&so->so_rcv); goto release; } } for (; m != NULL; m = m->m_next) if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { m = so->so_rcv.sb_mb; goto dontblock; } if ((so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING | SS_ISDISCONNECTED)) == 0 && (so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) { SOCKBUF_UNLOCK(&so->so_rcv); error = ENOTCONN; goto release; } if (uio->uio_resid == 0 && !report_real_len) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; } if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { SOCKBUF_UNLOCK(&so->so_rcv); error = EWOULDBLOCK; goto release; } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); error = sbwait(so, SO_RCV); SOCKBUF_UNLOCK(&so->so_rcv); if (error) goto release; goto restart; } dontblock: /* * From this point onward, we maintain 'nextrecord' as a cache of the * pointer to the next record in the socket buffer. We must keep the * various socket buffer pointers and local stack versions of the * pointers in sync, pushing out modifications before dropping the * socket buffer mutex, and re-reading them when picking it up. * * Otherwise, we will race with the network stack appending new data * or records onto the socket buffer by using inconsistent/stale * versions of the field, possibly resulting in socket buffer * corruption. * * By holding the high-level sblock(), we prevent simultaneous * readers from pulling off the front of the socket buffer. */ SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb")); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); orig_resid = 0; if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_NOWAIT); if (flags & MSG_PEEK) { m = m->m_next; } else { sbfree(&so->so_rcv, m); so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; sockbuf_pushsync(&so->so_rcv, nextrecord); } } /* * Process one or more MT_CONTROL mbufs present before any data mbufs * in the first mbuf chain on the socket buffer. If MSG_PEEK, we * just copy the data; if !MSG_PEEK, we call into the protocol to * perform externalization (or freeing if controlp == NULL). */ if (m != NULL && m->m_type == MT_CONTROL) { struct mbuf *cm = NULL, *cmn; struct mbuf **cme = &cm; #ifdef KERN_TLS struct cmsghdr *cmsg; struct tls_get_record tgr; /* * For MSG_TLSAPPDATA, check for an alert record. * If found, return ENXIO without removing * it from the receive queue. This allows a subsequent * call without MSG_TLSAPPDATA to receive it. 
* Note that, for TLS, there should only be a single * control mbuf with the TLS_GET_RECORD message in it. */ if (flags & MSG_TLSAPPDATA) { cmsg = mtod(m, struct cmsghdr *); if (cmsg->cmsg_type == TLS_GET_RECORD && cmsg->cmsg_len == CMSG_LEN(sizeof(tgr))) { memcpy(&tgr, CMSG_DATA(cmsg), sizeof(tgr)); if (__predict_false(tgr.tls_type == TLS_RLTYPE_ALERT)) { SOCKBUF_UNLOCK(&so->so_rcv); error = ENXIO; goto release; } } } #endif do { if (flags & MSG_PEEK) { if (controlp != NULL) { *controlp = m_copym(m, 0, m->m_len, M_NOWAIT); controlp = &(*controlp)->m_next; } m = m->m_next; } else { sbfree(&so->so_rcv, m); so->so_rcv.sb_mb = m->m_next; m->m_next = NULL; *cme = m; cme = &(*cme)->m_next; m = so->so_rcv.sb_mb; } } while (m != NULL && m->m_type == MT_CONTROL); if ((flags & MSG_PEEK) == 0) sockbuf_pushsync(&so->so_rcv, nextrecord); while (cm != NULL) { cmn = cm->m_next; cm->m_next = NULL; if (pr->pr_domain->dom_externalize != NULL) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); error = (*pr->pr_domain->dom_externalize) (cm, controlp, flags); SOCKBUF_LOCK(&so->so_rcv); } else if (controlp != NULL) *controlp = cm; else m_freem(cm); if (controlp != NULL) { while (*controlp != NULL) controlp = &(*controlp)->m_next; } cm = cmn; } if (m != NULL) nextrecord = so->so_rcv.sb_mb->m_nextpkt; else nextrecord = so->so_rcv.sb_mb; orig_resid = 0; } if (m != NULL) { if ((flags & MSG_PEEK) == 0) { KASSERT(m->m_nextpkt == nextrecord, ("soreceive: post-control, nextrecord !sync")); if (nextrecord == NULL) { KASSERT(so->so_rcv.sb_mb == m, ("soreceive: post-control, sb_mb!=m")); KASSERT(so->so_rcv.sb_lastrecord == m, ("soreceive: post-control, lastrecord!=m")); } } type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; } else { if ((flags & MSG_PEEK) == 0) { KASSERT(so->so_rcv.sb_mb == nextrecord, ("soreceive: sb_mb != nextrecord")); if (so->so_rcv.sb_mb == NULL) { KASSERT(so->so_rcv.sb_lastrecord == NULL, ("soreceive: sb_lastercord != NULL")); } } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * Now continue to read any data mbufs off of the head of the socket * buffer until the read request is satisfied. Note that 'type' is * used to store the type of any mbuf reads that have happened so far * such that soreceive() can stop reading if the type changes, which * causes soreceive() to return only one of regular data and inline * out-of-band data in a single socket receive operation. */ moff = 0; offset = 0; while (m != NULL && !(m->m_flags & M_NOTAVAIL) && uio->uio_resid > 0 && error == 0) { /* * If the type of mbuf has changed since the last mbuf * examined ('type'), end the receive operation. */ SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) { if (type != m->m_type) break; } else if (type == MT_OOBDATA) break; else KASSERT(m->m_type == MT_DATA, ("m->m_type == %d", m->m_type)); so->so_rcv.sb_state &= ~SBS_RCVATMARK; len = uio->uio_resid; if (so->so_oobmark && len > so->so_oobmark - offset) len = so->so_oobmark - offset; if (len > m->m_len - moff) len = m->m_len - moff; /* * If mp is set, just pass back the mbufs. Otherwise copy * them out via the uio, then free. Sockbuf must be * consistent here (points to current mbuf, it points to next * record) when we drop priority; we must note any additions * to the sockbuf when we block interrupts again. 
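 *
 * Control mbufs externalized above surface in userland as ancillary data
 * on recvmsg(2); the canonical traversal (illustrative helper):
 */

#include <sys/socket.h>
#include <stdio.h>

static void
walk_cmsgs(struct msghdr *msg)
{
	struct cmsghdr *cm;

	for (cm = CMSG_FIRSTHDR(msg); cm != NULL;
	    cm = CMSG_NXTHDR(msg, cm))
		printf("level %d type %d len %u\n", cm->cmsg_level,
		    cm->cmsg_type, (unsigned int)cm->cmsg_len);
}

/*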
*/ if (mp == NULL) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); if ((m->m_flags & M_EXTPG) != 0) error = m_unmapped_uiomove(m, moff, uio, (int)len); else error = uiomove(mtod(m, char *) + moff, (int)len, uio); SOCKBUF_LOCK(&so->so_rcv); if (error) { /* * The MT_SONAME mbuf has already been removed * from the record, so it is necessary to * remove the data mbufs, if any, to preserve * the invariant in the case of PR_ADDR that * requires MT_SONAME mbufs at the head of * each record. */ if (pr->pr_flags & PR_ATOMIC && ((flags & MSG_PEEK) == 0)) (void)sbdroprecord_locked(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); goto release; } } else uio->uio_resid -= len; SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (len == m->m_len - moff) { if (m->m_flags & M_EOR) flags |= MSG_EOR; if (flags & MSG_PEEK) { m = m->m_next; moff = 0; } else { nextrecord = m->m_nextpkt; sbfree(&so->so_rcv, m); if (mp != NULL) { m->m_nextpkt = NULL; *mp = m; mp = &m->m_next; so->so_rcv.sb_mb = m = m->m_next; *mp = NULL; } else { so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; } sockbuf_pushsync(&so->so_rcv, nextrecord); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); } } else { if (flags & MSG_PEEK) moff += len; else { if (mp != NULL) { if (flags & MSG_DONTWAIT) { *mp = m_copym(m, 0, len, M_NOWAIT); if (*mp == NULL) { /* * m_copym() couldn't * allocate an mbuf. * Adjust uio_resid back * (it was adjusted * down by len bytes, * which we didn't end * up "copying" over). */ uio->uio_resid += len; break; } } else { SOCKBUF_UNLOCK(&so->so_rcv); *mp = m_copym(m, 0, len, M_WAITOK); SOCKBUF_LOCK(&so->so_rcv); } } sbcut_locked(&so->so_rcv, len); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_oobmark) { if ((flags & MSG_PEEK) == 0) { so->so_oobmark -= len; if (so->so_oobmark == 0) { so->so_rcv.sb_state |= SBS_RCVATMARK; break; } } else { offset += len; if (offset == so->so_oobmark) break; } } if (flags & MSG_EOR) break; /* * If the MSG_WAITALL flag is set (for non-atomic socket), we * must not quit until "uio->uio_resid == 0" or an error * termination. If a signal/timeout occurs, return with a * short count but without error. Keep sockbuf locked * against other readers. */ while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 && !sosendallatonce(so) && nextrecord == NULL) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_error || so->so_rerror || so->so_rcv.sb_state & SBS_CANTRCVMORE) break; /* * Notify the protocol that some data has been * drained before blocking. */ if (pr->pr_flags & PR_WANTRCVD) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); pr->pr_rcvd(so, flags); SOCKBUF_LOCK(&so->so_rcv); if (__predict_false(so->so_rcv.sb_mb == NULL && (so->so_error || so->so_rerror || so->so_rcv.sb_state & SBS_CANTRCVMORE))) break; } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * We could receive some data while was notifying * the protocol. Skip blocking in this case. */ if (so->so_rcv.sb_mb == NULL) { error = sbwait(so, SO_RCV); if (error) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; } } m = so->so_rcv.sb_mb; if (m != NULL) nextrecord = m->m_nextpkt; } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (m != NULL && pr->pr_flags & PR_ATOMIC) { if (report_real_len) uio->uio_resid -= m_length(m, NULL) - moff; flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) (void) sbdroprecord_locked(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { if (m == NULL) { /* * First part is an inline SB_EMPTY_FIXUP(). 
Second * part makes sure sb_lastrecord is up-to-date if * there is still data in the socket buffer. */ so->so_rcv.sb_mb = nextrecord; if (so->so_rcv.sb_mb == NULL) { so->so_rcv.sb_mbtail = NULL; so->so_rcv.sb_lastrecord = NULL; } else if (nextrecord->m_nextpkt == NULL) so->so_rcv.sb_lastrecord = nextrecord; } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * If soreceive() is being done from the socket callback, * then don't need to generate ACK to peer to update window, * since ACK will be generated on return to TCP. */ if (!(flags & MSG_SOCALLBCK) && (pr->pr_flags & PR_WANTRCVD)) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); pr->pr_rcvd(so, flags); SOCKBUF_LOCK(&so->so_rcv); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); goto restart; } SOCKBUF_UNLOCK(&so->so_rcv); if (flagsp != NULL) *flagsp |= flags; release: SOCK_IO_RECV_UNLOCK(so); return (error); } /* * Optimized version of soreceive() for stream (TCP) sockets. */ int soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { int len = 0, error = 0, flags, oresid; struct sockbuf *sb; struct mbuf *m, *n = NULL; /* We only do stream sockets. */ if (so->so_type != SOCK_STREAM) return (EINVAL); if (psa != NULL) *psa = NULL; if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; if (controlp != NULL) *controlp = NULL; if (flags & MSG_OOB) return (soreceive_rcvoob(so, uio, flags)); if (mp0 != NULL) *mp0 = NULL; sb = &so->so_rcv; #ifdef KERN_TLS /* * KTLS store TLS records as records with a control message to * describe the framing. * * We check once here before acquiring locks to optimize the * common case. */ if (sb->sb_tls_info != NULL) return (soreceive_generic(so, psa, uio, mp0, controlp, flagsp)); #endif /* Prevent other readers from entering the socket. */ error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags)); if (error) return (error); SOCKBUF_LOCK(sb); #ifdef KERN_TLS if (sb->sb_tls_info != NULL) { SOCKBUF_UNLOCK(sb); SOCK_IO_RECV_UNLOCK(so); return (soreceive_generic(so, psa, uio, mp0, controlp, flagsp)); } #endif /* Easy one, no space to copyout anything. */ if (uio->uio_resid == 0) { error = EINVAL; goto out; } oresid = uio->uio_resid; /* We will never ever get anything unless we are or were connected. */ if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { error = ENOTCONN; goto out; } restart: SOCKBUF_LOCK_ASSERT(&so->so_rcv); /* Abort if socket has reported problems. */ if (so->so_error) { if (sbavail(sb) > 0) goto deliver; if (oresid > uio->uio_resid) goto out; error = so->so_error; if (!(flags & MSG_PEEK)) so->so_error = 0; goto out; } /* Door is closed. Deliver what is left, if any. */ if (sb->sb_state & SBS_CANTRCVMORE) { if (sbavail(sb) > 0) goto deliver; else goto out; } /* Socket buffer is empty and we shall not block. */ if (sbavail(sb) == 0 && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { error = EAGAIN; goto out; } /* Socket buffer got some data that we shall deliver now. */ if (sbavail(sb) > 0 && !(flags & MSG_WAITALL) && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)) || sbavail(sb) >= sb->sb_lowat || sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat) ) { goto deliver; } /* On MSG_WAITALL we must wait until all data or error arrives. 
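 *
 * Illustrative only (helper name assumed, not part of this file): the
 * MSG_WAITALL contract implemented here and in soreceive_generic()
 * means recv(2) returns the full request, or a short count only on
 * EOF, a caught signal, or an error.  A minimal userland sketch for a
 * connected stream socket `s`:
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *
 *	// Returns 0 iff exactly hdrlen bytes were received.
 *	static int
 *	read_full_header(int s, void *hdr, size_t hdrlen)
 *	{
 *		ssize_t n = recv(s, hdr, hdrlen, MSG_WAITALL);
 *
 *		// n < hdrlen: EOF, signal/timeout, or error cut it short.
 *		return (n == (ssize_t)hdrlen ? 0 : -1);
 *	}
 *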
*/ if ((flags & MSG_WAITALL) && (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat)) goto deliver; /* * Wait and block until (more) data comes in. * NB: Drops the sockbuf lock during wait. */ error = sbwait(so, SO_RCV); if (error) goto out; goto restart; deliver: SOCKBUF_LOCK_ASSERT(&so->so_rcv); KASSERT(sbavail(sb) > 0, ("%s: sockbuf empty", __func__)); KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__)); /* Statistics. */ if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; /* Fill uio until full or current end of socket buffer is reached. */ len = min(uio->uio_resid, sbavail(sb)); if (mp0 != NULL) { /* Dequeue as many mbufs as possible. */ if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) { if (*mp0 == NULL) *mp0 = sb->sb_mb; else m_cat(*mp0, sb->sb_mb); for (m = sb->sb_mb; m != NULL && m->m_len <= len; m = m->m_next) { KASSERT(!(m->m_flags & M_NOTAVAIL), ("%s: m %p not available", __func__, m)); len -= m->m_len; uio->uio_resid -= m->m_len; sbfree(sb, m); n = m; } n->m_next = NULL; sb->sb_mb = m; sb->sb_lastrecord = sb->sb_mb; if (sb->sb_mb == NULL) SB_EMPTY_FIXUP(sb); } /* Copy the remainder. */ if (len > 0) { KASSERT(sb->sb_mb != NULL, ("%s: len > 0 && sb->sb_mb empty", __func__)); m = m_copym(sb->sb_mb, 0, len, M_NOWAIT); if (m == NULL) len = 0; /* Don't flush data from sockbuf. */ else uio->uio_resid -= len; if (*mp0 != NULL) m_cat(*mp0, m); else *mp0 = m; if (*mp0 == NULL) { error = ENOBUFS; goto out; } } } else { /* NB: Must unlock socket buffer as uiomove may sleep. */ SOCKBUF_UNLOCK(sb); error = m_mbuftouio(uio, sb->sb_mb, len); SOCKBUF_LOCK(sb); if (error) goto out; } SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); /* * Remove the delivered data from the socket buffer unless we * were only peeking. */ if (!(flags & MSG_PEEK)) { if (len > 0) sbdrop_locked(sb, len); /* Notify protocol that we drained some data. */ if ((so->so_proto->pr_flags & PR_WANTRCVD) && (((flags & MSG_WAITALL) && uio->uio_resid > 0) || !(flags & MSG_SOCALLBCK))) { SOCKBUF_UNLOCK(sb); VNET_SO_ASSERT(so); so->so_proto->pr_rcvd(so, flags); SOCKBUF_LOCK(sb); } } /* * For MSG_WAITALL we may have to loop again and wait for * more data to come in. */ if ((flags & MSG_WAITALL) && uio->uio_resid > 0) goto restart; out: SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); SOCKBUF_UNLOCK(sb); SOCK_IO_RECV_UNLOCK(so); return (error); } /* * Optimized version of soreceive() for simple datagram cases from userspace. * Unlike in the stream case, we're able to drop a datagram if copyout() * fails, and because we handle datagrams atomically, we don't need to use a * sleep lock to prevent I/O interlacing. */ int soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct mbuf *m, *m2; int flags, error; ssize_t len; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; if (psa != NULL) *psa = NULL; if (controlp != NULL) *controlp = NULL; if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; /* * For any complicated cases, fall back to the full * soreceive_generic(). */ if (mp0 != NULL || (flags & (MSG_PEEK | MSG_OOB | MSG_TRUNC))) return (soreceive_generic(so, psa, uio, mp0, controlp, flagsp)); /* * Enforce restrictions on use. 
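 *
 * As a hedged userland sketch (helper name invented) of the atomic,
 * one-record-per-call semantics enforced below: a datagram that does
 * not fit is truncated and the remainder dropped, reported to the
 * caller via MSG_TRUNC in msg_flags.  Assuming a bound datagram
 * socket `s`:
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *
 *	// Returns 1 if the datagram was longer than the buffer.
 *	static int
 *	recv_dgram(int s, void *buf, size_t len)
 *	{
 *		struct iovec iov = { .iov_base = buf, .iov_len = len };
 *		struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };
 *
 *		if (recvmsg(s, &msg, 0) < 0)
 *			return (-1);
 *		return ((msg.msg_flags & MSG_TRUNC) != 0);
 *	}
 *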
*/ KASSERT((pr->pr_flags & PR_WANTRCVD) == 0, ("soreceive_dgram: wantrcvd")); KASSERT(pr->pr_flags & PR_ATOMIC, ("soreceive_dgram: !atomic")); KASSERT((so->so_rcv.sb_state & SBS_RCVATMARK) == 0, ("soreceive_dgram: SBS_RCVATMARK")); KASSERT((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0, ("soreceive_dgram: P_CONNREQUIRED")); /* * Loop blocking while waiting for a datagram. */ SOCKBUF_LOCK(&so->so_rcv); while ((m = so->so_rcv.sb_mb) == NULL) { KASSERT(sbavail(&so->so_rcv) == 0, ("soreceive_dgram: sb_mb NULL but sbavail %u", sbavail(&so->so_rcv))); if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_rcv); return (error); } if (so->so_rcv.sb_state & SBS_CANTRCVMORE || uio->uio_resid == 0) { SOCKBUF_UNLOCK(&so->so_rcv); return (0); } if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { SOCKBUF_UNLOCK(&so->so_rcv); return (EWOULDBLOCK); } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); error = sbwait(so, SO_RCV); if (error) { SOCKBUF_UNLOCK(&so->so_rcv); return (error); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); nextrecord = m->m_nextpkt; if (nextrecord == NULL) { KASSERT(so->so_rcv.sb_lastrecord == m, ("soreceive_dgram: lastrecord != m")); } KASSERT(so->so_rcv.sb_mb->m_nextpkt == nextrecord, ("soreceive_dgram: m_nextpkt != nextrecord")); /* * Pull 'm' and its chain off the front of the packet queue. */ so->so_rcv.sb_mb = NULL; sockbuf_pushsync(&so->so_rcv, nextrecord); /* * Walk 'm's chain and free that many bytes from the socket buffer. */ for (m2 = m; m2 != NULL; m2 = m2->m_next) sbfree(&so->so_rcv, m2); /* * Do a few last checks before we let go of the lock. */ SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_NOWAIT); m = m_free(m); } if (m == NULL) { /* XXXRW: Can this happen? */ return (0); } /* * Packet to copyout() is now in 'm' and it is disconnected from the * queue. * * Process one or more MT_CONTROL mbufs present before any data mbufs * in the first mbuf chain on the socket buffer. We call into the * protocol to perform externalization (or freeing if controlp == * NULL). In some cases there can be only MT_CONTROL mbufs without * MT_DATA mbufs. 
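 *
 * The externalization step below is what turns, for example, the
 * file descriptors carried in an SCM_RIGHTS message into real
 * descriptors in the receiver.  A minimal userland consumer, given
 * as a sketch only (helper name invented, AF_LOCAL socket `s`
 * assumed):
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <string.h>
 *
 *	// Receive one data byte plus one fd passed via SCM_RIGHTS.
 *	static int
 *	recv_fd(int s)
 *	{
 *		char data, cbuf[CMSG_SPACE(sizeof(int))];
 *		struct iovec iov = { .iov_base = &data, .iov_len = 1 };
 *		struct msghdr msg = {
 *			.msg_iov = &iov, .msg_iovlen = 1,
 *			.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *		};
 *		struct cmsghdr *cm;
 *		int fd = -1;
 *
 *		if (recvmsg(s, &msg, 0) < 0)
 *			return (-1);
 *		for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
 *		    cm = CMSG_NXTHDR(&msg, cm))
 *			if (cm->cmsg_level == SOL_SOCKET &&
 *			    cm->cmsg_type == SCM_RIGHTS)
 *				memcpy(&fd, CMSG_DATA(cm), sizeof(fd));
 *		return (fd);
 *	}
 *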
*/ if (m->m_type == MT_CONTROL) { struct mbuf *cm = NULL, *cmn; struct mbuf **cme = &cm; do { m2 = m->m_next; m->m_next = NULL; *cme = m; cme = &(*cme)->m_next; m = m2; } while (m != NULL && m->m_type == MT_CONTROL); while (cm != NULL) { cmn = cm->m_next; cm->m_next = NULL; if (pr->pr_domain->dom_externalize != NULL) { error = (*pr->pr_domain->dom_externalize) (cm, controlp, flags); } else if (controlp != NULL) *controlp = cm; else m_freem(cm); if (controlp != NULL) { while (*controlp != NULL) controlp = &(*controlp)->m_next; } cm = cmn; } } KASSERT(m == NULL || m->m_type == MT_DATA, ("soreceive_dgram: !data")); while (m != NULL && uio->uio_resid > 0) { len = uio->uio_resid; if (len > m->m_len) len = m->m_len; error = uiomove(mtod(m, char *), (int)len, uio); if (error) { m_freem(m); return (error); } if (len == m->m_len) m = m_free(m); else { m->m_data += len; m->m_len -= len; } } if (m != NULL) { flags |= MSG_TRUNC; m_freem(m); } if (flagsp != NULL) *flagsp |= flags; return (0); } int soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { int error; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_soreceive(so, psa, uio, mp0, controlp, flagsp); CURVNET_RESTORE(); return (error); } int soshutdown(struct socket *so, int how) { struct protosw *pr; int error, soerror_enotconn; if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) return (EINVAL); soerror_enotconn = 0; SOCK_LOCK(so); if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) { /* * POSIX mandates us to return ENOTCONN when shutdown(2) is * invoked on a datagram socket; however, historically we would * actually tear the socket down. This is known to be leveraged * by some applications to unblock a process waiting in * recvXXX(2) by another process that shares the socket with it. * Try to meet both backward-compatibility and POSIX * requirements by forcing ENOTCONN but still asking the * protocol to perform pru_shutdown(). */ if (so->so_type != SOCK_DGRAM && !SOLISTENING(so)) { SOCK_UNLOCK(so); return (ENOTCONN); } soerror_enotconn = 1; } if (SOLISTENING(so)) { if (how != SHUT_WR) { so->so_error = ECONNABORTED; solisten_wakeup(so); /* unlocks so */ } else { SOCK_UNLOCK(so); } goto done; } SOCK_UNLOCK(so); CURVNET_SET(so->so_vnet); pr = so->so_proto; if (pr->pr_flush != NULL) pr->pr_flush(so, how); if (how != SHUT_WR) sorflush(so); if (how != SHUT_RD) { error = pr->pr_shutdown(so); wakeup(&so->so_timeo); CURVNET_RESTORE(); return ((error == 0 && soerror_enotconn) ? ENOTCONN : error); } wakeup(&so->so_timeo); CURVNET_RESTORE(); done: return (soerror_enotconn ? ENOTCONN : 0); } void sorflush(struct socket *so) { struct protosw *pr; int error; VNET_SO_ASSERT(so); /* * Dislodge threads currently blocked in receive and wait to acquire * a lock against other simultaneous readers before clearing the * socket buffer. Don't let our acquire be interrupted by a signal * despite any existing socket disposition on interruptible waiting. */ socantrcvmore(so); error = SOCK_IO_RECV_LOCK(so, SBL_WAIT | SBL_NOINTR); if (error != 0) { KASSERT(SOLISTENING(so), ("%s: soiolock(%p) failed", __func__, so)); return; } pr = so->so_proto; if (pr->pr_flags & PR_RIGHTS) { MPASS(pr->pr_domain->dom_dispose != NULL); (*pr->pr_domain->dom_dispose)(so); } else { sbrelease(so, SO_RCV); SOCK_IO_RECV_UNLOCK(so); } } /* * Wrapper for the socket-established helper hook. * Parameters: socket, context of the hook point, hook id.
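 *
 * Tying back to the POSIX note in soshutdown() above: the forced
 * ENOTCONN still performs the teardown, which is exactly what lets
 * one thread kick another out of a blocking read on a shared
 * datagram socket.  A hedged sketch (helper name invented):
 *
 *	#include <sys/socket.h>
 *	#include <errno.h>
 *
 *	static int
 *	wake_udp_reader(int s)
 *	{
 *		// ENOTCONN is the expected result on an unconnected
 *		// UDP socket; the blocked reader still wakes up.
 *		if (shutdown(s, SHUT_RD) == -1 && errno != ENOTCONN)
 *			return (-1);
 *		return (0);
 *	}
 *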
*/ static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id) { struct socket_hhook_data hhook_data = { .so = so, .hctx = hctx, .m = NULL, .status = 0 }; CURVNET_SET(so->so_vnet); HHOOKS_RUN_IF(V_socket_hhh[h_id], &hhook_data, &so->osd); CURVNET_RESTORE(); /* Ugly but needed, since hhooks return void for now */ return (hhook_data.status); } /* * Perhaps this routine, and sooptcopyout(), below, ought to come in an * additional variant to handle the case where the option value needs to be * some kind of integer, but not a specific size. In addition to their use * here, these functions are also called by the protocol-level pr_ctloutput() * routines. */ int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) { size_t valsize; /* * If the user gives us more than we wanted, we ignore it, but if we * don't get the minimum length the caller wants, we return EINVAL. * On success, sopt->sopt_valsize is set to however much we actually * retrieved. */ if ((valsize = sopt->sopt_valsize) < minlen) return EINVAL; if (valsize > len) sopt->sopt_valsize = valsize = len; if (sopt->sopt_td != NULL) return (copyin(sopt->sopt_val, buf, valsize)); bcopy(sopt->sopt_val, buf, valsize); return (0); } /* * Kernel version of setsockopt(2). * * XXX: optlen is size_t, not socklen_t */ int so_setsockopt(struct socket *so, int level, int optname, void *optval, size_t optlen) { struct sockopt sopt; sopt.sopt_level = level; sopt.sopt_name = optname; sopt.sopt_dir = SOPT_SET; sopt.sopt_val = optval; sopt.sopt_valsize = optlen; sopt.sopt_td = NULL; return (sosetopt(so, &sopt)); } int sosetopt(struct socket *so, struct sockopt *sopt) { int error, optval; struct linger l; struct timeval tv; sbintime_t val, *valp; uint32_t val32; #ifdef MAC struct mac extmac; #endif CURVNET_SET(so->so_vnet); error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto->pr_ctloutput != NULL) error = (*so->so_proto->pr_ctloutput)(so, sopt); else error = ENOPROTOOPT; } else { switch (sopt->sopt_name) { case SO_ACCEPTFILTER: error = accept_filt_setopt(so, sopt); if (error) goto bad; break; case SO_LINGER: error = sooptcopyin(sopt, &l, sizeof l, sizeof l); if (error) goto bad; if (l.l_linger < 0 || l.l_linger > USHRT_MAX || l.l_linger > (INT_MAX / hz)) { error = EDOM; goto bad; } SOCK_LOCK(so); so->so_linger = l.l_linger; if (l.l_onoff) so->so_options |= SO_LINGER; else so->so_options &= ~SO_LINGER; SOCK_UNLOCK(so); break; case SO_DEBUG: case SO_KEEPALIVE: case SO_DONTROUTE: case SO_USELOOPBACK: case SO_BROADCAST: case SO_REUSEADDR: case SO_REUSEPORT: case SO_REUSEPORT_LB: case SO_OOBINLINE: case SO_TIMESTAMP: case SO_BINTIME: case SO_NOSIGPIPE: case SO_NO_DDP: case SO_NO_OFFLOAD: case SO_RERROR: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; SOCK_LOCK(so); if (optval) so->so_options |= sopt->sopt_name; else so->so_options &= ~sopt->sopt_name; SOCK_UNLOCK(so); break; case SO_SETFIB: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; if (optval < 0 || optval >= rt_numfibs) { error = EINVAL; goto bad; } if (((so->so_proto->pr_domain->dom_family == PF_INET) || (so->so_proto->pr_domain->dom_family == PF_INET6) || (so->so_proto->pr_domain->dom_family == PF_ROUTE))) so->so_fibnum = optval; else so->so_fibnum = 0; break; case SO_USER_COOKIE: error = sooptcopyin(sopt, &val32, sizeof val32, sizeof val32); if (error) goto bad; so->so_user_cookie = val32; break; case SO_SNDBUF: case SO_RCVBUF: case SO_SNDLOWAT: case SO_RCVLOWAT: error = 
so->so_proto->pr_setsbopt(so, sopt); if (error) goto bad; break; case SO_SNDTIMEO: case SO_RCVTIMEO: #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { struct timeval32 tv32; error = sooptcopyin(sopt, &tv32, sizeof tv32, sizeof tv32); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); } else #endif error = sooptcopyin(sopt, &tv, sizeof tv, sizeof tv); if (error) goto bad; if (tv.tv_sec < 0 || tv.tv_usec < 0 || tv.tv_usec >= 1000000) { error = EDOM; goto bad; } if (tv.tv_sec > INT32_MAX) val = SBT_MAX; else val = tvtosbt(tv); SOCK_LOCK(so); valp = sopt->sopt_name == SO_SNDTIMEO ? (SOLISTENING(so) ? &so->sol_sbsnd_timeo : &so->so_snd.sb_timeo) : (SOLISTENING(so) ? &so->sol_sbrcv_timeo : &so->so_rcv.sb_timeo); *valp = val; SOCK_UNLOCK(so); break; case SO_LABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof extmac, sizeof extmac); if (error) goto bad; error = mac_setsockopt_label(sopt->sopt_td->td_ucred, so, &extmac); #else error = EOPNOTSUPP; #endif break; case SO_TS_CLOCK: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; if (optval < 0 || optval > SO_TS_CLOCK_MAX) { error = EINVAL; goto bad; } so->so_ts_clock = optval; break; case SO_MAX_PACING_RATE: error = sooptcopyin(sopt, &val32, sizeof(val32), sizeof(val32)); if (error) goto bad; so->so_max_pacing_rate = val32; break; default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, HHOOK_SOCKET_OPT); else error = ENOPROTOOPT; break; } if (error == 0 && so->so_proto->pr_ctloutput != NULL) (void)(*so->so_proto->pr_ctloutput)(so, sopt); } bad: CURVNET_RESTORE(); return (error); } /* * Helper routine for getsockopt. */ int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) { int error; size_t valsize; error = 0; /* * Documented get behavior is that we always return a value, possibly * truncated to fit in the user's buffer. Traditional behavior is * that we always tell the user precisely how much we copied, rather * than something useful like the total amount we had available for * her. Note that this interface is not idempotent; the entire * answer must be generated ahead of time. 
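 *
 * Seen from userland, the contract above means getsockopt(2) never
 * fails merely because the buffer is small; the value is truncated
 * and *optlen reports how many bytes were actually copied.  A
 * minimal sketch (helper name invented, socket `s` assumed):
 *
 *	#include <sys/socket.h>
 *
 *	static int
 *	get_linger_truncated(int s)
 *	{
 *		char small[1];
 *		socklen_t len = sizeof(small);
 *
 *		if (getsockopt(s, SOL_SOCKET, SO_LINGER, small, &len) == -1)
 *			return (-1);
 *		return ((int)len);	// 1, not sizeof(struct linger)
 *	}
 *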
*/ valsize = min(len, sopt->sopt_valsize); sopt->sopt_valsize = valsize; if (sopt->sopt_val != NULL) { if (sopt->sopt_td != NULL) error = copyout(buf, sopt->sopt_val, valsize); else bcopy(buf, sopt->sopt_val, valsize); } return (error); } int sogetopt(struct socket *so, struct sockopt *sopt) { int error, optval; struct linger l; struct timeval tv; #ifdef MAC struct mac extmac; #endif CURVNET_SET(so->so_vnet); error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto->pr_ctloutput != NULL) error = (*so->so_proto->pr_ctloutput)(so, sopt); else error = ENOPROTOOPT; CURVNET_RESTORE(); return (error); } else { switch (sopt->sopt_name) { case SO_ACCEPTFILTER: error = accept_filt_getopt(so, sopt); break; case SO_LINGER: SOCK_LOCK(so); l.l_onoff = so->so_options & SO_LINGER; l.l_linger = so->so_linger; SOCK_UNLOCK(so); error = sooptcopyout(sopt, &l, sizeof l); break; case SO_USELOOPBACK: case SO_DONTROUTE: case SO_DEBUG: case SO_KEEPALIVE: case SO_REUSEADDR: case SO_REUSEPORT: case SO_REUSEPORT_LB: case SO_BROADCAST: case SO_OOBINLINE: case SO_ACCEPTCONN: case SO_TIMESTAMP: case SO_BINTIME: case SO_NOSIGPIPE: case SO_NO_DDP: case SO_NO_OFFLOAD: case SO_RERROR: optval = so->so_options & sopt->sopt_name; integer: error = sooptcopyout(sopt, &optval, sizeof optval); break; case SO_DOMAIN: optval = so->so_proto->pr_domain->dom_family; goto integer; case SO_TYPE: optval = so->so_type; goto integer; case SO_PROTOCOL: optval = so->so_proto->pr_protocol; goto integer; case SO_ERROR: SOCK_LOCK(so); if (so->so_error) { optval = so->so_error; so->so_error = 0; } else { optval = so->so_rerror; so->so_rerror = 0; } SOCK_UNLOCK(so); goto integer; case SO_SNDBUF: optval = SOLISTENING(so) ? so->sol_sbsnd_hiwat : so->so_snd.sb_hiwat; goto integer; case SO_RCVBUF: optval = SOLISTENING(so) ? so->sol_sbrcv_hiwat : so->so_rcv.sb_hiwat; goto integer; case SO_SNDLOWAT: optval = SOLISTENING(so) ? so->sol_sbsnd_lowat : so->so_snd.sb_lowat; goto integer; case SO_RCVLOWAT: optval = SOLISTENING(so) ? so->sol_sbrcv_lowat : so->so_rcv.sb_lowat; goto integer; case SO_SNDTIMEO: case SO_RCVTIMEO: SOCK_LOCK(so); tv = sbttotv(sopt->sopt_name == SO_SNDTIMEO ? (SOLISTENING(so) ? so->sol_sbsnd_timeo : so->so_snd.sb_timeo) : (SOLISTENING(so) ? so->sol_sbrcv_timeo : so->so_rcv.sb_timeo)); SOCK_UNLOCK(so); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { struct timeval32 tv32; CP(tv, tv32, tv_sec); CP(tv, tv32, tv_usec); error = sooptcopyout(sopt, &tv32, sizeof tv32); } else #endif error = sooptcopyout(sopt, &tv, sizeof tv); break; case SO_LABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof(extmac), sizeof(extmac)); if (error) goto bad; error = mac_getsockopt_label(sopt->sopt_td->td_ucred, so, &extmac); if (error) goto bad; /* Don't copy out extmac, it is unchanged. */ #else error = EOPNOTSUPP; #endif break; case SO_PEERLABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof(extmac), sizeof(extmac)); if (error) goto bad; error = mac_getsockopt_peerlabel( sopt->sopt_td->td_ucred, so, &extmac); if (error) goto bad; /* Don't copy out extmac, it is unchanged. */ #else error = EOPNOTSUPP; #endif break; case SO_LISTENQLIMIT: optval = SOLISTENING(so) ? so->sol_qlimit : 0; goto integer; case SO_LISTENQLEN: optval = SOLISTENING(so) ? so->sol_qlen : 0; goto integer; case SO_LISTENINCQLEN: optval = SOLISTENING(so) ? 
so->sol_incqlen : 0; goto integer; case SO_TS_CLOCK: optval = so->so_ts_clock; goto integer; case SO_MAX_PACING_RATE: optval = so->so_max_pacing_rate; goto integer; default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, HHOOK_SOCKET_OPT); else error = ENOPROTOOPT; break; } } #ifdef MAC bad: #endif CURVNET_RESTORE(); return (error); } int soopt_getm(struct sockopt *sopt, struct mbuf **mp) { struct mbuf *m, *m_prev; int sopt_size = sopt->sopt_valsize; MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); if (sopt_size > MLEN) { MCLGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); return (ENOBUFS); } m->m_len = min(MCLBYTES, sopt_size); } else { m->m_len = min(MLEN, sopt_size); } sopt_size -= m->m_len; *mp = m; m_prev = m; while (sopt_size) { MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA); if (m == NULL) { m_freem(*mp); return (ENOBUFS); } if (sopt_size > MLEN) { MCLGET(m, sopt->sopt_td != NULL ? M_WAITOK : M_NOWAIT); if ((m->m_flags & M_EXT) == 0) { m_freem(m); m_freem(*mp); return (ENOBUFS); } m->m_len = min(MCLBYTES, sopt_size); } else { m->m_len = min(MLEN, sopt_size); } sopt_size -= m->m_len; m_prev->m_next = m; m_prev = m; } return (0); } int soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) { struct mbuf *m0 = m; if (sopt->sopt_val == NULL) return (0); while (m != NULL && sopt->sopt_valsize >= m->m_len) { if (sopt->sopt_td != NULL) { int error; error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len); if (error != 0) { m_freem(m0); return (error); } } else bcopy(sopt->sopt_val, mtod(m, char *), m->m_len); sopt->sopt_valsize -= m->m_len; sopt->sopt_val = (char *)sopt->sopt_val + m->m_len; m = m->m_next; } if (m != NULL) /* enough mbufs should have been allocated at ip6_sooptmcopyin() */ panic("ip6_sooptmcopyin"); return (0); } int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) { struct mbuf *m0 = m; size_t valsize = 0; if (sopt->sopt_val == NULL) return (0); while (m != NULL && sopt->sopt_valsize >= m->m_len) { if (sopt->sopt_td != NULL) { int error; error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len); if (error != 0) { m_freem(m0); return (error); } } else bcopy(mtod(m, char *), sopt->sopt_val, m->m_len); sopt->sopt_valsize -= m->m_len; sopt->sopt_val = (char *)sopt->sopt_val + m->m_len; valsize += m->m_len; m = m->m_next; } if (m != NULL) { /* a large enough sockopt buffer should be provided by userland */ m_freem(m0); return (EINVAL); } sopt->sopt_valsize = valsize; return (0); } /* * sohasoutofband(): protocol notifies socket layer of the arrival of new * out-of-band data, which will then notify socket consumers. */ void sohasoutofband(struct socket *so) { if (so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGURG, 0); selwakeuppri(&so->so_rdsel, PSOCK); } int sopoll(struct socket *so, int events, struct ucred *active_cred, struct thread *td) { /* * We do not need to set or assert curvnet as long as everyone uses * sopoll_generic().
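 *
 * For reference, a userland view of what sopoll_generic() computes
 * below (sketch only; helper name invented): POLLIN for readable
 * data, with POLLHUP possibly reported even though it was never
 * requested once both directions are shut down.
 *
 *	#include <poll.h>
 *
 *	static int
 *	wait_readable(int s, int timeout_ms)
 *	{
 *		struct pollfd pfd = { .fd = s, .events = POLLIN };
 *		int n = poll(&pfd, 1, timeout_ms);
 *
 *		if (n <= 0)
 *			return (n);	// 0 on timeout, -1 on error
 *		return ((pfd.revents & (POLLIN | POLLHUP)) != 0);
 *	}
 *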
*/ return (so->so_proto->pr_sopoll(so, events, active_cred, td)); } int sopoll_generic(struct socket *so, int events, struct ucred *active_cred, struct thread *td) { int revents; SOCK_LOCK(so); if (SOLISTENING(so)) { if (!(events & (POLLIN | POLLRDNORM))) revents = 0; else if (!TAILQ_EMPTY(&so->sol_comp)) revents = events & (POLLIN | POLLRDNORM); else if ((events & POLLINIGNEOF) == 0 && so->so_error) revents = (events & (POLLIN | POLLRDNORM)) | POLLHUP; else { selrecord(td, &so->so_rdsel); revents = 0; } } else { revents = 0; SOCK_SENDBUF_LOCK(so); SOCK_RECVBUF_LOCK(so); if (events & (POLLIN | POLLRDNORM)) if (soreadabledata(so)) revents |= events & (POLLIN | POLLRDNORM); if (events & (POLLOUT | POLLWRNORM)) if (sowriteable(so)) revents |= events & (POLLOUT | POLLWRNORM); if (events & (POLLPRI | POLLRDBAND)) if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK)) revents |= events & (POLLPRI | POLLRDBAND); if ((events & POLLINIGNEOF) == 0) { if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { revents |= events & (POLLIN | POLLRDNORM); if (so->so_snd.sb_state & SBS_CANTSENDMORE) revents |= POLLHUP; } } if (so->so_rcv.sb_state & SBS_CANTRCVMORE) revents |= events & POLLRDHUP; if (revents == 0) { if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND | POLLRDHUP)) { selrecord(td, &so->so_rdsel); so->so_rcv.sb_flags |= SB_SEL; } if (events & (POLLOUT | POLLWRNORM)) { selrecord(td, &so->so_wrsel); so->so_snd.sb_flags |= SB_SEL; } } SOCK_RECVBUF_UNLOCK(so); SOCK_SENDBUF_UNLOCK(so); } SOCK_UNLOCK(so); return (revents); } int soo_kqfilter(struct file *fp, struct knote *kn) { struct socket *so = kn->kn_fp->f_data; struct sockbuf *sb; sb_which which; struct knlist *knl; switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &soread_filtops; knl = &so->so_rdsel.si_note; sb = &so->so_rcv; which = SO_RCV; break; case EVFILT_WRITE: kn->kn_fop = &sowrite_filtops; knl = &so->so_wrsel.si_note; sb = &so->so_snd; which = SO_SND; break; case EVFILT_EMPTY: kn->kn_fop = &soempty_filtops; knl = &so->so_wrsel.si_note; sb = &so->so_snd; which = SO_SND; break; default: return (EINVAL); } SOCK_LOCK(so); if (SOLISTENING(so)) { knlist_add(knl, kn, 1); } else { SOCK_BUF_LOCK(so, which); knlist_add(knl, kn, 1); sb->sb_flags |= SB_KNOTE; SOCK_BUF_UNLOCK(so, which); } SOCK_UNLOCK(so); return (0); } static void filt_sordetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; so_rdknl_lock(so); knlist_remove(&so->so_rdsel.si_note, kn, 1); if (!SOLISTENING(so) && knlist_empty(&so->so_rdsel.si_note)) so->so_rcv.sb_flags &= ~SB_KNOTE; so_rdknl_unlock(so); } /*ARGSUSED*/ static int filt_soread(struct knote *kn, long hint) { struct socket *so; so = kn->kn_fp->f_data; if (SOLISTENING(so)) { SOCK_LOCK_ASSERT(so); kn->kn_data = so->sol_qlen; if (so->so_error) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; return (1); } return (!TAILQ_EMPTY(&so->sol_comp)); } SOCK_RECVBUF_LOCK_ASSERT(so); kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl; if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; return (1); } else if (so->so_error || so->so_rerror) return (1); if (kn->kn_sfflags & NOTE_LOWAT) { if (kn->kn_data >= kn->kn_sdata) return (1); } else if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat) return (1); /* This hook returning non-zero indicates an event, not error */ return (hhook_run_socket(so, NULL, HHOOK_FILT_SOREAD)); } static void filt_sowdetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; so_wrknl_lock(so); knlist_remove(&so->so_wrsel.si_note, kn, 
1); if (!SOLISTENING(so) && knlist_empty(&so->so_wrsel.si_note)) so->so_snd.sb_flags &= ~SB_KNOTE; so_wrknl_unlock(so); } /*ARGSUSED*/ static int filt_sowrite(struct knote *kn, long hint) { struct socket *so; so = kn->kn_fp->f_data; if (SOLISTENING(so)) return (0); SOCK_SENDBUF_LOCK_ASSERT(so); kn->kn_data = sbspace(&so->so_snd); hhook_run_socket(so, kn, HHOOK_FILT_SOWRITE); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; return (1); } else if (so->so_error) /* temporary udp error */ return (1); else if (((so->so_state & SS_ISCONNECTED) == 0) && (so->so_proto->pr_flags & PR_CONNREQUIRED)) return (0); else if (kn->kn_sfflags & NOTE_LOWAT) return (kn->kn_data >= kn->kn_sdata); else return (kn->kn_data >= so->so_snd.sb_lowat); } static int filt_soempty(struct knote *kn, long hint) { struct socket *so; so = kn->kn_fp->f_data; if (SOLISTENING(so)) return (1); SOCK_SENDBUF_LOCK_ASSERT(so); kn->kn_data = sbused(&so->so_snd); if (kn->kn_data == 0) return (1); else return (0); } int socheckuid(struct socket *so, uid_t uid) { if (so == NULL) return (EPERM); if (so->so_cred->cr_uid != uid) return (EPERM); return (0); } /* * These functions are used by protocols to notify the socket layer (and its * consumers) of state changes in the sockets driven by protocol-side events. */ /* * Procedures to manipulate state flags of socket and do appropriate wakeups. * * Normal sequence from the active (originating) side is that * soisconnecting() is called during processing of connect() call, resulting * in an eventual call to soisconnected() if/when the connection is * established. When the connection is torn down soisdisconnecting() is * called during processing of disconnect() call, and soisdisconnected() is * called when the connection to the peer is totally severed. The semantics * of these routines are such that connectionless protocols can call * soisconnected() and soisdisconnected() only, bypassing the in-progress * calls when setting up a ``connection'' takes no time. * * From the passive side, a socket is created with two queues of sockets: * so_incomp for connections in progress and so_comp for connections already * made and awaiting user acceptance. As a protocol is preparing incoming * connections, it creates a socket structure queued on so_incomp by calling * sonewconn(). When the connection is established, soisconnected() is * called, and transfers the socket structure to so_comp, making it available * to accept(). * * If a socket is closed with sockets on either so_incomp or so_comp, these * sockets are dropped. * * If higher-level protocols are implemented in the kernel, the wakeups done * here will sometimes cause software-interrupt process scheduling. */ void soisconnecting(struct socket *so) { SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISCONNECTING; SOCK_UNLOCK(so); } void soisconnected(struct socket *so) { bool last __diagused; SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); so->so_state |= SS_ISCONNECTED; if (so->so_qstate == SQ_INCOMP) { struct socket *head = so->so_listen; int ret; KASSERT(head, ("%s: so %p on incomp of NULL", __func__, so)); /* * Promoting a socket from incomplete queue to complete, we * need to go through reverse order of locking. We first do * trylock, and if that doesn't succeed, we go the hard way * leaving a reference and rechecking consistency after proper * locking. 
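 *
 * As an aside grounded in filt_soread() above (sketch only; helper
 * name invented): on a listening socket EVFILT_READ fires when the
 * completed-connection queue is non-empty, and kev.data carries
 * sol_qlen, so one wakeup can drain several pending accepts.
 *
 *	#include <sys/event.h>
 *
 *	static int
 *	accept_ready(int kq, int lsock)
 *	{
 *		struct kevent kev;
 *
 *		EV_SET(&kev, lsock, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *		if (kevent(kq, &kev, 1, &kev, 1, NULL) < 1)
 *			return (-1);
 *		return ((int)kev.data);	// connections ready to accept
 *	}
 *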
*/ if (__predict_false(SOLISTEN_TRYLOCK(head) == 0)) { soref(head); SOCK_UNLOCK(so); SOLISTEN_LOCK(head); SOCK_LOCK(so); if (__predict_false(head != so->so_listen)) { /* * The socket went off the listen queue, * should be lost race to close(2) of sol. * The socket is about to soabort(). */ SOCK_UNLOCK(so); sorele_locked(head); return; } last = refcount_release(&head->so_count); KASSERT(!last, ("%s: released last reference for %p", __func__, head)); } again: if ((so->so_options & SO_ACCEPTFILTER) == 0) { TAILQ_REMOVE(&head->sol_incomp, so, so_list); head->sol_incqlen--; TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list); head->sol_qlen++; so->so_qstate = SQ_COMP; SOCK_UNLOCK(so); solisten_wakeup(head); /* unlocks */ } else { SOCK_RECVBUF_LOCK(so); soupcall_set(so, SO_RCV, head->sol_accept_filter->accf_callback, head->sol_accept_filter_arg); so->so_options &= ~SO_ACCEPTFILTER; ret = head->sol_accept_filter->accf_callback(so, head->sol_accept_filter_arg, M_NOWAIT); if (ret == SU_ISCONNECTED) { soupcall_clear(so, SO_RCV); SOCK_RECVBUF_UNLOCK(so); goto again; } SOCK_RECVBUF_UNLOCK(so); SOCK_UNLOCK(so); SOLISTEN_UNLOCK(head); } return; } SOCK_UNLOCK(so); wakeup(&so->so_timeo); sorwakeup(so); sowwakeup(so); } void soisdisconnecting(struct socket *so) { SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTING; so->so_state |= SS_ISDISCONNECTING; if (!SOLISTENING(so)) { SOCK_RECVBUF_LOCK(so); socantrcvmore_locked(so); SOCK_SENDBUF_LOCK(so); socantsendmore_locked(so); } SOCK_UNLOCK(so); wakeup(&so->so_timeo); } void soisdisconnected(struct socket *so) { SOCK_LOCK(so); /* * There is at least one reader of so_state that does not * acquire socket lock, namely soreceive_generic(). Ensure * that it never sees all flags that track connection status * cleared, by ordering the update with a barrier semantic of * our release thread fence. */ so->so_state |= SS_ISDISCONNECTED; atomic_thread_fence_rel(); so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); if (!SOLISTENING(so)) { SOCK_UNLOCK(so); SOCK_RECVBUF_LOCK(so); socantrcvmore_locked(so); SOCK_SENDBUF_LOCK(so); sbdrop_locked(&so->so_snd, sbused(&so->so_snd)); socantsendmore_locked(so); } else SOCK_UNLOCK(so); wakeup(&so->so_timeo); } int soiolock(struct socket *so, struct sx *sx, int flags) { int error; KASSERT((flags & SBL_VALID) == flags, ("soiolock: invalid flags %#x", flags)); if ((flags & SBL_WAIT) != 0) { if ((flags & SBL_NOINTR) != 0) { sx_xlock(sx); } else { error = sx_xlock_sig(sx); if (error != 0) return (error); } } else if (!sx_try_xlock(sx)) { return (EWOULDBLOCK); } if (__predict_false(SOLISTENING(so))) { sx_xunlock(sx); return (ENOTCONN); } return (0); } void soiounlock(struct sx *sx) { sx_xunlock(sx); } /* * Make a copy of a sockaddr in a malloced buffer of type M_SONAME. */ struct sockaddr * sodupsockaddr(const struct sockaddr *sa, int mflags) { struct sockaddr *sa2; sa2 = malloc(sa->sa_len, M_SONAME, mflags); if (sa2) bcopy(sa, sa2, sa->sa_len); return sa2; } /* * Register per-socket destructor. */ void sodtor_set(struct socket *so, so_dtor_t *func) { SOCK_LOCK_ASSERT(so); so->so_dtor = func; } /* * Register per-socket buffer upcalls. 
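 *
 * Hypothetical in-kernel consumer (sketch only; all my_* names are
 * invented): the matching buffer lock must be held across
 * soupcall_set()/soupcall_clear(), and the upcall itself runs with
 * that lock held, so it must not sleep.
 *
 *	static int
 *	my_rcv_upcall(struct socket *so, void *arg, int waitflag)
 *	{
 *		wakeup(arg);		// nudge a service thread
 *		return (SU_OK);		// stay registered
 *	}
 *
 *	static void
 *	my_attach(struct socket *so, void *arg)
 *	{
 *		SOCK_RECVBUF_LOCK(so);
 *		soupcall_set(so, SO_RCV, my_rcv_upcall, arg);
 *		SOCK_RECVBUF_UNLOCK(so);
 *	}
 *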
*/ void soupcall_set(struct socket *so, sb_which which, so_upcall_t func, void *arg) { struct sockbuf *sb; KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so)); switch (which) { case SO_RCV: sb = &so->so_rcv; break; case SO_SND: sb = &so->so_snd; break; } SOCK_BUF_LOCK_ASSERT(so, which); sb->sb_upcall = func; sb->sb_upcallarg = arg; sb->sb_flags |= SB_UPCALL; } void soupcall_clear(struct socket *so, sb_which which) { struct sockbuf *sb; KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so)); switch (which) { case SO_RCV: sb = &so->so_rcv; break; case SO_SND: sb = &so->so_snd; break; } SOCK_BUF_LOCK_ASSERT(so, which); KASSERT(sb->sb_upcall != NULL, ("%s: so %p no upcall to clear", __func__, so)); sb->sb_upcall = NULL; sb->sb_upcallarg = NULL; sb->sb_flags &= ~SB_UPCALL; } void solisten_upcall_set(struct socket *so, so_upcall_t func, void *arg) { SOLISTEN_LOCK_ASSERT(so); so->sol_upcall = func; so->sol_upcallarg = arg; } static void so_rdknl_lock(void *arg) { struct socket *so = arg; retry: if (SOLISTENING(so)) { SOLISTEN_LOCK(so); } else { SOCK_RECVBUF_LOCK(so); if (__predict_false(SOLISTENING(so))) { SOCK_RECVBUF_UNLOCK(so); goto retry; } } } static void so_rdknl_unlock(void *arg) { struct socket *so = arg; if (SOLISTENING(so)) SOLISTEN_UNLOCK(so); else SOCK_RECVBUF_UNLOCK(so); } static void so_rdknl_assert_lock(void *arg, int what) { struct socket *so = arg; if (what == LA_LOCKED) { if (SOLISTENING(so)) SOLISTEN_LOCK_ASSERT(so); else SOCK_RECVBUF_LOCK_ASSERT(so); } else { if (SOLISTENING(so)) SOLISTEN_UNLOCK_ASSERT(so); else SOCK_RECVBUF_UNLOCK_ASSERT(so); } } static void so_wrknl_lock(void *arg) { struct socket *so = arg; retry: if (SOLISTENING(so)) { SOLISTEN_LOCK(so); } else { SOCK_SENDBUF_LOCK(so); if (__predict_false(SOLISTENING(so))) { SOCK_SENDBUF_UNLOCK(so); goto retry; } } } static void so_wrknl_unlock(void *arg) { struct socket *so = arg; if (SOLISTENING(so)) SOLISTEN_UNLOCK(so); else SOCK_SENDBUF_UNLOCK(so); } static void so_wrknl_assert_lock(void *arg, int what) { struct socket *so = arg; if (what == LA_LOCKED) { if (SOLISTENING(so)) SOLISTEN_LOCK_ASSERT(so); else SOCK_SENDBUF_LOCK_ASSERT(so); } else { if (SOLISTENING(so)) SOLISTEN_UNLOCK_ASSERT(so); else SOCK_SENDBUF_UNLOCK_ASSERT(so); } } /* * Create an external-format (``xsocket'') structure using the information in * the kernel-format socket structure pointed to by so. This is done to * reduce the spew of irrelevant information over this interface, to isolate * user code from changes in the kernel structure, and potentially to provide * information-hiding if we decide that some of this information should be * hidden from users. */ void sotoxsocket(struct socket *so, struct xsocket *xso) { bzero(xso, sizeof(*xso)); xso->xso_len = sizeof *xso; xso->xso_so = (uintptr_t)so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = (uintptr_t)so->so_pcb; xso->xso_protocol = so->so_proto->pr_protocol; xso->xso_family = so->so_proto->pr_domain->dom_family; xso->so_timeo = so->so_timeo; xso->so_error = so->so_error; xso->so_uid = so->so_cred->cr_uid; xso->so_pgid = so->so_sigio ? 
so->so_sigio->sio_pgid : 0; if (SOLISTENING(so)) { xso->so_qlen = so->sol_qlen; xso->so_incqlen = so->sol_incqlen; xso->so_qlimit = so->sol_qlimit; xso->so_oobmark = 0; } else { xso->so_state |= so->so_qstate; xso->so_qlen = xso->so_incqlen = xso->so_qlimit = 0; xso->so_oobmark = so->so_oobmark; sbtoxsockbuf(&so->so_snd, &xso->so_snd); sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); } } struct sockbuf * so_sockbuf_rcv(struct socket *so) { return (&so->so_rcv); } struct sockbuf * so_sockbuf_snd(struct socket *so) { return (&so->so_snd); } int so_state_get(const struct socket *so) { return (so->so_state); } void so_state_set(struct socket *so, int val) { so->so_state = val; } int so_options_get(const struct socket *so) { return (so->so_options); } void so_options_set(struct socket *so, int val) { so->so_options = val; } int so_error_get(const struct socket *so) { return (so->so_error); } void so_error_set(struct socket *so, int val) { so->so_error = val; } int so_linger_get(const struct socket *so) { return (so->so_linger); } void so_linger_set(struct socket *so, int val) { KASSERT(val >= 0 && val <= USHRT_MAX && val <= (INT_MAX / hz), ("%s: val %d out of range", __func__, val)); so->so_linger = val; } struct protosw * so_protosw_get(const struct socket *so) { return (so->so_proto); } void so_protosw_set(struct socket *so, struct protosw *val) { so->so_proto = val; } void so_sorwakeup(struct socket *so) { sorwakeup(so); } void so_sowwakeup(struct socket *so) { sowwakeup(so); } void so_sorwakeup_locked(struct socket *so) { sorwakeup_locked(so); } void so_sowwakeup_locked(struct socket *so) { sowwakeup_locked(so); } void so_lock(struct socket *so) { SOCK_LOCK(so); } void so_unlock(struct socket *so) { SOCK_UNLOCK(so); } diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index a7ffb6ef3254..0361144f2763 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -1,1603 +1,1586 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include "opt_capsicum.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_43 #include #endif #include #include #include #ifdef KTRACE #include #endif #ifdef COMPAT_FREEBSD32 #include #endif #include #include #include static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); static int accept1(struct thread *td, int s, struct sockaddr *uname, socklen_t *anamelen, int flags); static int sockargs(struct mbuf **, char *, socklen_t, int); /* * Convert a user file descriptor to a kernel file entry and check if required * capability rights are present. * If required copy of current set of capability rights is returned. * A reference on the file entry is held upon returning. */ int getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp, struct filecaps *havecapsp) { struct file *fp; int error; error = fget_cap(td, fd, rightsp, &fp, havecapsp); if (__predict_false(error != 0)) return (error); if (__predict_false(fp->f_type != DTYPE_SOCKET)) { fdrop(fp, td); if (havecapsp != NULL) filecaps_free(havecapsp); return (ENOTSOCK); } *fpp = fp; return (0); } int getsock(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) { struct file *fp; int error; error = fget_unlocked(td, fd, rightsp, &fp); if (__predict_false(error != 0)) return (error); if (__predict_false(fp->f_type != DTYPE_SOCKET)) { fdrop(fp, td); return (ENOTSOCK); } *fpp = fp; return (0); } /* * System call interface to the socket abstraction. */ #if defined(COMPAT_43) #define COMPAT_OLDSOCK #endif int sys_socket(struct thread *td, struct socket_args *uap) { return (kern_socket(td, uap->domain, uap->type, uap->protocol)); } int kern_socket(struct thread *td, int domain, int type, int protocol) { struct socket *so; struct file *fp; int fd, error, oflag, fflag; AUDIT_ARG_SOCKET(domain, type, protocol); oflag = 0; fflag = 0; if ((type & SOCK_CLOEXEC) != 0) { type &= ~SOCK_CLOEXEC; oflag |= O_CLOEXEC; } if ((type & SOCK_NONBLOCK) != 0) { type &= ~SOCK_NONBLOCK; fflag |= FNONBLOCK; } #ifdef MAC error = mac_socket_check_create(td->td_ucred, domain, type, protocol); if (error != 0) return (error); #endif error = falloc(td, &fp, &fd, oflag); if (error != 0) return (error); /* An extra reference on `fp' has been held for us by falloc(). 
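 *
 * Userland counterpart of the flag splitting above (sketch, helper
 * name invented): SOCK_NONBLOCK and SOCK_CLOEXEC ride in the type
 * argument and become FNONBLOCK/O_CLOEXEC on the new descriptor.
 *
 *	#include <sys/socket.h>
 *
 *	static int
 *	make_nonblocking_socket(void)
 *	{
 *		return (socket(AF_INET,
 *		    SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
 *	}
 *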
*/ error = socreate(domain, &so, type, protocol, td->td_ucred, td); if (error != 0) { fdclose(td, fp, fd); } else { finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops); if ((fflag & FNONBLOCK) != 0) (void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td); td->td_retval[0] = fd; } fdrop(fp, td); return (error); } int sys_bind(struct thread *td, struct bind_args *uap) { struct sockaddr *sa; int error; error = getsockaddr(&sa, uap->name, uap->namelen); if (error == 0) { error = kern_bindat(td, AT_FDCWD, uap->s, sa); free(sa, M_SONAME); } return (error); } int kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) { struct socket *so; struct file *fp; int error; #ifdef CAPABILITY_MODE if (IN_CAPABILITY_MODE(td) && (dirfd == AT_FDCWD)) return (ECAPMODE); #endif AUDIT_ARG_FD(fd); AUDIT_ARG_SOCKADDR(td, dirfd, sa); error = getsock(td, fd, &cap_bind_rights, &fp); if (error != 0) return (error); so = fp->f_data; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(sa); #endif #ifdef MAC error = mac_socket_check_bind(td->td_ucred, so, sa); if (error == 0) { #endif if (dirfd == AT_FDCWD) error = sobind(so, sa, td); else error = sobindat(dirfd, so, sa, td); #ifdef MAC } #endif fdrop(fp, td); return (error); } int sys_bindat(struct thread *td, struct bindat_args *uap) { struct sockaddr *sa; int error; error = getsockaddr(&sa, uap->name, uap->namelen); if (error == 0) { error = kern_bindat(td, uap->fd, uap->s, sa); free(sa, M_SONAME); } return (error); } int sys_listen(struct thread *td, struct listen_args *uap) { return (kern_listen(td, uap->s, uap->backlog)); } int kern_listen(struct thread *td, int s, int backlog) { struct socket *so; struct file *fp; int error; AUDIT_ARG_FD(s); error = getsock(td, s, &cap_listen_rights, &fp); if (error == 0) { so = fp->f_data; #ifdef MAC error = mac_socket_check_listen(td->td_ucred, so); if (error == 0) #endif error = solisten(so, backlog, td); fdrop(fp, td); } return (error); } /* * accept1() */ static int accept1(struct thread *td, int s, struct sockaddr *uname, socklen_t *anamelen, int flags) { - struct sockaddr *name; - socklen_t namelen; + struct sockaddr_storage ss = { .ss_len = sizeof(ss) }; + socklen_t addrlen; struct file *fp; int error; - if (uname == NULL) - return (kern_accept4(td, s, NULL, NULL, flags, NULL)); - - error = copyin(anamelen, &namelen, sizeof (namelen)); - if (error != 0) - return (error); + if (uname != NULL) { + error = copyin(anamelen, &addrlen, sizeof(addrlen)); + if (error != 0) + return (error); + } - error = kern_accept4(td, s, &name, &namelen, flags, &fp); + error = kern_accept4(td, s, (struct sockaddr *)&ss, flags, &fp); if (error != 0) return (error); #ifdef COMPAT_OLDSOCK if (SV_PROC_FLAG(td->td_proc, SV_AOUT) && (flags & ACCEPT4_COMPAT) != 0) - ((struct osockaddr *)name)->sa_family = - name->sa_family; + ((struct osockaddr *)&ss)->sa_family = ss.ss_family; #endif - error = copyout(name, uname, namelen); - if (error == 0) - error = copyout(&namelen, anamelen, - sizeof(namelen)); + if (uname != NULL) { + addrlen = min(ss.ss_len, addrlen); + error = copyout(&ss, uname, addrlen); + if (error == 0) { + addrlen = ss.ss_len; + error = copyout(&addrlen, anamelen, sizeof(addrlen)); + } + } if (error != 0) fdclose(td, fp, td->td_retval[0]); fdrop(fp, td); - free(name, M_SONAME); + return (error); } int -kern_accept(struct thread *td, int s, struct sockaddr **name, - socklen_t *namelen, struct file **fp) +kern_accept(struct thread *td, int s, struct sockaddr *sa, struct file **fp) { - return (kern_accept4(td, 
s, name, namelen, ACCEPT4_INHERIT, fp)); + return (kern_accept4(td, s, sa, ACCEPT4_INHERIT, fp)); } int -kern_accept4(struct thread *td, int s, struct sockaddr **name, - socklen_t *namelen, int flags, struct file **fp) +kern_accept4(struct thread *td, int s, struct sockaddr *sa, int flags, + struct file **fp) { struct file *headfp, *nfp = NULL; - struct sockaddr *sa = NULL; struct socket *head, *so; struct filecaps fcaps; u_int fflag; pid_t pgid; int error, fd, tmp; - if (name != NULL) - *name = NULL; - AUDIT_ARG_FD(s); error = getsock_cap(td, s, &cap_accept_rights, &headfp, &fcaps); if (error != 0) return (error); fflag = atomic_load_int(&headfp->f_flag); head = headfp->f_data; if (!SOLISTENING(head)) { error = EINVAL; goto done; } #ifdef MAC error = mac_socket_check_accept(td->td_ucred, head); if (error != 0) goto done; #endif error = falloc_caps(td, &nfp, &fd, (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0, &fcaps); if (error != 0) goto done; SOCK_LOCK(head); if (!SOLISTENING(head)) { SOCK_UNLOCK(head); error = EINVAL; goto noconnection; } error = solisten_dequeue(head, &so, flags); if (error != 0) goto noconnection; /* An extra reference on `nfp' has been held for us by falloc(). */ td->td_retval[0] = fd; /* Connection has been removed from the listen queue. */ KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0); if (flags & ACCEPT4_INHERIT) { pgid = fgetown(&head->so_sigio); if (pgid != 0) fsetown(pgid, &so->so_sigio); } else { fflag &= ~(FNONBLOCK | FASYNC); if (flags & SOCK_NONBLOCK) fflag |= FNONBLOCK; } finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); /* Sync socket nonblocking/async state with file flags */ tmp = fflag & FNONBLOCK; (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); tmp = fflag & FASYNC; (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); - error = soaccept(so, &sa); - if (error != 0) - goto noconnection; - if (sa == NULL) { - if (name) - *namelen = 0; - goto done; - } - AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa); - if (name) { - /* check sa_len before it is destroyed */ - if (*namelen > sa->sa_len) - *namelen = sa->sa_len; + + if ((error = soaccept(so, sa)) == 0) { + AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa); #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(sa); #endif - *name = sa; - sa = NULL; } noconnection: - free(sa, M_SONAME); - /* * close the new descriptor, assuming someone hasn't ripped it * out from under us. */ if (error != 0) fdclose(td, nfp, fd); /* * Release explicitly held references before returning. We return * a reference on nfp to the caller on success if they request it. 
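 *
 * From userland, the sockaddr_storage flow introduced in accept1()
 * above looks like this (sketch only; helper name invented): at most
 * the caller-supplied length is copied out, while *alen is updated
 * to the full address size.
 *
 *	#include <sys/socket.h>
 *
 *	static int
 *	accept_peer(int lsock, struct sockaddr_storage *ss, socklen_t *alen)
 *	{
 *		*alen = sizeof(*ss);
 *		return (accept(lsock, (struct sockaddr *)ss, alen));
 *	}
 *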
*/ done: if (nfp == NULL) filecaps_free(&fcaps); if (fp != NULL) { if (error == 0) { *fp = nfp; nfp = NULL; } else *fp = NULL; } if (nfp != NULL) fdrop(nfp, td); fdrop(headfp, td); return (error); } int sys_accept(struct thread *td, struct accept_args *uap) { return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT)); } int sys_accept4(struct thread *td, struct accept4_args *uap) { if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) return (EINVAL); return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags)); } #ifdef COMPAT_OLDSOCK int oaccept(struct thread *td, struct oaccept_args *uap) { return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT | ACCEPT4_COMPAT)); } #endif /* COMPAT_OLDSOCK */ int sys_connect(struct thread *td, struct connect_args *uap) { struct sockaddr *sa; int error; error = getsockaddr(&sa, uap->name, uap->namelen); if (error == 0) { error = kern_connectat(td, AT_FDCWD, uap->s, sa); free(sa, M_SONAME); } return (error); } int kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) { struct socket *so; struct file *fp; int error; #ifdef CAPABILITY_MODE if (IN_CAPABILITY_MODE(td) && (dirfd == AT_FDCWD)) return (ECAPMODE); #endif AUDIT_ARG_FD(fd); AUDIT_ARG_SOCKADDR(td, dirfd, sa); error = getsock(td, fd, &cap_connect_rights, &fp); if (error != 0) return (error); so = fp->f_data; if (so->so_state & SS_ISCONNECTING) { error = EALREADY; goto done1; } #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(sa); #endif #ifdef MAC error = mac_socket_check_connect(td->td_ucred, so, sa); if (error != 0) goto bad; #endif error = soconnectat(dirfd, so, sa, td); if (error != 0) goto bad; if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { error = EINPROGRESS; goto done1; } SOCK_LOCK(so); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { error = msleep(&so->so_timeo, &so->so_lock, PSOCK | PCATCH, "connec", 0); if (error != 0) break; } if (error == 0) { error = so->so_error; so->so_error = 0; } SOCK_UNLOCK(so); bad: if (error == ERESTART) error = EINTR; done1: fdrop(fp, td); return (error); } int sys_connectat(struct thread *td, struct connectat_args *uap) { struct sockaddr *sa; int error; error = getsockaddr(&sa, uap->name, uap->namelen); if (error == 0) { error = kern_connectat(td, uap->fd, uap->s, sa); free(sa, M_SONAME); } return (error); } int kern_socketpair(struct thread *td, int domain, int type, int protocol, int *rsv) { struct file *fp1, *fp2; struct socket *so1, *so2; int fd, error, oflag, fflag; AUDIT_ARG_SOCKET(domain, type, protocol); oflag = 0; fflag = 0; if ((type & SOCK_CLOEXEC) != 0) { type &= ~SOCK_CLOEXEC; oflag |= O_CLOEXEC; } if ((type & SOCK_NONBLOCK) != 0) { type &= ~SOCK_NONBLOCK; fflag |= FNONBLOCK; } #ifdef MAC /* We might want to have a separate check for socket pairs. */ error = mac_socket_check_create(td->td_ucred, domain, type, protocol); if (error != 0) return (error); #endif error = socreate(domain, &so1, type, protocol, td->td_ucred, td); if (error != 0) return (error); error = socreate(domain, &so2, type, protocol, td->td_ucred, td); if (error != 0) goto free1; /* On success extra reference to `fp1' and 'fp2' is set by falloc. 
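 *
 * The EINPROGRESS path in kern_connectat() above pairs with the
 * SO_ERROR readback in sogetopt(); a hedged userland sketch (helper
 * name invented) of driving a nonblocking connect to completion:
 *
 *	#include <sys/socket.h>
 *	#include <poll.h>
 *	#include <errno.h>
 *
 *	static int
 *	finish_connect(int s, const struct sockaddr *sa, socklen_t salen)
 *	{
 *		struct pollfd pfd = { .fd = s, .events = POLLOUT };
 *		socklen_t len = sizeof(int);
 *		int err;
 *
 *		if (connect(s, sa, salen) == 0)
 *			return (0);
 *		if (errno != EINPROGRESS)
 *			return (-1);
 *		// Writability signals completion; so_error holds the result.
 *		if (poll(&pfd, 1, -1) != 1 ||
 *		    getsockopt(s, SOL_SOCKET, SO_ERROR, &err, &len) == -1)
 *			return (-1);
 *		errno = err;
 *		return (err == 0 ? 0 : -1);
 *	}
 *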
*/ error = falloc(td, &fp1, &fd, oflag); if (error != 0) goto free2; rsv[0] = fd; fp1->f_data = so1; /* so1 already has ref count */ error = falloc(td, &fp2, &fd, oflag); if (error != 0) goto free3; fp2->f_data = so2; /* so2 already has ref count */ rsv[1] = fd; error = soconnect2(so1, so2); if (error != 0) goto free4; if (type == SOCK_DGRAM) { /* * Datagram socket connection is asymmetric. */ error = soconnect2(so2, so1); if (error != 0) goto free4; } else if (so1->so_proto->pr_flags & PR_CONNREQUIRED) { struct unpcb *unp, *unp2; unp = sotounpcb(so1); unp2 = sotounpcb(so2); /* * No need to lock the unps, because the sockets are brand-new. * No other threads can be using them yet */ unp_copy_peercred(td, unp, unp2, unp); } finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data, &socketops); finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data, &socketops); if ((fflag & FNONBLOCK) != 0) { (void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td); (void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td); } fdrop(fp1, td); fdrop(fp2, td); return (0); free4: fdclose(td, fp2, rsv[1]); fdrop(fp2, td); free3: fdclose(td, fp1, rsv[0]); fdrop(fp1, td); free2: if (so2 != NULL) (void)soclose(so2); free1: if (so1 != NULL) (void)soclose(so1); return (error); } int sys_socketpair(struct thread *td, struct socketpair_args *uap) { int error, sv[2]; error = kern_socketpair(td, uap->domain, uap->type, uap->protocol, sv); if (error != 0) return (error); error = copyout(sv, uap->rsv, 2 * sizeof(int)); if (error != 0) { (void)kern_close(td, sv[0]); (void)kern_close(td, sv[1]); } return (error); } static int sendit(struct thread *td, int s, struct msghdr *mp, int flags) { struct mbuf *control; struct sockaddr *to; int error; #ifdef CAPABILITY_MODE if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL)) return (ECAPMODE); #endif if (mp->msg_name != NULL) { error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); if (error != 0) { to = NULL; goto bad; } mp->msg_name = to; } else { to = NULL; } if (mp->msg_control) { if (mp->msg_controllen < sizeof(struct cmsghdr) #ifdef COMPAT_OLDSOCK && (mp->msg_flags != MSG_COMPAT || !SV_PROC_FLAG(td->td_proc, SV_AOUT)) #endif ) { error = EINVAL; goto bad; } error = sockargs(&control, mp->msg_control, mp->msg_controllen, MT_CONTROL); if (error != 0) goto bad; #ifdef COMPAT_OLDSOCK if (mp->msg_flags == MSG_COMPAT && SV_PROC_FLAG(td->td_proc, SV_AOUT)) { struct cmsghdr *cm; M_PREPEND(control, sizeof(*cm), M_WAITOK); cm = mtod(control, struct cmsghdr *); cm->cmsg_len = control->m_len; cm->cmsg_level = SOL_SOCKET; cm->cmsg_type = SCM_RIGHTS; } #endif } else { control = NULL; } error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); bad: free(to, M_SONAME); return (error); } int kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags, struct mbuf *control, enum uio_seg segflg) { struct file *fp; struct uio auio; struct iovec *iov; struct socket *so; cap_rights_t *rights; #ifdef KTRACE struct uio *ktruio = NULL; #endif ssize_t len; int i, error; AUDIT_ARG_FD(s); rights = &cap_send_rights; if (mp->msg_name != NULL) { AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name); rights = &cap_send_connect_rights; } error = getsock(td, s, rights, &fp); if (error != 0) { m_freem(control); return (error); } so = (struct socket *)fp->f_data; #ifdef KTRACE if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(mp->msg_name); #endif #ifdef MAC if (mp->msg_name != NULL) { error = mac_socket_check_connect(td->td_ucred, so, mp->msg_name); if (error != 0) { 
m_freem(control); goto bad; } } error = mac_socket_check_send(td->td_ucred, so); if (error != 0) { m_freem(control); goto bad; } #endif auio.uio_iov = mp->msg_iov; auio.uio_iovcnt = mp->msg_iovlen; auio.uio_segflg = segflg; auio.uio_rw = UIO_WRITE; auio.uio_td = td; auio.uio_offset = 0; /* XXX */ auio.uio_resid = 0; iov = mp->msg_iov; for (i = 0; i < mp->msg_iovlen; i++, iov++) { if ((auio.uio_resid += iov->iov_len) < 0) { error = EINVAL; m_freem(control); goto bad; } } #ifdef KTRACE if (KTRPOINT(td, KTR_GENIO)) ktruio = cloneuio(&auio); #endif len = auio.uio_resid; error = sousrsend(so, mp->msg_name, &auio, control, flags, NULL); if (error == 0) td->td_retval[0] = len - auio.uio_resid; #ifdef KTRACE if (ktruio != NULL) { ktruio->uio_resid = td->td_retval[0]; ktrgenio(s, UIO_WRITE, ktruio, error); } #endif bad: fdrop(fp, td); return (error); } int sys_sendto(struct thread *td, struct sendto_args *uap) { struct msghdr msg; struct iovec aiov; msg.msg_name = __DECONST(void *, uap->to); msg.msg_namelen = uap->tolen; msg.msg_iov = &aiov; msg.msg_iovlen = 1; msg.msg_control = 0; #ifdef COMPAT_OLDSOCK if (SV_PROC_FLAG(td->td_proc, SV_AOUT)) msg.msg_flags = 0; #endif aiov.iov_base = __DECONST(void *, uap->buf); aiov.iov_len = uap->len; return (sendit(td, uap->s, &msg, uap->flags)); } #ifdef COMPAT_OLDSOCK int osend(struct thread *td, struct osend_args *uap) { struct msghdr msg; struct iovec aiov; msg.msg_name = 0; msg.msg_namelen = 0; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = __DECONST(void *, uap->buf); aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = 0; return (sendit(td, uap->s, &msg, uap->flags)); } int osendmsg(struct thread *td, struct osendmsg_args *uap) { struct msghdr msg; struct iovec *iov; int error; error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); if (error != 0) return (error); error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error != 0) return (error); msg.msg_iov = iov; msg.msg_flags = MSG_COMPAT; error = sendit(td, uap->s, &msg, uap->flags); free(iov, M_IOV); return (error); } #endif int sys_sendmsg(struct thread *td, struct sendmsg_args *uap) { struct msghdr msg; struct iovec *iov; int error; error = copyin(uap->msg, &msg, sizeof (msg)); if (error != 0) return (error); error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error != 0) return (error); msg.msg_iov = iov; #ifdef COMPAT_OLDSOCK if (SV_PROC_FLAG(td->td_proc, SV_AOUT)) msg.msg_flags = 0; #endif error = sendit(td, uap->s, &msg, uap->flags); free(iov, M_IOV); return (error); } int kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg, struct mbuf **controlp) { struct uio auio; struct iovec *iov; struct mbuf *control, *m; caddr_t ctlbuf; struct file *fp; struct socket *so; struct sockaddr *fromsa = NULL; #ifdef KTRACE struct uio *ktruio = NULL; #endif ssize_t len; int error, i; if (controlp != NULL) *controlp = NULL; AUDIT_ARG_FD(s); error = getsock(td, s, &cap_recv_rights, &fp); if (error != 0) return (error); so = fp->f_data; #ifdef MAC error = mac_socket_check_receive(td->td_ucred, so); if (error != 0) { fdrop(fp, td); return (error); } #endif auio.uio_iov = mp->msg_iov; auio.uio_iovcnt = mp->msg_iovlen; auio.uio_segflg = UIO_USERSPACE; auio.uio_rw = UIO_READ; auio.uio_td = td; auio.uio_offset = 0; /* XXX */ auio.uio_resid = 0; iov = mp->msg_iov; for (i = 0; i < mp->msg_iovlen; i++, iov++) { if ((auio.uio_resid += iov->iov_len) < 0) { fdrop(fp, td); return (EINVAL); } } #ifdef KTRACE if (KTRPOINT(td, KTR_GENIO)) ktruio = 
cloneuio(&auio); #endif control = NULL; len = auio.uio_resid; error = soreceive(so, &fromsa, &auio, NULL, (mp->msg_control || controlp) ? &control : NULL, &mp->msg_flags); if (error != 0) { if (auio.uio_resid != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; } if (fromsa != NULL) AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa); #ifdef KTRACE if (ktruio != NULL) { /* MSG_TRUNC can trigger underflow of uio_resid. */ ktruio->uio_resid = MIN(len - auio.uio_resid, len); ktrgenio(s, UIO_READ, ktruio, error); } #endif if (error != 0) goto out; td->td_retval[0] = len - auio.uio_resid; if (mp->msg_name) { len = mp->msg_namelen; if (len <= 0 || fromsa == NULL) len = 0; else { /* save sa_len before it is destroyed by MSG_COMPAT */ len = MIN(len, fromsa->sa_len); #ifdef COMPAT_OLDSOCK if ((mp->msg_flags & MSG_COMPAT) != 0 && SV_PROC_FLAG(td->td_proc, SV_AOUT)) ((struct osockaddr *)fromsa)->sa_family = fromsa->sa_family; #endif if (fromseg == UIO_USERSPACE) { error = copyout(fromsa, mp->msg_name, (unsigned)len); if (error != 0) goto out; } else bcopy(fromsa, mp->msg_name, len); } mp->msg_namelen = len; } if (mp->msg_control && controlp == NULL) { #ifdef COMPAT_OLDSOCK /* * We assume that old recvmsg calls won't receive access * rights and other control info, esp. as control info * is always optional and those options didn't exist in 4.3. * If we receive rights, trim the cmsghdr; anything else * is tossed. */ if (control && (mp->msg_flags & MSG_COMPAT) != 0 && SV_PROC_FLAG(td->td_proc, SV_AOUT)) { if (mtod(control, struct cmsghdr *)->cmsg_level != SOL_SOCKET || mtod(control, struct cmsghdr *)->cmsg_type != SCM_RIGHTS) { mp->msg_controllen = 0; goto out; } control->m_len -= sizeof (struct cmsghdr); control->m_data += sizeof (struct cmsghdr); } #endif ctlbuf = mp->msg_control; len = mp->msg_controllen; mp->msg_controllen = 0; for (m = control; m != NULL && len >= m->m_len; m = m->m_next) { if ((error = copyout(mtod(m, caddr_t), ctlbuf, m->m_len)) != 0) goto out; ctlbuf += m->m_len; len -= m->m_len; mp->msg_controllen += m->m_len; } if (m != NULL) { mp->msg_flags |= MSG_CTRUNC; m_dispose_extcontrolm(m); } } out: fdrop(fp, td); #ifdef KTRACE if (fromsa && KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(fromsa); #endif free(fromsa, M_SONAME); if (error == 0 && controlp != NULL) *controlp = control; else if (control != NULL) { if (error != 0) m_dispose_extcontrolm(control); m_freem(control); } return (error); } static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp) { int error; error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); if (error != 0) return (error); if (namelenp != NULL) { error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); #ifdef COMPAT_OLDSOCK if ((mp->msg_flags & MSG_COMPAT) != 0 && SV_PROC_FLAG(td->td_proc, SV_AOUT)) error = 0; /* old recvfrom didn't check */ #endif } return (error); } static int kern_recvfrom(struct thread *td, int s, void *buf, size_t len, int flags, struct sockaddr *from, socklen_t *fromlenaddr) { struct msghdr msg; struct iovec aiov; int error; if (fromlenaddr != NULL) { error = copyin(fromlenaddr, &msg.msg_namelen, sizeof (msg.msg_namelen)); if (error != 0) goto done2; } else { msg.msg_namelen = 0; } msg.msg_name = from; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = buf; aiov.iov_len = len; msg.msg_control = 0; msg.msg_flags = flags; error = recvit(td, s, &msg, fromlenaddr); done2: return (error); } int sys_recvfrom(struct thread *td, struct recvfrom_args *uap) { return (kern_recvfrom(td, uap->s, uap->buf, 
uap->len, uap->flags, uap->from, uap->fromlenaddr)); } #ifdef COMPAT_OLDSOCK int orecvfrom(struct thread *td, struct orecvfrom_args *uap) { return (kern_recvfrom(td, uap->s, uap->buf, uap->len, uap->flags | MSG_COMPAT, uap->from, uap->fromlenaddr)); } #endif #ifdef COMPAT_OLDSOCK int orecv(struct thread *td, struct orecv_args *uap) { struct msghdr msg; struct iovec aiov; msg.msg_name = 0; msg.msg_namelen = 0; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = uap->buf; aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = uap->flags; return (recvit(td, uap->s, &msg, NULL)); } /* * Old recvmsg. This code takes advantage of the fact that the old msghdr * overlays the new one, missing only the flags, and with the (old) access * rights where the control fields are now. */ int orecvmsg(struct thread *td, struct orecvmsg_args *uap) { struct msghdr msg; struct iovec *iov; int error; error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); if (error != 0) return (error); error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error != 0) return (error); msg.msg_flags = uap->flags | MSG_COMPAT; msg.msg_iov = iov; error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); if (msg.msg_controllen && error == 0) error = copyout(&msg.msg_controllen, &uap->msg->msg_accrightslen, sizeof (int)); free(iov, M_IOV); return (error); } #endif int sys_recvmsg(struct thread *td, struct recvmsg_args *uap) { struct msghdr msg; struct iovec *uiov, *iov; int error; error = copyin(uap->msg, &msg, sizeof (msg)); if (error != 0) return (error); error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error != 0) return (error); msg.msg_flags = uap->flags; #ifdef COMPAT_OLDSOCK if (SV_PROC_FLAG(td->td_proc, SV_AOUT)) msg.msg_flags &= ~MSG_COMPAT; #endif uiov = msg.msg_iov; msg.msg_iov = iov; error = recvit(td, uap->s, &msg, NULL); if (error == 0) { msg.msg_iov = uiov; error = copyout(&msg, uap->msg, sizeof(msg)); } free(iov, M_IOV); return (error); } int sys_shutdown(struct thread *td, struct shutdown_args *uap) { return (kern_shutdown(td, uap->s, uap->how)); } int kern_shutdown(struct thread *td, int s, int how) { struct socket *so; struct file *fp; int error; AUDIT_ARG_FD(s); error = getsock(td, s, &cap_shutdown_rights, &fp); if (error == 0) { so = fp->f_data; error = soshutdown(so, how); /* * Previous versions did not return ENOTCONN, but 0 in * case the socket was not connected. Some important * programs like syslogd up to r279016, 2015-02-19, * still depend on this behavior. 
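		 * For example (an illustrative scenario): such a binary
		 * may call shutdown(fd, SHUT_RDWR) on a socket that was
		 * never connected and abort on any non-zero return;
		 * processes whose p_osrel predates
		 * P_OSREL_SHUTDOWN_ENOTCONN therefore keep seeing the
		 * historical 0, as the check below implements.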
*/ if (error == ENOTCONN && td->td_proc->p_osrel < P_OSREL_SHUTDOWN_ENOTCONN) error = 0; fdrop(fp, td); } return (error); } int sys_setsockopt(struct thread *td, struct setsockopt_args *uap) { return (kern_setsockopt(td, uap->s, uap->level, uap->name, uap->val, UIO_USERSPACE, uap->valsize)); } int kern_setsockopt(struct thread *td, int s, int level, int name, const void *val, enum uio_seg valseg, socklen_t valsize) { struct socket *so; struct file *fp; struct sockopt sopt; int error; if (val == NULL && valsize != 0) return (EFAULT); if ((int)valsize < 0) return (EINVAL); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = level; sopt.sopt_name = name; sopt.sopt_val = __DECONST(void *, val); sopt.sopt_valsize = valsize; switch (valseg) { case UIO_USERSPACE: sopt.sopt_td = td; break; case UIO_SYSSPACE: sopt.sopt_td = NULL; break; default: panic("kern_setsockopt called with bad valseg"); } AUDIT_ARG_FD(s); error = getsock(td, s, &cap_setsockopt_rights, &fp); if (error == 0) { so = fp->f_data; error = sosetopt(so, &sopt); fdrop(fp, td); } return (error); } int sys_getsockopt(struct thread *td, struct getsockopt_args *uap) { socklen_t valsize; int error; if (uap->val) { error = copyin(uap->avalsize, &valsize, sizeof (valsize)); if (error != 0) return (error); } error = kern_getsockopt(td, uap->s, uap->level, uap->name, uap->val, UIO_USERSPACE, &valsize); if (error == 0) error = copyout(&valsize, uap->avalsize, sizeof (valsize)); return (error); } /* * Kernel version of getsockopt. * optval can be a userland or a kernel pointer, depending on valseg. * optlen is always a kernel pointer. */ int kern_getsockopt(struct thread *td, int s, int level, int name, void *val, enum uio_seg valseg, socklen_t *valsize) { struct socket *so; struct file *fp; struct sockopt sopt; int error; if (val == NULL) *valsize = 0; if ((int)*valsize < 0) return (EINVAL); sopt.sopt_dir = SOPT_GET; sopt.sopt_level = level; sopt.sopt_name = name; sopt.sopt_val = val; sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ switch (valseg) { case UIO_USERSPACE: sopt.sopt_td = td; break; case UIO_SYSSPACE: sopt.sopt_td = NULL; break; default: panic("kern_getsockopt called with bad valseg"); } AUDIT_ARG_FD(s); error = getsock(td, s, &cap_getsockopt_rights, &fp); if (error == 0) { so = fp->f_data; error = sogetopt(so, &sopt); *valsize = sopt.sopt_valsize; fdrop(fp, td); } return (error); } static int user_getsockname(struct thread *td, int fdes, struct sockaddr *asa, socklen_t *alen, bool compat) { struct sockaddr *sa; socklen_t len; int error; error = copyin(alen, &len, sizeof(len)); if (error != 0) return (error); error = kern_getsockname(td, fdes, &sa, &len); if (error != 0) return (error); if (len != 0) { #ifdef COMPAT_OLDSOCK if (compat && SV_PROC_FLAG(td->td_proc, SV_AOUT)) ((struct osockaddr *)sa)->sa_family = sa->sa_family; #endif error = copyout(sa, asa, len); } free(sa, M_SONAME); if (error == 0) error = copyout(&len, alen, sizeof(len)); return (error); } int kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen) { struct socket *so; struct file *fp; socklen_t len; int error; AUDIT_ARG_FD(fd); error = getsock(td, fd, &cap_getsockname_rights, &fp); if (error != 0) return (error); so = fp->f_data; *sa = NULL; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_sockaddr(so, sa); CURVNET_RESTORE(); if (error != 0) goto bad; if (*sa == NULL) len = 0; else len = MIN(*alen, (*sa)->sa_len); *alen = len; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(*sa); #endif bad: fdrop(fp, td); if (error != 0 && *sa != NULL) {
free(*sa, M_SONAME); *sa = NULL; } return (error); } int sys_getsockname(struct thread *td, struct getsockname_args *uap) { return (user_getsockname(td, uap->fdes, uap->asa, uap->alen, false)); } #ifdef COMPAT_OLDSOCK int ogetsockname(struct thread *td, struct ogetsockname_args *uap) { return (user_getsockname(td, uap->fdes, uap->asa, uap->alen, true)); } #endif /* COMPAT_OLDSOCK */ static int user_getpeername(struct thread *td, int fdes, struct sockaddr *asa, socklen_t *alen, bool compat) { struct sockaddr *sa; socklen_t len; int error; error = copyin(alen, &len, sizeof (len)); if (error != 0) return (error); error = kern_getpeername(td, fdes, &sa, &len); if (error != 0) return (error); if (len != 0) { #ifdef COMPAT_OLDSOCK if (compat && SV_PROC_FLAG(td->td_proc, SV_AOUT)) ((struct osockaddr *)sa)->sa_family = sa->sa_family; #endif error = copyout(sa, asa, len); } free(sa, M_SONAME); if (error == 0) error = copyout(&len, alen, sizeof(len)); return (error); } int kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen) { struct socket *so; struct file *fp; socklen_t len; int error; AUDIT_ARG_FD(fd); error = getsock(td, fd, &cap_getpeername_rights, &fp); if (error != 0) return (error); so = fp->f_data; if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { error = ENOTCONN; goto done; } *sa = NULL; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_peeraddr(so, sa); CURVNET_RESTORE(); if (error != 0) goto bad; if (*sa == NULL) len = 0; else len = MIN(*alen, (*sa)->sa_len); *alen = len; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(*sa); #endif bad: if (error != 0 && *sa != NULL) { free(*sa, M_SONAME); *sa = NULL; } done: fdrop(fp, td); return (error); } int sys_getpeername(struct thread *td, struct getpeername_args *uap) { return (user_getpeername(td, uap->fdes, uap->asa, uap->alen, false)); } #ifdef COMPAT_OLDSOCK int ogetpeername(struct thread *td, struct ogetpeername_args *uap) { return (user_getpeername(td, uap->fdes, uap->asa, uap->alen, true)); } #endif /* COMPAT_OLDSOCK */ static int sockargs(struct mbuf **mp, char *buf, socklen_t buflen, int type) { struct sockaddr *sa; struct mbuf *m; int error; if (buflen > MLEN) { #ifdef COMPAT_OLDSOCK if (type == MT_SONAME && buflen <= 112 && SV_CURPROC_FLAG(SV_AOUT)) buflen = MLEN; /* unix domain compat. hack */ else #endif if (buflen > MCLBYTES) return (EMSGSIZE); } m = m_get2(buflen, M_WAITOK, type, 0); m->m_len = buflen; error = copyin(buf, mtod(m, void *), buflen); if (error != 0) (void) m_free(m); else { *mp = m; if (type == MT_SONAME) { sa = mtod(m, struct sockaddr *); #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN if (sa->sa_family == 0 && sa->sa_len < AF_MAX && SV_CURPROC_FLAG(SV_AOUT)) sa->sa_family = sa->sa_len; #endif sa->sa_len = buflen; } } return (error); } int getsockaddr(struct sockaddr **namp, const struct sockaddr *uaddr, size_t len) { struct sockaddr *sa; int error; if (len > SOCK_MAXADDRLEN) return (ENAMETOOLONG); if (len < offsetof(struct sockaddr, sa_data[0])) return (EINVAL); sa = malloc(len, M_SONAME, M_WAITOK); error = copyin(uaddr, sa, len); if (error != 0) { free(sa, M_SONAME); } else { #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN if (sa->sa_family == 0 && sa->sa_len < AF_MAX && SV_CURPROC_FLAG(SV_AOUT)) sa->sa_family = sa->sa_len; #endif sa->sa_len = len; *namp = sa; } return (error); } /* * Dispose of externalized rights from an SCM_RIGHTS message. 
This function * should be used in error or truncation cases to avoid leaking file descriptors * into the recipient's (the current thread's) table. */ void m_dispose_extcontrolm(struct mbuf *m) { struct cmsghdr *cm; struct file *fp; struct thread *td; socklen_t clen, datalen; int error, fd, *fds, nfd; td = curthread; for (; m != NULL; m = m->m_next) { if (m->m_type != MT_EXTCONTROL) continue; cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (clen > 0) { if (clen < sizeof(*cm)) panic("%s: truncated mbuf %p", __func__, m); datalen = CMSG_SPACE(cm->cmsg_len - CMSG_SPACE(0)); if (clen < datalen) panic("%s: truncated mbuf %p", __func__, m); if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { fds = (int *)CMSG_DATA(cm); nfd = (cm->cmsg_len - CMSG_SPACE(0)) / sizeof(int); while (nfd-- > 0) { fd = *fds++; error = fget(td, fd, &cap_no_rights, &fp); if (error == 0) { fdclose(td, fp, fd); fdrop(fp, td); } } } clen -= datalen; cm = (struct cmsghdr *)((uint8_t *)cm + datalen); } m_chtype(m, MT_CONTROL); } } diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 53a7c2b2915a..3071a5169b72 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1,3561 +1,3563 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All Rights Reserved. * Copyright (c) 2004-2009 Robert N. M. Watson All Rights Reserved. * Copyright (c) 2018 Matthew Macy * Copyright (c) 2022 Gleb Smirnoff * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * UNIX Domain (Local) Sockets * * This is an implementation of UNIX (local) domain sockets. Each socket has * an associated struct unpcb (UNIX protocol control block). Stream sockets * may be connected to 0 or 1 other socket. Datagram sockets may be * connected to 0, 1, or many other sockets. Sockets may be created and * connected in pairs (socketpair(2)), or bound/connected to using the file * system name space. For most purposes, only the receive socket buffer is * used, as sending on one socket delivers directly to the receive socket * buffer of a second socket. 
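 *
 * A minimal userland sketch of the file system name space path described
 * above (illustrative only; the path name is made up):
 *
 *	struct sockaddr_un sun = { .sun_family = AF_LOCAL };
 *	int s = socket(PF_LOCAL, SOCK_STREAM, 0);
 *
 *	strlcpy(sun.sun_path, "/tmp/example.sock", sizeof(sun.sun_path));
 *	sun.sun_len = SUN_LEN(&sun);
 *	if (connect(s, (struct sockaddr *)&sun, sizeof(sun)) == -1)
 *		err(1, "connect");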
* * The implementation is substantially complicated by the fact that * "ancillary data", such as file descriptors or credentials, may be passed * across UNIX domain sockets. The potential for passing UNIX domain sockets * over other UNIX domain sockets requires the implementation of a simple * garbage collector to find and tear down cycles of disconnected sockets. * * TODO: * RDM * rethink name space problems * need a proper out-of-band */ #include #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include MALLOC_DECLARE(M_FILECAPS); static struct domain localdomain; static uma_zone_t unp_zone; static unp_gen_t unp_gencnt; /* (l) */ static u_int unp_count; /* (l) Count of local sockets. */ static ino_t unp_ino; /* Prototype for fake inode numbers. */ static int unp_rights; /* (g) File descriptors in flight. */ static struct unp_head unp_shead; /* (l) List of stream sockets. */ static struct unp_head unp_dhead; /* (l) List of datagram sockets. */ static struct unp_head unp_sphead; /* (l) List of seqpacket sockets. */ struct unp_defer { SLIST_ENTRY(unp_defer) ud_link; struct file *ud_fp; }; static SLIST_HEAD(, unp_defer) unp_defers; static int unp_defers_count; -static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; +static const struct sockaddr sun_noname = { + .sa_len = sizeof(sun_noname), + .sa_family = AF_LOCAL, +}; /* * Garbage collection of cyclic file descriptor/socket references occurs * asynchronously in a taskqueue context in order to avoid recursion and * reentrance in the UNIX domain socket, file descriptor, and socket layer * code. See unp_gc() for a full description. */ static struct timeout_task unp_gc_task; /* * The close of unix domain sockets attached as SCM_RIGHTS is * postponed to the taskqueue, to avoid arbitrary recursion depth. * The attached sockets might have another sockets attached. */ static struct task unp_defer_task; /* * Both send and receive buffers are allocated PIPSIZ bytes of buffering for * stream sockets, although the total for sender and receiver is actually * only PIPSIZ. * * Datagram sockets really use the sendspace as the maximum datagram size, * and don't really want to reserve the sendspace. Their recvspace should be * large enough for at least one max-size datagram plus address. 
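 *
 * As a rough worked example under the defaults below (illustrative):
 * with unpdg_maxdgram at 2k and unpdg_recvspace at 16k, a receiver can
 * hold on the order of seven or eight maximum-size datagrams (each one
 * is charged for its data plus the sender's name and any control) before
 * further sends start failing with ENOBUFS (see uipc_sosend_dgram()).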
*/ #ifndef PIPSIZ #define PIPSIZ 8192 #endif static u_long unpst_sendspace = PIPSIZ; static u_long unpst_recvspace = PIPSIZ; static u_long unpdg_maxdgram = 2*1024; static u_long unpdg_recvspace = 16*1024; /* support 8KB syslog msgs */ static u_long unpsp_sendspace = PIPSIZ; /* really max datagram size */ static u_long unpsp_recvspace = PIPSIZ; static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Local domain"); static SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "SOCK_STREAM"); static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "SOCK_DGRAM"); static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "SOCK_SEQPACKET"); SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, &unpst_sendspace, 0, "Default stream send space."); SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, &unpst_recvspace, 0, "Default stream receive space."); SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, &unpdg_maxdgram, 0, "Maximum datagram size."); SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, &unpdg_recvspace, 0, "Default datagram receive space."); SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW, &unpsp_sendspace, 0, "Default seqpacket send space."); SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW, &unpsp_recvspace, 0, "Default seqpacket receive space."); SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "File descriptors in flight."); SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD, &unp_defers_count, 0, "File descriptors deferred to taskqueue for close."); /* * Locking and synchronization: * * Several types of locks exist in the local domain socket implementation: * - a global linkage lock * - a global connection list lock * - the mtxpool lock * - per-unpcb mutexes * * The linkage lock protects the global socket lists, the generation number * counter and garbage collector state. * * The connection list lock protects the list of referring sockets in a datagram * socket PCB. This lock is also overloaded to protect a global list of * sockets whose buffers contain socket references in the form of SCM_RIGHTS * messages. To avoid recursion, such references are released by a dedicated * thread. * * The mtxpool lock protects the vnode from being modified while referenced. * Lock ordering rules require that it be acquired before any PCB locks. * * The unpcb lock (unp_mtx) protects the most commonly referenced fields in the * unpcb. This includes the unp_conn field, which either links two connected * PCBs together (for connected socket types) or points at the destination * socket (for connectionless socket types). The operations of creating or * destroying a connection therefore involve locking multiple PCBs. To avoid * lock order reversals, in some cases this involves dropping a PCB lock and * using a reference counter to maintain liveness. * * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer, * allocated in pr_attach() and freed in pr_detach(). The validity of that * pointer is an invariant, so no lock is required to dereference the so_pcb * pointer if a valid socket reference is held by the caller. In practice, * this is always true during operations performed on a socket. Each unpcb * has a back-pointer to its socket, unp_socket, which will be stable under * the same circumstances. 
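 *
 * A sketch of the resulting traversal idiom (illustrative; see
 * unp_pcb_lock_peer() below for the real version, which also handles
 * lock ordering and reference counting when trylock fails):
 *
 *	UNP_PCB_LOCK(unp);
 *	unp2 = unp->unp_conn;
 *	if (unp2 != NULL && UNP_PCB_TRYLOCK(unp2))
 *		so2 = unp2->unp_socket;
 *
 * with unp2->unp_socket only dereferenced while unp2's lock pins the
 * connection.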
* * This pointer may only be safely dereferenced as long as a valid reference * to the unpcb is held. Typically, this reference will be from the socket, * or from another unpcb when the referring unpcb's lock is held (in order * that the reference not be invalidated during use). For example, to follow * unp->unp_conn->unp_socket, you need to hold a lock on unp_conn to guarantee * that detach is not run clearing unp_socket. * * Blocking with UNIX domain sockets is a tricky issue: unlike most network * protocols, bind() is a non-atomic operation, and connect() requires * potential sleeping in the protocol, due to potentially waiting on local or * distributed file systems. We try to separate "lookup" operations, which * may sleep, and the IPC operations themselves, which typically can occur * with relative atomicity as locks can be held over the entire operation. * * Another tricky issue is simultaneous multi-threaded or multi-process * access to a single UNIX domain socket. These are handled by the flags * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or * binding, both of which involve dropping UNIX domain socket locks in order * to perform namei() and other file system operations. */ static struct rwlock unp_link_rwlock; static struct mtx unp_defers_lock; #define UNP_LINK_LOCK_INIT() rw_init(&unp_link_rwlock, \ "unp_link_rwlock") #define UNP_LINK_LOCK_ASSERT() rw_assert(&unp_link_rwlock, \ RA_LOCKED) #define UNP_LINK_UNLOCK_ASSERT() rw_assert(&unp_link_rwlock, \ RA_UNLOCKED) #define UNP_LINK_RLOCK() rw_rlock(&unp_link_rwlock) #define UNP_LINK_RUNLOCK() rw_runlock(&unp_link_rwlock) #define UNP_LINK_WLOCK() rw_wlock(&unp_link_rwlock) #define UNP_LINK_WUNLOCK() rw_wunlock(&unp_link_rwlock) #define UNP_LINK_WLOCK_ASSERT() rw_assert(&unp_link_rwlock, \ RA_WLOCKED) #define UNP_LINK_WOWNED() rw_wowned(&unp_link_rwlock) #define UNP_DEFERRED_LOCK_INIT() mtx_init(&unp_defers_lock, \ "unp_defer", NULL, MTX_DEF) #define UNP_DEFERRED_LOCK() mtx_lock(&unp_defers_lock) #define UNP_DEFERRED_UNLOCK() mtx_unlock(&unp_defers_lock) #define UNP_REF_LIST_LOCK() UNP_DEFERRED_LOCK(); #define UNP_REF_LIST_UNLOCK() UNP_DEFERRED_UNLOCK(); #define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \ "unp", "unp", \ MTX_DUPOK|MTX_DEF) #define UNP_PCB_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_mtx) #define UNP_PCB_LOCKPTR(unp) (&(unp)->unp_mtx) #define UNP_PCB_LOCK(unp) mtx_lock(&(unp)->unp_mtx) #define UNP_PCB_TRYLOCK(unp) mtx_trylock(&(unp)->unp_mtx) #define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx) #define UNP_PCB_OWNED(unp) mtx_owned(&(unp)->unp_mtx) #define UNP_PCB_LOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_OWNED) #define UNP_PCB_UNLOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_NOTOWNED) static int uipc_connect2(struct socket *, struct socket *); static int uipc_ctloutput(struct socket *, struct sockopt *); static int unp_connect(struct socket *, struct sockaddr *, struct thread *); static int unp_connectat(int, struct socket *, struct sockaddr *, struct thread *, bool); typedef enum { PRU_CONNECT, PRU_CONNECT2 } conn2_how; static void unp_connect2(struct socket *so, struct socket *so2, conn2_how); static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2); static void unp_dispose(struct socket *so); static void unp_shutdown(struct unpcb *); static void unp_drop(struct unpcb *); static void unp_gc(__unused void *, int); static void unp_scan(struct mbuf *, void (*)(struct filedescent **, int)); static void unp_discard(struct file *); static void unp_freerights(struct 
filedescent **, int); static int unp_internalize(struct mbuf **, struct thread *, struct mbuf **, u_int *, u_int *); static void unp_internalize_fp(struct file *); static int unp_externalize(struct mbuf *, struct mbuf **, int); static int unp_externalize_fp(struct file *); static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *, int, struct mbuf **, u_int *, u_int *); static void unp_process_defers(void * __unused, int); static void unp_pcb_hold(struct unpcb *unp) { u_int old __unused; old = refcount_acquire(&unp->unp_refcount); KASSERT(old > 0, ("%s: unpcb %p has no references", __func__, unp)); } static __result_use_check bool unp_pcb_rele(struct unpcb *unp) { bool ret; UNP_PCB_LOCK_ASSERT(unp); if ((ret = refcount_release(&unp->unp_refcount))) { UNP_PCB_UNLOCK(unp); UNP_PCB_LOCK_DESTROY(unp); uma_zfree(unp_zone, unp); } return (ret); } static void unp_pcb_rele_notlast(struct unpcb *unp) { bool ret __unused; ret = refcount_release(&unp->unp_refcount); KASSERT(!ret, ("%s: unpcb %p has no references", __func__, unp)); } static void unp_pcb_lock_pair(struct unpcb *unp, struct unpcb *unp2) { UNP_PCB_UNLOCK_ASSERT(unp); UNP_PCB_UNLOCK_ASSERT(unp2); if (unp == unp2) { UNP_PCB_LOCK(unp); } else if ((uintptr_t)unp2 > (uintptr_t)unp) { UNP_PCB_LOCK(unp); UNP_PCB_LOCK(unp2); } else { UNP_PCB_LOCK(unp2); UNP_PCB_LOCK(unp); } } static void unp_pcb_unlock_pair(struct unpcb *unp, struct unpcb *unp2) { UNP_PCB_UNLOCK(unp); if (unp != unp2) UNP_PCB_UNLOCK(unp2); } /* * Try to lock the connected peer of an already locked socket. In some cases * this requires that we unlock the current socket. The pairbusy counter is * used to block concurrent connection attempts while the lock is dropped. The * caller must be careful to revalidate PCB state. */ static struct unpcb * unp_pcb_lock_peer(struct unpcb *unp) { struct unpcb *unp2; UNP_PCB_LOCK_ASSERT(unp); unp2 = unp->unp_conn; if (unp2 == NULL) return (NULL); if (__predict_false(unp == unp2)) return (unp); UNP_PCB_UNLOCK_ASSERT(unp2); if (__predict_true(UNP_PCB_TRYLOCK(unp2))) return (unp2); if ((uintptr_t)unp2 > (uintptr_t)unp) { UNP_PCB_LOCK(unp2); return (unp2); } unp->unp_pairbusy++; unp_pcb_hold(unp2); UNP_PCB_UNLOCK(unp); UNP_PCB_LOCK(unp2); UNP_PCB_LOCK(unp); KASSERT(unp->unp_conn == unp2 || unp->unp_conn == NULL, ("%s: socket %p was reconnected", __func__, unp)); if (--unp->unp_pairbusy == 0 && (unp->unp_flags & UNP_WAITING) != 0) { unp->unp_flags &= ~UNP_WAITING; wakeup(unp); } if (unp_pcb_rele(unp2)) { /* unp2 is unlocked. */ return (NULL); } if (unp->unp_conn == NULL) { UNP_PCB_UNLOCK(unp2); return (NULL); } return (unp2); } static void uipc_abort(struct socket *so) { struct unpcb *unp, *unp2; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_abort: unp == NULL")); UNP_PCB_UNLOCK_ASSERT(unp); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { unp_pcb_hold(unp2); UNP_PCB_UNLOCK(unp); unp_drop(unp2); } else UNP_PCB_UNLOCK(unp); } static int -uipc_accept(struct socket *so, struct sockaddr **nam) +uipc_accept(struct socket *so, struct sockaddr *ret) { struct unpcb *unp, *unp2; const struct sockaddr *sa; /* * Pass back name of connected socket, if it was bound and we are * still connected (our peer may have closed already!). 
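	 * If the peer was never bound (for example the far end of a
	 * socketpair(2)), the caller is handed sun_noname instead: an
	 * AF_LOCAL address of length sizeof(struct sockaddr) with an
	 * empty path.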
*/ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_accept: unp == NULL")); - *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); UNP_PCB_LOCK(unp); unp2 = unp_pcb_lock_peer(unp); if (unp2 != NULL && unp2->unp_addr != NULL) sa = (struct sockaddr *)unp2->unp_addr; else sa = &sun_noname; - bcopy(sa, *nam, sa->sa_len); + bcopy(sa, ret, sa->sa_len); if (unp2 != NULL) unp_pcb_unlock_pair(unp, unp2); else UNP_PCB_UNLOCK(unp); return (0); } static int uipc_attach(struct socket *so, int proto, struct thread *td) { u_long sendspace, recvspace; struct unpcb *unp; int error; bool locked; KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL")); if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { switch (so->so_type) { case SOCK_STREAM: sendspace = unpst_sendspace; recvspace = unpst_recvspace; break; case SOCK_DGRAM: STAILQ_INIT(&so->so_rcv.uxdg_mb); STAILQ_INIT(&so->so_snd.uxdg_mb); TAILQ_INIT(&so->so_rcv.uxdg_conns); /* * Since send buffer is either bypassed or is a part * of one-to-many receive buffer, we assign both space * limits to unpdg_recvspace. */ sendspace = recvspace = unpdg_recvspace; break; case SOCK_SEQPACKET: sendspace = unpsp_sendspace; recvspace = unpsp_recvspace; break; default: panic("uipc_attach"); } error = soreserve(so, sendspace, recvspace); if (error) return (error); } unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO); if (unp == NULL) return (ENOBUFS); LIST_INIT(&unp->unp_refs); UNP_PCB_LOCK_INIT(unp); unp->unp_socket = so; so->so_pcb = unp; refcount_init(&unp->unp_refcount, 1); if ((locked = UNP_LINK_WOWNED()) == false) UNP_LINK_WLOCK(); unp->unp_gencnt = ++unp_gencnt; unp->unp_ino = ++unp_ino; unp_count++; switch (so->so_type) { case SOCK_STREAM: LIST_INSERT_HEAD(&unp_shead, unp, unp_link); break; case SOCK_DGRAM: LIST_INSERT_HEAD(&unp_dhead, unp, unp_link); break; case SOCK_SEQPACKET: LIST_INSERT_HEAD(&unp_sphead, unp, unp_link); break; default: panic("uipc_attach"); } if (locked == false) UNP_LINK_WUNLOCK(); return (0); } static int uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_un *soun = (struct sockaddr_un *)nam; struct vattr vattr; int error, namelen; struct nameidata nd; struct unpcb *unp; struct vnode *vp; struct mount *mp; cap_rights_t rights; char *buf; if (nam->sa_family != AF_UNIX) return (EAFNOSUPPORT); unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_bind: unp == NULL")); if (soun->sun_len > sizeof(struct sockaddr_un)) return (EINVAL); namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); if (namelen <= 0) return (EINVAL); /* * We don't allow simultaneous bind() calls on a single UNIX domain * socket, so flag in-progress operations, and return an error if an * operation is already in progress. * * Historically, we have not allowed a socket to be rebound, so this * also returns an error. Not allowing re-binding simplifies the * implementation and avoids a great many possible failure modes. 
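	 * Concretely, as the checks below implement it: a second bind(2)
	 * on an already-bound socket fails with EINVAL, and a bind(2)
	 * racing against one still in progress fails with EALREADY.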
*/ UNP_PCB_LOCK(unp); if (unp->unp_vnode != NULL) { UNP_PCB_UNLOCK(unp); return (EINVAL); } if (unp->unp_flags & UNP_BINDING) { UNP_PCB_UNLOCK(unp); return (EALREADY); } unp->unp_flags |= UNP_BINDING; UNP_PCB_UNLOCK(unp); buf = malloc(namelen + 1, M_TEMP, M_WAITOK); bcopy(soun->sun_path, buf, namelen); buf[namelen] = 0; restart: NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | NOCACHE, UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_BINDAT)); /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ error = namei(&nd); if (error) goto error; vp = nd.ni_vp; if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE_PNBUF(&nd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (vp != NULL) { vrele(vp); error = EADDRINUSE; goto error; } error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); if (error) goto error; goto restart; } VATTR_NULL(&vattr); vattr.va_type = VSOCK; vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_pd->pd_cmask); #ifdef MAC error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); #endif if (error == 0) error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); NDFREE_PNBUF(&nd); if (error) { VOP_VPUT_PAIR(nd.ni_dvp, NULL, true); vn_finished_write(mp); if (error == ERELOOKUP) goto restart; goto error; } vp = nd.ni_vp; ASSERT_VOP_ELOCKED(vp, "uipc_bind"); soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); UNP_PCB_LOCK(unp); VOP_UNP_BIND(vp, unp); unp->unp_vnode = vp; unp->unp_addr = soun; unp->unp_flags &= ~UNP_BINDING; UNP_PCB_UNLOCK(unp); vref(vp); VOP_VPUT_PAIR(nd.ni_dvp, &vp, true); vn_finished_write(mp); free(buf, M_TEMP); return (0); error: UNP_PCB_LOCK(unp); unp->unp_flags &= ~UNP_BINDING; UNP_PCB_UNLOCK(unp); free(buf, M_TEMP); return (error); } static int uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { return (uipc_bindat(AT_FDCWD, so, nam, td)); } static int uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { int error; KASSERT(td == curthread, ("uipc_connect: td != curthread")); error = unp_connect(so, nam, td); return (error); } static int uipc_connectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { int error; KASSERT(td == curthread, ("uipc_connectat: td != curthread")); error = unp_connectat(fd, so, nam, td, false); return (error); } static void uipc_close(struct socket *so) { struct unpcb *unp, *unp2; struct vnode *vp = NULL; struct mtx *vplock; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_close: unp == NULL")); vplock = NULL; if ((vp = unp->unp_vnode) != NULL) { vplock = mtx_pool_find(mtxpool_sleep, vp); mtx_lock(vplock); } UNP_PCB_LOCK(unp); if (vp && unp->unp_vnode == NULL) { mtx_unlock(vplock); vp = NULL; } if (vp != NULL) { VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; } if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) unp_disconnect(unp, unp2); else UNP_PCB_UNLOCK(unp); if (vp) { mtx_unlock(vplock); vrele(vp); } } static int uipc_connect2(struct socket *so1, struct socket *so2) { struct unpcb *unp, *unp2; if (so1->so_type != so2->so_type) return (EPROTOTYPE); unp = so1->so_pcb; KASSERT(unp != NULL, ("uipc_connect2: unp == NULL")); unp2 = so2->so_pcb; KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL")); unp_pcb_lock_pair(unp, unp2); unp_connect2(so1, so2, PRU_CONNECT2); unp_pcb_unlock_pair(unp, unp2); return (0); } static void uipc_detach(struct socket *so) { struct unpcb *unp, *unp2; struct mtx *vplock; struct vnode *vp; int local_unp_rights; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_detach: unp == 
NULL")); vp = NULL; vplock = NULL; UNP_LINK_WLOCK(); LIST_REMOVE(unp, unp_link); if (unp->unp_gcflag & UNPGC_DEAD) LIST_REMOVE(unp, unp_dead); unp->unp_gencnt = ++unp_gencnt; --unp_count; UNP_LINK_WUNLOCK(); UNP_PCB_UNLOCK_ASSERT(unp); restart: if ((vp = unp->unp_vnode) != NULL) { vplock = mtx_pool_find(mtxpool_sleep, vp); mtx_lock(vplock); } UNP_PCB_LOCK(unp); if (unp->unp_vnode != vp && unp->unp_vnode != NULL) { if (vplock) mtx_unlock(vplock); UNP_PCB_UNLOCK(unp); goto restart; } if ((vp = unp->unp_vnode) != NULL) { VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; } if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) unp_disconnect(unp, unp2); else UNP_PCB_UNLOCK(unp); UNP_REF_LIST_LOCK(); while (!LIST_EMPTY(&unp->unp_refs)) { struct unpcb *ref = LIST_FIRST(&unp->unp_refs); unp_pcb_hold(ref); UNP_REF_LIST_UNLOCK(); MPASS(ref != unp); UNP_PCB_UNLOCK_ASSERT(ref); unp_drop(ref); UNP_REF_LIST_LOCK(); } UNP_REF_LIST_UNLOCK(); UNP_PCB_LOCK(unp); local_unp_rights = unp_rights; unp->unp_socket->so_pcb = NULL; unp->unp_socket = NULL; free(unp->unp_addr, M_SONAME); unp->unp_addr = NULL; if (!unp_pcb_rele(unp)) UNP_PCB_UNLOCK(unp); if (vp) { mtx_unlock(vplock); vrele(vp); } if (local_unp_rights) taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1); switch (so->so_type) { case SOCK_DGRAM: /* * Everything should have been unlinked/freed by unp_dispose() * and/or unp_disconnect(). */ MPASS(so->so_rcv.uxdg_peeked == NULL); MPASS(STAILQ_EMPTY(&so->so_rcv.uxdg_mb)); MPASS(TAILQ_EMPTY(&so->so_rcv.uxdg_conns)); MPASS(STAILQ_EMPTY(&so->so_snd.uxdg_mb)); } } static int uipc_disconnect(struct socket *so) { struct unpcb *unp, *unp2; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL")); UNP_PCB_LOCK(unp); if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) unp_disconnect(unp, unp2); else UNP_PCB_UNLOCK(unp); return (0); } static int uipc_listen(struct socket *so, int backlog, struct thread *td) { struct unpcb *unp; int error; MPASS(so->so_type != SOCK_DGRAM); /* * Synchronize with concurrent connection attempts. */ error = 0; unp = sotounpcb(so); UNP_PCB_LOCK(unp); if (unp->unp_conn != NULL || (unp->unp_flags & UNP_CONNECTING) != 0) error = EINVAL; else if (unp->unp_vnode == NULL) error = EDESTADDRREQ; if (error != 0) { UNP_PCB_UNLOCK(unp); return (error); } SOCK_LOCK(so); error = solisten_proto_check(so); if (error == 0) { cru2xt(td, &unp->unp_peercred); solisten_proto(so, backlog); } SOCK_UNLOCK(so); UNP_PCB_UNLOCK(unp); return (error); } static int uipc_peeraddr(struct socket *so, struct sockaddr **nam) { struct unpcb *unp, *unp2; const struct sockaddr *sa; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL")); *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); UNP_PCB_LOCK(unp); unp2 = unp_pcb_lock_peer(unp); if (unp2 != NULL) { if (unp2->unp_addr != NULL) sa = (struct sockaddr *)unp2->unp_addr; else sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); unp_pcb_unlock_pair(unp, unp2); } else { sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); UNP_PCB_UNLOCK(unp); } return (0); } static int uipc_rcvd(struct socket *so, int flags) { struct unpcb *unp, *unp2; struct socket *so2; u_int mbcnt, sbcc; unp = sotounpcb(so); KASSERT(unp != NULL, ("%s: unp == NULL", __func__)); KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET, ("%s: socktype %d", __func__, so->so_type)); /* * Adjust backpressure on sender and wakeup any waiting to write. 
* * The unp lock is acquired to maintain the validity of the unp_conn * pointer; no lock on unp2 is required as unp2->unp_socket will be * static as long as we don't permit unp2 to disconnect from unp, * which is prevented by the lock on unp. We cache values from * so_rcv to avoid holding the so_rcv lock over the entire * transaction on the remote so_snd. */ SOCKBUF_LOCK(&so->so_rcv); mbcnt = so->so_rcv.sb_mbcnt; sbcc = sbavail(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); /* * There is a benign race condition at this point. If we're planning to * clear SB_STOP, but uipc_send is called on the connected socket at * this instant, it might add data to the sockbuf and set SB_STOP. Then * we would erroneously clear SB_STOP below, even though the sockbuf is * full. The race is benign because the only ill effect is to allow the * sockbuf to exceed its size limit, and the size limits are not * strictly guaranteed anyway. */ UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 == NULL) { UNP_PCB_UNLOCK(unp); return (0); } so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_snd); if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax) so2->so_snd.sb_flags &= ~SB_STOP; sowwakeup_locked(so2); UNP_PCB_UNLOCK(unp); return (0); } static int uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct unpcb *unp, *unp2; struct socket *so2; u_int mbcnt, sbcc; int error; unp = sotounpcb(so); KASSERT(unp != NULL, ("%s: unp == NULL", __func__)); KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET, ("%s: socktype %d", __func__, so->so_type)); error = 0; if (flags & PRUS_OOB) { error = EOPNOTSUPP; goto release; } if (control != NULL && (error = unp_internalize(&control, td, NULL, NULL, NULL))) goto release; unp2 = NULL; if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam != NULL) { if ((error = unp_connect(so, nam, td)) != 0) goto out; } else { error = ENOTCONN; goto out; } } UNP_PCB_LOCK(unp); if ((unp2 = unp_pcb_lock_peer(unp)) == NULL) { UNP_PCB_UNLOCK(unp); error = ENOTCONN; goto out; } else if (so->so_snd.sb_state & SBS_CANTSENDMORE) { unp_pcb_unlock_pair(unp, unp2); error = EPIPE; goto out; } UNP_PCB_UNLOCK(unp); if ((so2 = unp2->unp_socket) == NULL) { UNP_PCB_UNLOCK(unp2); error = ENOTCONN; goto out; } SOCKBUF_LOCK(&so2->so_rcv); if (unp2->unp_flags & UNP_WANTCRED_MASK) { /* * Credentials are passed only once on SOCK_STREAM and * SOCK_SEQPACKET (LOCAL_CREDS => WANTCRED_ONESHOT), or * forever (LOCAL_CREDS_PERSISTENT => WANTCRED_ALWAYS). */ control = unp_addsockcred(td, control, unp2->unp_flags, NULL, NULL, NULL); unp2->unp_flags &= ~UNP_WANTCRED_ONESHOT; } /* * Send to paired receive port and wake up readers. Don't * check for space available in the receive buffer if we're * attaching ancillary data; Unix domain sockets only check * for space in the sending sockbuf, and that check is * performed one level up the stack. At that level we cannot * precisely account for the amount of buffer space used * (e.g., because control messages are not yet internalized). 
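	 * For example (illustrative): a one-byte write carrying a large
	 * SCM_RIGHTS chain is still appended here even when so2->so_rcv
	 * is near its limit, since the sender-side check only accounted
	 * for the single data byte; the SB_STOP logic below then
	 * throttles subsequent writes.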
*/ switch (so->so_type) { case SOCK_STREAM: if (control != NULL) { sbappendcontrol_locked(&so2->so_rcv, m, control, flags); control = NULL; } else sbappend_locked(&so2->so_rcv, m, flags); break; case SOCK_SEQPACKET: if (sbappendaddr_nospacecheck_locked(&so2->so_rcv, &sun_noname, m, control)) control = NULL; break; } mbcnt = so2->so_rcv.sb_mbcnt; sbcc = sbavail(&so2->so_rcv); if (sbcc) sorwakeup_locked(so2); else SOCKBUF_UNLOCK(&so2->so_rcv); /* * The PCB lock on unp2 protects the SB_STOP flag. Without it, * it would be possible for uipc_rcvd to be called at this * point, drain the receiving sockbuf, clear SB_STOP, and then * we would set SB_STOP below. That could lead to an empty * sockbuf having SB_STOP set */ SOCKBUF_LOCK(&so->so_snd); if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax) so->so_snd.sb_flags |= SB_STOP; SOCKBUF_UNLOCK(&so->so_snd); UNP_PCB_UNLOCK(unp2); m = NULL; out: /* * PRUS_EOF is equivalent to pr_send followed by pr_shutdown. */ if (flags & PRUS_EOF) { UNP_PCB_LOCK(unp); socantsendmore(so); unp_shutdown(unp); UNP_PCB_UNLOCK(unp); } if (control != NULL && error != 0) unp_scan(control, unp_freerights); release: if (control != NULL) m_freem(control); /* * In case of PRUS_NOTREADY, uipc_ready() is responsible * for freeing memory. */ if (m != NULL && (flags & PRUS_NOTREADY) == 0) m_freem(m); return (error); } /* PF_UNIX/SOCK_DGRAM version of sbspace() */ static inline bool uipc_dgram_sbspace(struct sockbuf *sb, u_int cc, u_int mbcnt) { u_int bleft, mleft; /* * Negative space may happen if send(2) is followed by * setsockopt(SO_SNDBUF/SO_RCVBUF) that shrinks maximum. */ if (__predict_false(sb->sb_hiwat < sb->uxdg_cc || sb->sb_mbmax < sb->uxdg_mbcnt)) return (false); if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) return (false); bleft = sb->sb_hiwat - sb->uxdg_cc; mleft = sb->sb_mbmax - sb->uxdg_mbcnt; return (bleft >= cc && mleft >= mbcnt); } /* * PF_UNIX/SOCK_DGRAM send * * Allocate a record consisting of 3 mbufs in the sequence of * from -> control -> data and append it to the socket buffer. * * The first mbuf carries sender's name and is a pkthdr that stores * overall length of datagram, its memory consumption and control length. */ #define ctllen PH_loc.thirtytwo[1] _Static_assert(offsetof(struct pkthdr, memlen) + sizeof(u_int) <= offsetof(struct pkthdr, ctllen), "unix/dgram can not store ctllen"); static int uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *m, struct mbuf *c, int flags, struct thread *td) { struct unpcb *unp, *unp2; const struct sockaddr *from; struct socket *so2; struct sockbuf *sb; struct mbuf *f, *clast; u_int cc, ctl, mbcnt; u_int dcc __diagused, dctl __diagused, dmbcnt __diagused; int error; MPASS((uio != NULL && m == NULL) || (m != NULL && uio == NULL)); error = 0; f = NULL; ctl = 0; if (__predict_false(flags & MSG_OOB)) { error = EOPNOTSUPP; goto out; } if (m == NULL) { if (__predict_false(uio->uio_resid > unpdg_maxdgram)) { error = EMSGSIZE; goto out; } m = m_uiotombuf(uio, M_WAITOK, 0, max_hdr, M_PKTHDR); if (__predict_false(m == NULL)) { error = EFAULT; goto out; } f = m_gethdr(M_WAITOK, MT_SONAME); cc = m->m_pkthdr.len; mbcnt = MSIZE + m->m_pkthdr.memlen; if (c != NULL && (error = unp_internalize(&c, td, &clast, &ctl, &mbcnt))) goto out; } else { /* pr_sosend() with mbuf usually is a kernel thread. 
*/ M_ASSERTPKTHDR(m); if (__predict_false(c != NULL)) panic("%s: control from a kernel thread", __func__); if (__predict_false(m->m_pkthdr.len > unpdg_maxdgram)) { error = EMSGSIZE; goto out; } if ((f = m_gethdr(M_NOWAIT, MT_SONAME)) == NULL) { error = ENOBUFS; goto out; } /* Condition the foreign mbuf to our standards. */ m_clrprotoflags(m); m_tag_delete_chain(m, NULL); m->m_pkthdr.rcvif = NULL; m->m_pkthdr.flowid = 0; m->m_pkthdr.csum_flags = 0; m->m_pkthdr.fibnum = 0; m->m_pkthdr.rsstype = 0; cc = m->m_pkthdr.len; mbcnt = MSIZE; for (struct mbuf *mb = m; mb != NULL; mb = mb->m_next) { mbcnt += MSIZE; if (mb->m_flags & M_EXT) mbcnt += mb->m_ext.ext_size; } } unp = sotounpcb(so); MPASS(unp); /* * XXXGL: would be cool to fully remove so_snd out of the equation * and avoid this lock, which is not only extraneous, but also being * released, thus still leaving possibility for a race. We can easily * handle SBS_CANTSENDMORE/SS_ISCONNECTED complement in unpcb, but it * is more difficult to invent something to handle so_error. */ error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags)); if (error) goto out2; SOCK_SENDBUF_LOCK(so); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCK_SENDBUF_UNLOCK(so); error = EPIPE; goto out3; } if (so->so_error != 0) { error = so->so_error; so->so_error = 0; SOCK_SENDBUF_UNLOCK(so); goto out3; } if (((so->so_state & SS_ISCONNECTED) == 0) && addr == NULL) { SOCK_SENDBUF_UNLOCK(so); error = EDESTADDRREQ; goto out3; } SOCK_SENDBUF_UNLOCK(so); if (addr != NULL) { if ((error = unp_connectat(AT_FDCWD, so, addr, td, true))) goto out3; UNP_PCB_LOCK_ASSERT(unp); unp2 = unp->unp_conn; UNP_PCB_LOCK_ASSERT(unp2); } else { UNP_PCB_LOCK(unp); unp2 = unp_pcb_lock_peer(unp); if (unp2 == NULL) { UNP_PCB_UNLOCK(unp); error = ENOTCONN; goto out3; } } if (unp2->unp_flags & UNP_WANTCRED_MASK) c = unp_addsockcred(td, c, unp2->unp_flags, &clast, &ctl, &mbcnt); if (unp->unp_addr != NULL) from = (struct sockaddr *)unp->unp_addr; else from = &sun_noname; f->m_len = from->sa_len; MPASS(from->sa_len <= MLEN); bcopy(from, mtod(f, void *), from->sa_len); ctl += f->m_len; /* * Concatenate mbufs: from -> control -> data. * Save overall cc and mbcnt in "from" mbuf. */ if (c != NULL) { #ifdef INVARIANTS struct mbuf *mc; for (mc = c; mc->m_next != NULL; mc = mc->m_next); MPASS(mc == clast); #endif f->m_next = c; clast->m_next = m; c = NULL; } else f->m_next = m; m = NULL; #ifdef INVARIANTS dcc = dctl = dmbcnt = 0; for (struct mbuf *mb = f; mb != NULL; mb = mb->m_next) { if (mb->m_type == MT_DATA) dcc += mb->m_len; else dctl += mb->m_len; dmbcnt += MSIZE; if (mb->m_flags & M_EXT) dmbcnt += mb->m_ext.ext_size; } MPASS(dcc == cc); MPASS(dctl == ctl); MPASS(dmbcnt == mbcnt); #endif f->m_pkthdr.len = cc + ctl; f->m_pkthdr.memlen = mbcnt; f->m_pkthdr.ctllen = ctl; /* * Destination socket buffer selection. * * Unconnected sends, when !(so->so_state & SS_ISCONNECTED) and the * destination address is supplied, create a temporary connection for * the run time of the function (see call to unp_connectat() above and * to unp_disconnect() below). We distinguish them by condition of * (addr != NULL). We intentionally avoid adding 'bool connected' for * that condition, since, again, through the run time of this code we * are always connected. For such "unconnected" sends, the destination * buffer would be the receive buffer of destination socket so2. * * For connected sends, data lands on the send buffer of the sender's * socket "so". 
Then, if we just added the very first datagram * on this send buffer, we need to add the send buffer on to the * receiving socket's buffer list. We put ourselves on top of the * list. Such logic gives infrequent senders priority over frequent * senders. * * Note on byte count management. As long as event methods kevent(2), * select(2) are not protocol specific (yet), we need to maintain * meaningful values on the receive buffer. So, the receive buffer * would accumulate counters from all connected buffers potentially * having sb_ccc > sb_hiwat or sb_mbcnt > sb_mbmax. */ so2 = unp2->unp_socket; sb = (addr == NULL) ? &so->so_snd : &so2->so_rcv; SOCK_RECVBUF_LOCK(so2); if (uipc_dgram_sbspace(sb, cc + ctl, mbcnt)) { if (addr == NULL && STAILQ_EMPTY(&sb->uxdg_mb)) TAILQ_INSERT_HEAD(&so2->so_rcv.uxdg_conns, &so->so_snd, uxdg_clist); STAILQ_INSERT_TAIL(&sb->uxdg_mb, f, m_stailqpkt); sb->uxdg_cc += cc + ctl; sb->uxdg_ctl += ctl; sb->uxdg_mbcnt += mbcnt; so2->so_rcv.sb_acc += cc + ctl; so2->so_rcv.sb_ccc += cc + ctl; so2->so_rcv.sb_ctl += ctl; so2->so_rcv.sb_mbcnt += mbcnt; sorwakeup_locked(so2); f = NULL; } else { soroverflow_locked(so2); error = ENOBUFS; if (f->m_next->m_type == MT_CONTROL) { c = f->m_next; f->m_next = NULL; } } if (addr != NULL) unp_disconnect(unp, unp2); else unp_pcb_unlock_pair(unp, unp2); td->td_ru.ru_msgsnd++; out3: SOCK_IO_SEND_UNLOCK(so); out2: if (c) unp_scan(c, unp_freerights); out: if (f) m_freem(f); if (c) m_freem(c); if (m) m_freem(m); return (error); } /* * PF_UNIX/SOCK_DGRAM receive with MSG_PEEK. * The mbuf has already been unlinked from the uxdg_mb of socket buffer * and needs to be linked onto uxdg_peeked of receive socket buffer. */ static int uipc_peek_dgram(struct socket *so, struct mbuf *m, struct sockaddr **psa, struct uio *uio, struct mbuf **controlp, int *flagsp) { ssize_t len = 0; int error; so->so_rcv.uxdg_peeked = m; so->so_rcv.uxdg_cc += m->m_pkthdr.len; so->so_rcv.uxdg_ctl += m->m_pkthdr.ctllen; so->so_rcv.uxdg_mbcnt += m->m_pkthdr.memlen; SOCK_RECVBUF_UNLOCK(so); KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_WAITOK); m = m->m_next; KASSERT(m, ("%s: no data or control after soname", __func__)); /* * With MSG_PEEK the control isn't executed, just copied. */ while (m != NULL && m->m_type == MT_CONTROL) { if (controlp != NULL) { *controlp = m_copym(m, 0, m->m_len, M_WAITOK); controlp = &(*controlp)->m_next; } m = m->m_next; } KASSERT(m == NULL || m->m_type == MT_DATA, ("%s: not MT_DATA mbuf %p", __func__, m)); while (m != NULL && uio->uio_resid > 0) { len = uio->uio_resid; if (len > m->m_len) len = m->m_len; error = uiomove(mtod(m, char *), (int)len, uio); if (error) { SOCK_IO_RECV_UNLOCK(so); return (error); } if (len == m->m_len) m = m->m_next; } SOCK_IO_RECV_UNLOCK(so); if (flagsp != NULL) { if (m != NULL) { if (*flagsp & MSG_TRUNC) { /* Report real length of the packet */ uio->uio_resid -= m_length(m, NULL) - len; } *flagsp |= MSG_TRUNC; } else *flagsp &= ~MSG_TRUNC; } return (0); } /* * PF_UNIX/SOCK_DGRAM receive */ static int uipc_soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct sockbuf *sb = NULL; struct mbuf *m; int flags, error; ssize_t len = 0; bool nonblock; MPASS(mp0 == NULL); if (psa != NULL) *psa = NULL; if (controlp != NULL) *controlp = NULL; flags = flagsp != NULL ? 
*flagsp : 0; nonblock = (so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT | MSG_NBIO)); error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags)); if (__predict_false(error)) return (error); /* * Loop blocking while waiting for a datagram. Prioritize connected * peers over unconnected sends. Set sb to selected socket buffer * containing an mbuf on exit from the wait loop. A datagram that * had already been peeked at has top priority. */ SOCK_RECVBUF_LOCK(so); while ((m = so->so_rcv.uxdg_peeked) == NULL && (sb = TAILQ_FIRST(&so->so_rcv.uxdg_conns)) == NULL && (m = STAILQ_FIRST(&so->so_rcv.uxdg_mb)) == NULL) { if (so->so_error) { error = so->so_error; so->so_error = 0; SOCK_RECVBUF_UNLOCK(so); SOCK_IO_RECV_UNLOCK(so); return (error); } if (so->so_rcv.sb_state & SBS_CANTRCVMORE || uio->uio_resid == 0) { SOCK_RECVBUF_UNLOCK(so); SOCK_IO_RECV_UNLOCK(so); return (0); } if (nonblock) { SOCK_RECVBUF_UNLOCK(so); SOCK_IO_RECV_UNLOCK(so); return (EWOULDBLOCK); } error = sbwait(so, SO_RCV); if (error) { SOCK_RECVBUF_UNLOCK(so); SOCK_IO_RECV_UNLOCK(so); return (error); } } if (sb == NULL) sb = &so->so_rcv; else if (m == NULL) m = STAILQ_FIRST(&sb->uxdg_mb); else MPASS(m == so->so_rcv.uxdg_peeked); MPASS(sb->uxdg_cc > 0); M_ASSERTPKTHDR(m); KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; if (__predict_true(m != so->so_rcv.uxdg_peeked)) { STAILQ_REMOVE_HEAD(&sb->uxdg_mb, m_stailqpkt); if (STAILQ_EMPTY(&sb->uxdg_mb) && sb != &so->so_rcv) TAILQ_REMOVE(&so->so_rcv.uxdg_conns, sb, uxdg_clist); } else so->so_rcv.uxdg_peeked = NULL; sb->uxdg_cc -= m->m_pkthdr.len; sb->uxdg_ctl -= m->m_pkthdr.ctllen; sb->uxdg_mbcnt -= m->m_pkthdr.memlen; if (__predict_false(flags & MSG_PEEK)) return (uipc_peek_dgram(so, m, psa, uio, controlp, flagsp)); so->so_rcv.sb_acc -= m->m_pkthdr.len; so->so_rcv.sb_ccc -= m->m_pkthdr.len; so->so_rcv.sb_ctl -= m->m_pkthdr.ctllen; so->so_rcv.sb_mbcnt -= m->m_pkthdr.memlen; SOCK_RECVBUF_UNLOCK(so); if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_WAITOK); m = m_free(m); KASSERT(m, ("%s: no data or control after soname", __func__)); /* * Packet to copyout() is now in 'm' and it is disconnected from the * queue. * * Process one or more MT_CONTROL mbufs present before any data mbufs * in the first mbuf chain on the socket buffer. We call into the * unp_externalize() to perform externalization (or freeing if * controlp == NULL). In some cases there can be only MT_CONTROL mbufs * without MT_DATA mbufs. */ while (m != NULL && m->m_type == MT_CONTROL) { struct mbuf *cm; /* XXXGL: unp_externalize() is also dom_externalize() KBI and * it frees whole chain, so we must disconnect the mbuf. 
*/ cm = m; m = m->m_next; cm->m_next = NULL; error = unp_externalize(cm, controlp, flags); if (error != 0) { SOCK_IO_RECV_UNLOCK(so); unp_scan(m, unp_freerights); m_freem(m); return (error); } if (controlp != NULL) { while (*controlp != NULL) controlp = &(*controlp)->m_next; } } KASSERT(m == NULL || m->m_type == MT_DATA, ("%s: not MT_DATA mbuf %p", __func__, m)); while (m != NULL && uio->uio_resid > 0) { len = uio->uio_resid; if (len > m->m_len) len = m->m_len; error = uiomove(mtod(m, char *), (int)len, uio); if (error) { SOCK_IO_RECV_UNLOCK(so); m_freem(m); return (error); } if (len == m->m_len) m = m_free(m); else { m->m_data += len; m->m_len -= len; } } SOCK_IO_RECV_UNLOCK(so); if (m != NULL) { if (flagsp != NULL) { if (flags & MSG_TRUNC) { /* Report real length of the packet */ uio->uio_resid -= m_length(m, NULL); } *flagsp |= MSG_TRUNC; } m_freem(m); } else if (flagsp != NULL) *flagsp &= ~MSG_TRUNC; return (0); } static bool uipc_ready_scan(struct socket *so, struct mbuf *m, int count, int *errorp) { struct mbuf *mb, *n; struct sockbuf *sb; SOCK_LOCK(so); if (SOLISTENING(so)) { SOCK_UNLOCK(so); return (false); } mb = NULL; sb = &so->so_rcv; SOCKBUF_LOCK(sb); if (sb->sb_fnrdy != NULL) { for (mb = sb->sb_mb, n = mb->m_nextpkt; mb != NULL;) { if (mb == m) { *errorp = sbready(sb, m, count); break; } mb = mb->m_next; if (mb == NULL) { mb = n; if (mb != NULL) n = mb->m_nextpkt; } } } SOCKBUF_UNLOCK(sb); SOCK_UNLOCK(so); return (mb != NULL); } static int uipc_ready(struct socket *so, struct mbuf *m, int count) { struct unpcb *unp, *unp2; struct socket *so2; int error, i; unp = sotounpcb(so); KASSERT(so->so_type == SOCK_STREAM, ("%s: unexpected socket type for %p", __func__, so)); UNP_PCB_LOCK(unp); if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) { UNP_PCB_UNLOCK(unp); so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_rcv); if ((error = sbready(&so2->so_rcv, m, count)) == 0) sorwakeup_locked(so2); else SOCKBUF_UNLOCK(&so2->so_rcv); UNP_PCB_UNLOCK(unp2); return (error); } UNP_PCB_UNLOCK(unp); /* * The receiving socket has been disconnected, but may still be valid. * In this case, the now-ready mbufs are still present in its socket * buffer, so perform an exhaustive search before giving up and freeing * the mbufs. 
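 *
 * (Not-ready mbufs typically originate from sendfile(2) over a unix
 * stream socket; by the time the I/O completes and this pr_ready method
 * runs, the sender may already have disconnected, hence the walk of the
 * global list below.)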
*/ UNP_LINK_RLOCK(); LIST_FOREACH(unp, &unp_shead, unp_link) { if (uipc_ready_scan(unp->unp_socket, m, count, &error)) break; } UNP_LINK_RUNLOCK(); if (unp == NULL) { for (i = 0; i < count; i++) m = m_free(m); error = ECONNRESET; } return (error); } static int uipc_sense(struct socket *so, struct stat *sb) { struct unpcb *unp; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_sense: unp == NULL")); sb->st_blksize = so->so_snd.sb_hiwat; sb->st_dev = NODEV; sb->st_ino = unp->unp_ino; return (0); } static int uipc_shutdown(struct socket *so) { struct unpcb *unp; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL")); UNP_PCB_LOCK(unp); socantsendmore(so); unp_shutdown(unp); UNP_PCB_UNLOCK(unp); return (0); } static int uipc_sockaddr(struct socket *so, struct sockaddr **nam) { struct unpcb *unp; const struct sockaddr *sa; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL")); *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); UNP_PCB_LOCK(unp); if (unp->unp_addr != NULL) sa = (struct sockaddr *) unp->unp_addr; else sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); UNP_PCB_UNLOCK(unp); return (0); } static int uipc_ctloutput(struct socket *so, struct sockopt *sopt) { struct unpcb *unp; struct xucred xu; int error, optval; if (sopt->sopt_level != SOL_LOCAL) return (EINVAL); unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL")); error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case LOCAL_PEERCRED: UNP_PCB_LOCK(unp); if (unp->unp_flags & UNP_HAVEPC) xu = unp->unp_peercred; else { if (so->so_type == SOCK_STREAM) error = ENOTCONN; else error = EINVAL; } UNP_PCB_UNLOCK(unp); if (error == 0) error = sooptcopyout(sopt, &xu, sizeof(xu)); break; case LOCAL_CREDS: /* Unlocked read. */ optval = unp->unp_flags & UNP_WANTCRED_ONESHOT ? 1 : 0; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; case LOCAL_CREDS_PERSISTENT: /* Unlocked read. */ optval = unp->unp_flags & UNP_WANTCRED_ALWAYS ? 1 : 0; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; case LOCAL_CONNWAIT: /* Unlocked read. */ optval = unp->unp_flags & UNP_CONNWAIT ? 
1 : 0; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = EOPNOTSUPP; break; } break; case SOPT_SET: switch (sopt->sopt_name) { case LOCAL_CREDS: case LOCAL_CREDS_PERSISTENT: case LOCAL_CONNWAIT: error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) break; #define OPTSET(bit, exclusive) do { \ UNP_PCB_LOCK(unp); \ if (optval) { \ if ((unp->unp_flags & (exclusive)) != 0) { \ UNP_PCB_UNLOCK(unp); \ error = EINVAL; \ break; \ } \ unp->unp_flags |= (bit); \ } else \ unp->unp_flags &= ~(bit); \ UNP_PCB_UNLOCK(unp); \ } while (0) switch (sopt->sopt_name) { case LOCAL_CREDS: OPTSET(UNP_WANTCRED_ONESHOT, UNP_WANTCRED_ALWAYS); break; case LOCAL_CREDS_PERSISTENT: OPTSET(UNP_WANTCRED_ALWAYS, UNP_WANTCRED_ONESHOT); break; case LOCAL_CONNWAIT: OPTSET(UNP_CONNWAIT, 0); break; default: break; } break; #undef OPTSET default: error = ENOPROTOOPT; break; } break; default: error = EOPNOTSUPP; break; } return (error); } static int unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { return (unp_connectat(AT_FDCWD, so, nam, td, false)); } static int unp_connectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td, bool return_locked) { struct mtx *vplock; struct sockaddr_un *soun; struct vnode *vp; struct socket *so2; struct unpcb *unp, *unp2, *unp3; struct nameidata nd; char buf[SOCK_MAXADDRLEN]; struct sockaddr *sa; cap_rights_t rights; int error, len; bool connreq; if (nam->sa_family != AF_UNIX) return (EAFNOSUPPORT); if (nam->sa_len > sizeof(struct sockaddr_un)) return (EINVAL); len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); if (len <= 0) return (EINVAL); soun = (struct sockaddr_un *)nam; bcopy(soun->sun_path, buf, len); buf[len] = 0; error = 0; unp = sotounpcb(so); UNP_PCB_LOCK(unp); for (;;) { /* * Wait for connection state to stabilize. If a connection * already exists, give up. For datagram sockets, which permit * multiple consecutive connect(2) calls, upper layers are * responsible for disconnecting in advance of a subsequent * connect(2), but this is not synchronized with PCB connection * state. * * Also make sure that no threads are currently attempting to * lock the peer socket, to ensure that unp_conn cannot * transition between two valid sockets while locks are dropped. 
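	 *
	 * For illustration, the datagram case as seen from userland (a
	 * sketch, error handling omitted; sun1/sun2 are hypothetical):
	 *
	 *	connect(fd, (struct sockaddr *)&sun1, sizeof(sun1));
	 *	connect(fd, (struct sockaddr *)&sun2, sizeof(sun2));
	 *
	 * The second connect(2) succeeds only because the upper layers
	 * issue the implicit disconnect before calling here; this function
	 * itself returns EISCONN if unp_conn is still set.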
*/ if (SOLISTENING(so)) error = EOPNOTSUPP; else if (unp->unp_conn != NULL) error = EISCONN; else if ((unp->unp_flags & UNP_CONNECTING) != 0) { error = EALREADY; } if (error != 0) { UNP_PCB_UNLOCK(unp); return (error); } if (unp->unp_pairbusy > 0) { unp->unp_flags |= UNP_WAITING; mtx_sleep(unp, UNP_PCB_LOCKPTR(unp), 0, "unpeer", 0); continue; } break; } unp->unp_flags |= UNP_CONNECTING; UNP_PCB_UNLOCK(unp); connreq = (so->so_proto->pr_flags & PR_CONNREQUIRED) != 0; if (connreq) sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); else sa = NULL; NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_CONNECTAT)); error = namei(&nd); if (error) vp = NULL; else vp = nd.ni_vp; ASSERT_VOP_LOCKED(vp, "unp_connect"); if (error) goto bad; NDFREE_PNBUF(&nd); if (vp->v_type != VSOCK) { error = ENOTSOCK; goto bad; } #ifdef MAC error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD); if (error) goto bad; #endif error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); if (error) goto bad; unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect: unp == NULL")); vplock = mtx_pool_find(mtxpool_sleep, vp); mtx_lock(vplock); VOP_UNP_CONNECT(vp, &unp2); if (unp2 == NULL) { error = ECONNREFUSED; goto bad2; } so2 = unp2->unp_socket; if (so->so_type != so2->so_type) { error = EPROTOTYPE; goto bad2; } if (connreq) { if (SOLISTENING(so2)) { CURVNET_SET(so2->so_vnet); so2 = sonewconn(so2, 0); CURVNET_RESTORE(); } else so2 = NULL; if (so2 == NULL) { error = ECONNREFUSED; goto bad2; } unp3 = sotounpcb(so2); unp_pcb_lock_pair(unp2, unp3); if (unp2->unp_addr != NULL) { bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); unp3->unp_addr = (struct sockaddr_un *) sa; sa = NULL; } unp_copy_peercred(td, unp3, unp, unp2); UNP_PCB_UNLOCK(unp2); unp2 = unp3; /* * It is safe to block on the PCB lock here since unp2 is * nascent and cannot be connected to any other sockets. */ UNP_PCB_LOCK(unp); #ifdef MAC mac_socketpeer_set_from_socket(so, so2); mac_socketpeer_set_from_socket(so2, so); #endif } else { unp_pcb_lock_pair(unp, unp2); } KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 && sotounpcb(so2) == unp2, ("%s: unp2 %p so2 %p", __func__, unp2, so2)); unp_connect2(so, so2, PRU_CONNECT); KASSERT((unp->unp_flags & UNP_CONNECTING) != 0, ("%s: unp %p has UNP_CONNECTING clear", __func__, unp)); unp->unp_flags &= ~UNP_CONNECTING; if (!return_locked) unp_pcb_unlock_pair(unp, unp2); bad2: mtx_unlock(vplock); bad: if (vp != NULL) { /* * If we are returning locked (called via uipc_sosend_dgram()), * we need to be sure that vput() won't sleep. This is * guaranteed by VOP_UNP_CONNECT() call above and unp2 lock. * SOCK_STREAM/SEQPACKET can't request return_locked (yet). */ MPASS(!(return_locked && connreq)); vput(vp); } free(sa, M_SONAME); if (__predict_false(error)) { UNP_PCB_LOCK(unp); KASSERT((unp->unp_flags & UNP_CONNECTING) != 0, ("%s: unp %p has UNP_CONNECTING clear", __func__, unp)); unp->unp_flags &= ~UNP_CONNECTING; UNP_PCB_UNLOCK(unp); } return (error); } /* * Set socket peer credentials at connection time. * * The client's PCB credentials are copied from its process structure. The * server's PCB credentials are copied from the socket on which it called * listen(2). uipc_listen cached that process's credentials at the time. 
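 *
 * Either side can later fetch the cached credentials, e.g. (a userland
 * sketch, error handling omitted):
 *
 *	struct xucred xu;
 *	socklen_t len = sizeof(xu);
 *	getsockopt(fd, SOL_LOCAL, LOCAL_PEERCRED, &xu, &len);
 *
 * which is served from unp_peercred by the LOCAL_PEERCRED case in
 * uipc_ctloutput() above.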
*/ void unp_copy_peercred(struct thread *td, struct unpcb *client_unp, struct unpcb *server_unp, struct unpcb *listen_unp) { cru2xt(td, &client_unp->unp_peercred); client_unp->unp_flags |= UNP_HAVEPC; memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred, sizeof(server_unp->unp_peercred)); server_unp->unp_flags |= UNP_HAVEPC; client_unp->unp_flags |= (listen_unp->unp_flags & UNP_WANTCRED_MASK); } static void unp_connect2(struct socket *so, struct socket *so2, conn2_how req) { struct unpcb *unp; struct unpcb *unp2; MPASS(so2->so_type == so->so_type); unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect2: unp == NULL")); unp2 = sotounpcb(so2); KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL")); UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); KASSERT(unp->unp_conn == NULL, ("%s: socket %p is already connected", __func__, unp)); unp->unp_conn = unp2; unp_pcb_hold(unp2); unp_pcb_hold(unp); switch (so->so_type) { case SOCK_DGRAM: UNP_REF_LIST_LOCK(); LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); UNP_REF_LIST_UNLOCK(); soisconnected(so); break; case SOCK_STREAM: case SOCK_SEQPACKET: KASSERT(unp2->unp_conn == NULL, ("%s: socket %p is already connected", __func__, unp2)); unp2->unp_conn = unp; if (req == PRU_CONNECT && ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)) soisconnecting(so); else soisconnected(so); soisconnected(so2); break; default: panic("unp_connect2"); } } static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2) { struct socket *so, *so2; struct mbuf *m = NULL; #ifdef INVARIANTS struct unpcb *unptmp; #endif UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); KASSERT(unp->unp_conn == unp2, ("%s: unpcb %p is not connected to %p", __func__, unp, unp2)); unp->unp_conn = NULL; so = unp->unp_socket; so2 = unp2->unp_socket; switch (unp->unp_socket->so_type) { case SOCK_DGRAM: /* * Remove our send socket buffer from the peer's receive buffer. * Move the data to the receive buffer only if it is empty. * This is a protection against a scenario where a peer * connects, floods and disconnects, effectively blocking * sendto() from unconnected sockets. */ SOCK_RECVBUF_LOCK(so2); if (!STAILQ_EMPTY(&so->so_snd.uxdg_mb)) { TAILQ_REMOVE(&so2->so_rcv.uxdg_conns, &so->so_snd, uxdg_clist); if (__predict_true((so2->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) && STAILQ_EMPTY(&so2->so_rcv.uxdg_mb)) { STAILQ_CONCAT(&so2->so_rcv.uxdg_mb, &so->so_snd.uxdg_mb); so2->so_rcv.uxdg_cc += so->so_snd.uxdg_cc; so2->so_rcv.uxdg_ctl += so->so_snd.uxdg_ctl; so2->so_rcv.uxdg_mbcnt += so->so_snd.uxdg_mbcnt; } else { m = STAILQ_FIRST(&so->so_snd.uxdg_mb); STAILQ_INIT(&so->so_snd.uxdg_mb); so2->so_rcv.sb_acc -= so->so_snd.uxdg_cc; so2->so_rcv.sb_ccc -= so->so_snd.uxdg_cc; so2->so_rcv.sb_ctl -= so->so_snd.uxdg_ctl; so2->so_rcv.sb_mbcnt -= so->so_snd.uxdg_mbcnt; } /* Note: so may reconnect. 
*/ so->so_snd.uxdg_cc = 0; so->so_snd.uxdg_ctl = 0; so->so_snd.uxdg_mbcnt = 0; } SOCK_RECVBUF_UNLOCK(so2); UNP_REF_LIST_LOCK(); #ifdef INVARIANTS LIST_FOREACH(unptmp, &unp2->unp_refs, unp_reflink) { if (unptmp == unp) break; } KASSERT(unptmp != NULL, ("%s: %p not found in reflist of %p", __func__, unp, unp2)); #endif LIST_REMOVE(unp, unp_reflink); UNP_REF_LIST_UNLOCK(); if (so) { SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; SOCK_UNLOCK(so); } break; case SOCK_STREAM: case SOCK_SEQPACKET: if (so) soisdisconnected(so); MPASS(unp2->unp_conn == unp); unp2->unp_conn = NULL; if (so2) soisdisconnected(so2); break; } if (unp == unp2) { unp_pcb_rele_notlast(unp); if (!unp_pcb_rele(unp)) UNP_PCB_UNLOCK(unp); } else { if (!unp_pcb_rele(unp)) UNP_PCB_UNLOCK(unp); if (!unp_pcb_rele(unp2)) UNP_PCB_UNLOCK(unp2); } if (m != NULL) { unp_scan(m, unp_freerights); m_freem(m); } } /* * unp_pcblist() walks the global list of struct unpcb's to generate a * pointer list, bumping the refcount on each unpcb. It then copies them out * sequentially, validating the generation number on each to see if it has * been detached. All of this is necessary because copyout() may sleep on * disk I/O. */ static int unp_pcblist(SYSCTL_HANDLER_ARGS) { struct unpcb *unp, **unp_list; unp_gen_t gencnt; struct xunpgen *xug; struct unp_head *head; struct xunpcb *xu; u_int i; int error, n; switch ((intptr_t)arg1) { case SOCK_STREAM: head = &unp_shead; break; case SOCK_DGRAM: head = &unp_dhead; break; case SOCK_SEQPACKET: head = &unp_sphead; break; default: panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1); } /* * The process of preparing the PCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == NULL) { n = unp_count; req->oldidx = 2 * (sizeof *xug) + (n + n/8) * sizeof(struct xunpcb); return (0); } if (req->newptr != NULL) return (EPERM); /* * OK, now we're committed to doing something. */ xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK | M_ZERO); UNP_LINK_RLOCK(); gencnt = unp_gencnt; n = unp_count; UNP_LINK_RUNLOCK(); xug->xug_len = sizeof *xug; xug->xug_count = n; xug->xug_gen = gencnt; xug->xug_sogen = so_gencnt; error = SYSCTL_OUT(req, xug, sizeof *xug); if (error) { free(xug, M_TEMP); return (error); } unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); UNP_LINK_RLOCK(); for (unp = LIST_FIRST(head), i = 0; unp && i < n; unp = LIST_NEXT(unp, unp_link)) { UNP_PCB_LOCK(unp); if (unp->unp_gencnt <= gencnt) { if (cr_cansee(req->td->td_ucred, unp->unp_socket->so_cred)) { UNP_PCB_UNLOCK(unp); continue; } unp_list[i++] = unp; unp_pcb_hold(unp); } UNP_PCB_UNLOCK(unp); } UNP_LINK_RUNLOCK(); n = i; /* In case we lost some during malloc. */ error = 0; xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO); for (i = 0; i < n; i++) { unp = unp_list[i]; UNP_PCB_LOCK(unp); if (unp_pcb_rele(unp)) continue; if (unp->unp_gencnt <= gencnt) { xu->xu_len = sizeof *xu; xu->xu_unpp = (uintptr_t)unp; /* * XXX - need more locking here to protect against * connect/disconnect races for SMP. 
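			 *
			 * (Races here are tolerable because the snapshot is
			 * advisory; it is consumed from userland via
			 * sysctl(3), e.g. this hypothetical sketch:
			 *
			 *	size_t len;
			 *	sysctlbyname("net.local.stream.pcblist",
			 *	    NULL, &len, NULL, 0);
			 *	buf = malloc(len);
			 *	sysctlbyname("net.local.stream.pcblist",
			 *	    buf, &len, NULL, 0);
			 *
			 * followed by walking the struct xunpgen / struct
			 * xunpcb records, as netstat(1) does.)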
*/ if (unp->unp_addr != NULL) bcopy(unp->unp_addr, &xu->xu_addr, unp->unp_addr->sun_len); else bzero(&xu->xu_addr, sizeof(xu->xu_addr)); if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) bcopy(unp->unp_conn->unp_addr, &xu->xu_caddr, unp->unp_conn->unp_addr->sun_len); else bzero(&xu->xu_caddr, sizeof(xu->xu_caddr)); xu->unp_vnode = (uintptr_t)unp->unp_vnode; xu->unp_conn = (uintptr_t)unp->unp_conn; xu->xu_firstref = (uintptr_t)LIST_FIRST(&unp->unp_refs); xu->xu_nextref = (uintptr_t)LIST_NEXT(unp, unp_reflink); xu->unp_gencnt = unp->unp_gencnt; sotoxsocket(unp->unp_socket, &xu->xu_socket); UNP_PCB_UNLOCK(unp); error = SYSCTL_OUT(req, xu, sizeof *xu); } else { UNP_PCB_UNLOCK(unp); } } free(xu, M_TEMP); if (!error) { /* * Give the user an updated idea of our state. If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. */ xug->xug_gen = unp_gencnt; xug->xug_sogen = so_gencnt; xug->xug_count = unp_count; error = SYSCTL_OUT(req, xug, sizeof *xug); } free(unp_list, M_TEMP); free(xug, M_TEMP); return (error); } SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", "List of active local datagram sockets"); SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", "List of active local stream sockets"); SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb", "List of active local seqpacket sockets"); static void unp_shutdown(struct unpcb *unp) { struct unpcb *unp2; struct socket *so; UNP_PCB_LOCK_ASSERT(unp); unp2 = unp->unp_conn; if ((unp->unp_socket->so_type == SOCK_STREAM || (unp->unp_socket->so_type == SOCK_SEQPACKET)) && unp2 != NULL) { so = unp2->unp_socket; if (so != NULL) socantrcvmore(so); } } static void unp_drop(struct unpcb *unp) { struct socket *so; struct unpcb *unp2; /* * Regardless of whether the socket's peer dropped the connection * with this socket by aborting or disconnecting, POSIX requires * that ECONNRESET is returned. */ UNP_PCB_LOCK(unp); so = unp->unp_socket; if (so) so->so_error = ECONNRESET; if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) { /* Last reference dropped in unp_disconnect(). 
*/ unp_pcb_rele_notlast(unp); unp_disconnect(unp, unp2); } else if (!unp_pcb_rele(unp)) { UNP_PCB_UNLOCK(unp); } } static void unp_freerights(struct filedescent **fdep, int fdcount) { struct file *fp; int i; KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount)); for (i = 0; i < fdcount; i++) { fp = fdep[i]->fde_file; filecaps_free(&fdep[i]->fde_caps); unp_discard(fp); } free(fdep[0], M_FILECAPS); } static int unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags) { struct thread *td = curthread; /* XXX */ struct cmsghdr *cm = mtod(control, struct cmsghdr *); int i; int *fdp; struct filedesc *fdesc = td->td_proc->p_fd; struct filedescent **fdep; void *data; socklen_t clen = control->m_len, datalen; int error, newfds; u_int newlen; UNP_LINK_UNLOCK_ASSERT(); error = 0; if (controlp != NULL) /* controlp == NULL => free control messages */ *controlp = NULL; while (cm != NULL) { MPASS(clen >= sizeof(*cm) && clen >= cm->cmsg_len); data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { newfds = datalen / sizeof(*fdep); if (newfds == 0) goto next; fdep = data; /* If we're not outputting the descriptors free them. */ if (error || controlp == NULL) { unp_freerights(fdep, newfds); goto next; } FILEDESC_XLOCK(fdesc); /* * Now change each pointer to an fd in the global * table to an integer that is the index to the local * fd table entry that we set up to point to the * global one we are transferring. */ newlen = newfds * sizeof(int); *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET, M_WAITOK); fdp = (int *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); if ((error = fdallocn(td, 0, fdp, newfds))) { FILEDESC_XUNLOCK(fdesc); unp_freerights(fdep, newfds); m_freem(*controlp); *controlp = NULL; goto next; } for (i = 0; i < newfds; i++, fdp++) { _finstall(fdesc, fdep[i]->fde_file, *fdp, (flags & MSG_CMSG_CLOEXEC) != 0 ? O_CLOEXEC : 0, &fdep[i]->fde_caps); unp_externalize_fp(fdep[i]->fde_file); } /* * The new type indicates that the mbuf data refers to * kernel resources that may need to be released before * the mbuf is freed. */ m_chtype(*controlp, MT_EXTCONTROL); FILEDESC_XUNLOCK(fdesc); free(fdep[0], M_FILECAPS); } else { /* We can just copy anything else across. 
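		 * Only SCM_RIGHTS needs the translation above; anything
		 * else (e.g. SCM_CREDS, SCM_CREDS2, the SCM_*TIME stamps)
		 * is an ordinary byte payload by this point and survives a
		 * plain bcopy().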
*/ if (error || controlp == NULL) goto next; *controlp = sbcreatecontrol(NULL, datalen, cm->cmsg_type, cm->cmsg_level, M_WAITOK); bcopy(data, CMSG_DATA(mtod(*controlp, struct cmsghdr *)), datalen); } controlp = &(*controlp)->m_next; next: if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } m_freem(control); return (error); } static void unp_zone_change(void *tag) { uma_zone_set_max(unp_zone, maxsockets); } #ifdef INVARIANTS static void unp_zdtor(void *mem, int size __unused, void *arg __unused) { struct unpcb *unp; unp = mem; KASSERT(LIST_EMPTY(&unp->unp_refs), ("%s: unpcb %p has lingering refs", __func__, unp)); KASSERT(unp->unp_socket == NULL, ("%s: unpcb %p has socket backpointer", __func__, unp)); KASSERT(unp->unp_vnode == NULL, ("%s: unpcb %p has vnode references", __func__, unp)); KASSERT(unp->unp_conn == NULL, ("%s: unpcb %p is still connected", __func__, unp)); KASSERT(unp->unp_addr == NULL, ("%s: unpcb %p has leaked addr", __func__, unp)); } #endif static void unp_init(void *arg __unused) { uma_dtor dtor; #ifdef INVARIANTS dtor = unp_zdtor; #else dtor = NULL; #endif unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, dtor, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(unp_zone, maxsockets); uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached"); EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change, NULL, EVENTHANDLER_PRI_ANY); LIST_INIT(&unp_dhead); LIST_INIT(&unp_shead); LIST_INIT(&unp_sphead); SLIST_INIT(&unp_defers); TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL); TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL); UNP_LINK_LOCK_INIT(); UNP_DEFERRED_LOCK_INIT(); } SYSINIT(unp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, unp_init, NULL); static void unp_internalize_cleanup_rights(struct mbuf *control) { struct cmsghdr *cp; struct mbuf *m; void *data; socklen_t datalen; for (m = control; m != NULL; m = m->m_next) { cp = mtod(m, struct cmsghdr *); if (cp->cmsg_level != SOL_SOCKET || cp->cmsg_type != SCM_RIGHTS) continue; data = CMSG_DATA(cp); datalen = (caddr_t)cp + cp->cmsg_len - (caddr_t)data; unp_freerights(data, datalen / sizeof(struct filedesc *)); } } static int unp_internalize(struct mbuf **controlp, struct thread *td, struct mbuf **clast, u_int *space, u_int *mbcnt) { struct mbuf *control, **initial_controlp; struct proc *p; struct filedesc *fdesc; struct bintime *bt; struct cmsghdr *cm; struct cmsgcred *cmcred; struct filedescent *fde, **fdep, *fdev; struct file *fp; struct timeval *tv; struct timespec *ts; void *data; socklen_t clen, datalen; int i, j, error, *fdp, oldfds; u_int newlen; MPASS((*controlp)->m_next == NULL); /* COMPAT_OLDSOCK may violate */ UNP_LINK_UNLOCK_ASSERT(); p = td->td_proc; fdesc = p->p_fd; error = 0; control = *controlp; *controlp = NULL; initial_controlp = controlp; for (clen = control->m_len, cm = mtod(control, struct cmsghdr *), data = CMSG_DATA(cm); clen >= sizeof(*cm) && cm->cmsg_level == SOL_SOCKET && clen >= cm->cmsg_len && cm->cmsg_len >= sizeof(*cm) && (char *)cm + cm->cmsg_len >= (char *)data; clen -= min(CMSG_SPACE(datalen), clen), cm = (struct cmsghdr *) ((char *)cm + CMSG_SPACE(datalen)), data = CMSG_DATA(cm)) { datalen = (char *)cm + cm->cmsg_len - (char *)data; switch (cm->cmsg_type) { case SCM_CREDS: *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), SCM_CREDS, SOL_SOCKET, M_WAITOK); cmcred = (struct cmsgcred *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); cmcred->cmcred_pid 
	    = p->p_pid;
		cmcred->cmcred_uid = td->td_ucred->cr_ruid;
		cmcred->cmcred_gid = td->td_ucred->cr_rgid;
		cmcred->cmcred_euid = td->td_ucred->cr_uid;
		cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
		    CMGROUP_MAX);
		for (i = 0; i < cmcred->cmcred_ngroups; i++)
			cmcred->cmcred_groups[i] =
			    td->td_ucred->cr_groups[i];
		break;

	case SCM_RIGHTS:
		oldfds = datalen / sizeof (int);
		if (oldfds == 0)
			continue;
		/*
		 * On some machines sizeof(pointer) is bigger than
		 * sizeof(int), so we need to check whether the data fit
		 * into a single mbuf.  We could allocate several mbufs,
		 * and unp_externalize() should even properly handle that.
		 * But it is not worth complicating the code for an insane
		 * scenario of passing over 200 file descriptors at once.
		 */
		newlen = oldfds * sizeof(fdep[0]);
		if (CMSG_SPACE(newlen) > MCLBYTES) {
			error = EMSGSIZE;
			goto out;
		}
		/*
		 * Check that all the FDs passed in refer to legal
		 * files.  If not, reject the entire operation.
		 */
		fdp = data;
		FILEDESC_SLOCK(fdesc);
		for (i = 0; i < oldfds; i++, fdp++) {
			fp = fget_noref(fdesc, *fdp);
			if (fp == NULL) {
				FILEDESC_SUNLOCK(fdesc);
				error = EBADF;
				goto out;
			}
			if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
				FILEDESC_SUNLOCK(fdesc);
				error = EOPNOTSUPP;
				goto out;
			}
		}
		/*
		 * Now replace the integer FDs with pointers to the
		 * file structure and capability rights.
		 */
		*controlp = sbcreatecontrol(NULL, newlen,
		    SCM_RIGHTS, SOL_SOCKET, M_WAITOK);
		fdp = data;
		for (i = 0; i < oldfds; i++, fdp++) {
			if (!fhold(fdesc->fd_ofiles[*fdp].fde_file)) {
				fdp = data;
				for (j = 0; j < i; j++, fdp++) {
					fdrop(fdesc->fd_ofiles[*fdp].fde_file,
					    td);
				}
				FILEDESC_SUNLOCK(fdesc);
				error = EBADF;
				goto out;
			}
		}
		fdp = data;
		fdep = (struct filedescent **)
		    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
		fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
		    M_WAITOK);
		for (i = 0; i < oldfds; i++, fdev++, fdp++) {
			fde = &fdesc->fd_ofiles[*fdp];
			fdep[i] = fdev;
			fdep[i]->fde_file = fde->fde_file;
			filecaps_copy(&fde->fde_caps,
			    &fdep[i]->fde_caps, true);
			unp_internalize_fp(fdep[i]->fde_file);
		}
		FILEDESC_SUNLOCK(fdesc);
		break;

	case SCM_TIMESTAMP:
		*controlp = sbcreatecontrol(NULL, sizeof(*tv),
		    SCM_TIMESTAMP, SOL_SOCKET, M_WAITOK);
		tv = (struct timeval *)
		    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
		microtime(tv);
		break;

	case SCM_BINTIME:
		*controlp = sbcreatecontrol(NULL, sizeof(*bt),
		    SCM_BINTIME, SOL_SOCKET, M_WAITOK);
		bt = (struct bintime *)
		    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
		bintime(bt);
		break;

	case SCM_REALTIME:
		*controlp = sbcreatecontrol(NULL, sizeof(*ts),
		    SCM_REALTIME, SOL_SOCKET, M_WAITOK);
		ts = (struct timespec *)
		    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
		nanotime(ts);
		break;

	case SCM_MONOTONIC:
		*controlp = sbcreatecontrol(NULL, sizeof(*ts),
		    SCM_MONOTONIC, SOL_SOCKET, M_WAITOK);
		ts = (struct timespec *)
		    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
		nanouptime(ts);
		break;

	default:
		error = EINVAL;
		goto out;
	}

	if (space != NULL) {
		*space += (*controlp)->m_len;
		*mbcnt += MSIZE;
		if ((*controlp)->m_flags & M_EXT)
			*mbcnt += (*controlp)->m_ext.ext_size;
		*clast = *controlp;
	}
	controlp = &(*controlp)->m_next;
}
if (clen > 0)
	error = EINVAL;
out:
if (error != 0 && initial_controlp != NULL)
	unp_internalize_cleanup_rights(*initial_controlp);
m_freem(control);
return (error);
}

static struct mbuf *
unp_addsockcred(struct thread *td, struct mbuf *control, int mode,
    struct mbuf **clast, u_int *space, u_int *mbcnt)
{
	struct mbuf *m, *n, *n_prev;
	const struct cmsghdr *cm;
	int ngroups, i, cmsgtype;
	size_t ctrlsz;

	ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX);
	if (mode & UNP_WANTCRED_ALWAYS) {
		ctrlsz =
SOCKCRED2SIZE(ngroups); cmsgtype = SCM_CREDS2; } else { ctrlsz = SOCKCREDSIZE(ngroups); cmsgtype = SCM_CREDS; } m = sbcreatecontrol(NULL, ctrlsz, cmsgtype, SOL_SOCKET, M_NOWAIT); if (m == NULL) return (control); MPASS((m->m_flags & M_EXT) == 0 && m->m_next == NULL); if (mode & UNP_WANTCRED_ALWAYS) { struct sockcred2 *sc; sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *)); sc->sc_version = 0; sc->sc_pid = td->td_proc->p_pid; sc->sc_uid = td->td_ucred->cr_ruid; sc->sc_euid = td->td_ucred->cr_uid; sc->sc_gid = td->td_ucred->cr_rgid; sc->sc_egid = td->td_ucred->cr_gid; sc->sc_ngroups = ngroups; for (i = 0; i < sc->sc_ngroups; i++) sc->sc_groups[i] = td->td_ucred->cr_groups[i]; } else { struct sockcred *sc; sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *)); sc->sc_uid = td->td_ucred->cr_ruid; sc->sc_euid = td->td_ucred->cr_uid; sc->sc_gid = td->td_ucred->cr_rgid; sc->sc_egid = td->td_ucred->cr_gid; sc->sc_ngroups = ngroups; for (i = 0; i < sc->sc_ngroups; i++) sc->sc_groups[i] = td->td_ucred->cr_groups[i]; } /* * Unlink SCM_CREDS control messages (struct cmsgcred), since just * created SCM_CREDS control message (struct sockcred) has another * format. */ if (control != NULL && cmsgtype == SCM_CREDS) for (n = control, n_prev = NULL; n != NULL;) { cm = mtod(n, struct cmsghdr *); if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_CREDS) { if (n_prev == NULL) control = n->m_next; else n_prev->m_next = n->m_next; if (space != NULL) { MPASS(*space >= n->m_len); *space -= n->m_len; MPASS(*mbcnt >= MSIZE); *mbcnt -= MSIZE; if (n->m_flags & M_EXT) { MPASS(*mbcnt >= n->m_ext.ext_size); *mbcnt -= n->m_ext.ext_size; } MPASS(clast); if (*clast == n) { MPASS(n->m_next == NULL); if (n_prev == NULL) *clast = m; else *clast = n_prev; } } n = m_free(n); } else { n_prev = n; n = n->m_next; } } /* Prepend it to the head. 
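 * The resulting control chain is:
 *
 *	[SCM_CREDS or SCM_CREDS2 sockcred] -> [remaining cmsgs, if any]
 *
 * so a receiver always finds the kernel-supplied credentials first.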
 */
m->m_next = control;
if (space != NULL) {
	*space += m->m_len;
	*mbcnt += MSIZE;
	if (control == NULL)
		*clast = m;
}
return (m);
}

static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &localdomain)
		return (NULL);
	return sotounpcb(so);
}

static void
unp_discard(struct file *fp)
{
	struct unp_defer *dr;

	if (unp_externalize_fp(fp)) {
		dr = malloc(sizeof(*dr), M_TEMP, M_WAITOK);
		dr->ud_fp = fp;
		UNP_DEFERRED_LOCK();
		SLIST_INSERT_HEAD(&unp_defers, dr, ud_link);
		UNP_DEFERRED_UNLOCK();
		atomic_add_int(&unp_defers_count, 1);
		taskqueue_enqueue(taskqueue_thread, &unp_defer_task);
	} else
		closef_nothread(fp);
}

static void
unp_process_defers(void *arg __unused, int pending)
{
	struct unp_defer *dr;
	SLIST_HEAD(, unp_defer) drl;
	int count;

	SLIST_INIT(&drl);
	for (;;) {
		UNP_DEFERRED_LOCK();
		if (SLIST_FIRST(&unp_defers) == NULL) {
			UNP_DEFERRED_UNLOCK();
			break;
		}
		SLIST_SWAP(&unp_defers, &drl, unp_defer);
		UNP_DEFERRED_UNLOCK();
		count = 0;
		while ((dr = SLIST_FIRST(&drl)) != NULL) {
			SLIST_REMOVE_HEAD(&drl, ud_link);
			closef_nothread(dr->ud_fp);
			free(dr, M_TEMP);
			count++;
		}
		atomic_add_int(&unp_defers_count, -count);
	}
}

static void
unp_internalize_fp(struct file *fp)
{
	struct unpcb *unp;

	UNP_LINK_WLOCK();
	if ((unp = fptounp(fp)) != NULL) {
		unp->unp_file = fp;
		unp->unp_msgcount++;
	}
	unp_rights++;
	UNP_LINK_WUNLOCK();
}

static int
unp_externalize_fp(struct file *fp)
{
	struct unpcb *unp;
	int ret;

	UNP_LINK_WLOCK();
	if ((unp = fptounp(fp)) != NULL) {
		unp->unp_msgcount--;
		ret = 1;
	} else
		ret = 0;
	unp_rights--;
	UNP_LINK_WUNLOCK();
	return (ret);
}

/*
 * unp_marked indicates whether additional work has been deferred for a
 * future pass through unp_gc().  It is thread local and does not require
 * explicit synchronization.
 */
static int	unp_marked;

static void
unp_remove_dead_ref(struct filedescent **fdep, int fdcount)
{
	struct unpcb *unp;
	struct file *fp;
	int i;

	/*
	 * This function can only be called from the gc task.
	 */
	KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
	    ("%s: not on gc callout", __func__));
	UNP_LINK_LOCK_ASSERT();

	for (i = 0; i < fdcount; i++) {
		fp = fdep[i]->fde_file;
		if ((unp = fptounp(fp)) == NULL)
			continue;
		if ((unp->unp_gcflag & UNPGC_DEAD) == 0)
			continue;
		unp->unp_gcrefs--;
	}
}

static void
unp_restore_undead_ref(struct filedescent **fdep, int fdcount)
{
	struct unpcb *unp;
	struct file *fp;
	int i;

	/*
	 * This function can only be called from the gc task.
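	 *
	 * Each reference restored here may in turn make further sockets
	 * reachable, so unp_marked is set to force another pass of the
	 * outer loop in unp_gc() until a full scan discovers nothing new.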
*/ KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0, ("%s: not on gc callout", __func__)); UNP_LINK_LOCK_ASSERT(); for (i = 0; i < fdcount; i++) { fp = fdep[i]->fde_file; if ((unp = fptounp(fp)) == NULL) continue; if ((unp->unp_gcflag & UNPGC_DEAD) == 0) continue; unp->unp_gcrefs++; unp_marked++; } } static void unp_scan_socket(struct socket *so, void (*op)(struct filedescent **, int)) { struct sockbuf *sb; SOCK_LOCK_ASSERT(so); if (sotounpcb(so)->unp_gcflag & UNPGC_IGNORE_RIGHTS) return; SOCK_RECVBUF_LOCK(so); switch (so->so_type) { case SOCK_DGRAM: unp_scan(STAILQ_FIRST(&so->so_rcv.uxdg_mb), op); unp_scan(so->so_rcv.uxdg_peeked, op); TAILQ_FOREACH(sb, &so->so_rcv.uxdg_conns, uxdg_clist) unp_scan(STAILQ_FIRST(&sb->uxdg_mb), op); break; case SOCK_STREAM: case SOCK_SEQPACKET: unp_scan(so->so_rcv.sb_mb, op); break; } SOCK_RECVBUF_UNLOCK(so); } static void unp_gc_scan(struct unpcb *unp, void (*op)(struct filedescent **, int)) { struct socket *so, *soa; so = unp->unp_socket; SOCK_LOCK(so); if (SOLISTENING(so)) { /* * Mark all sockets in our accept queue. */ TAILQ_FOREACH(soa, &so->sol_comp, so_list) unp_scan_socket(soa, op); } else { /* * Mark all sockets we reference with RIGHTS. */ unp_scan_socket(so, op); } SOCK_UNLOCK(so); } static int unp_recycled; SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, "Number of unreachable sockets claimed by the garbage collector."); static int unp_taskcount; SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, "Number of times the garbage collector has run."); SYSCTL_UINT(_net_local, OID_AUTO, sockcount, CTLFLAG_RD, &unp_count, 0, "Number of active local sockets."); static void unp_gc(__unused void *arg, int pending) { struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead, NULL }; struct unp_head **head; struct unp_head unp_deadhead; /* List of potentially-dead sockets. */ struct file *f, **unref; struct unpcb *unp, *unptmp; int i, total, unp_unreachable; LIST_INIT(&unp_deadhead); unp_taskcount++; UNP_LINK_RLOCK(); /* * First determine which sockets may be in cycles. */ unp_unreachable = 0; for (head = heads; *head != NULL; head++) LIST_FOREACH(unp, *head, unp_link) { KASSERT((unp->unp_gcflag & ~UNPGC_IGNORE_RIGHTS) == 0, ("%s: unp %p has unexpected gc flags 0x%x", __func__, unp, (unsigned int)unp->unp_gcflag)); f = unp->unp_file; /* * Check for an unreachable socket potentially in a * cycle. It must be in a queue as indicated by * msgcount, and this must equal the file reference * count. Note that when msgcount is 0 the file is * NULL. */ if (f != NULL && unp->unp_msgcount != 0 && refcount_load(&f->f_count) == unp->unp_msgcount) { LIST_INSERT_HEAD(&unp_deadhead, unp, unp_dead); unp->unp_gcflag |= UNPGC_DEAD; unp->unp_gcrefs = unp->unp_msgcount; unp_unreachable++; } } /* * Scan all sockets previously marked as potentially being in a cycle * and remove the references each socket holds on any UNPGC_DEAD * sockets in its queue. After this step, all remaining references on * sockets marked UNPGC_DEAD should not be part of any cycle. */ LIST_FOREACH(unp, &unp_deadhead, unp_dead) unp_gc_scan(unp, unp_remove_dead_ref); /* * If a socket still has a non-negative refcount, it cannot be in a * cycle. In this case increment refcount of all children iteratively. * Stop the scan once we do a complete loop without discovering * a new reachable socket. 
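 *
 * A worked example (hypothetical): sockets A and B each hold the
 * other's descriptor in their receive queue and both have been closed
 * by userland:
 *
 *	A: unp_msgcount 1 == f_count 1 -> UNPGC_DEAD, unp_gcrefs = 1
 *	B: unp_msgcount 1 == f_count 1 -> UNPGC_DEAD, unp_gcrefs = 1
 *
 * The dead-ref scan drops A's reference to B and B's to A, leaving
 * both with unp_gcrefs 0: a genuine cycle, reclaimed below.  If A's
 * descriptor additionally sat in the queue of a live socket, that
 * reference would never be subtracted (only the queues of dead sockets
 * are scanned), A would keep unp_gcrefs > 0, and the loop below would
 * pull it back off the dead list, restoring the references it holds.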
*/ do { unp_marked = 0; LIST_FOREACH_SAFE(unp, &unp_deadhead, unp_dead, unptmp) if (unp->unp_gcrefs > 0) { unp->unp_gcflag &= ~UNPGC_DEAD; LIST_REMOVE(unp, unp_dead); KASSERT(unp_unreachable > 0, ("%s: unp_unreachable underflow.", __func__)); unp_unreachable--; unp_gc_scan(unp, unp_restore_undead_ref); } } while (unp_marked); UNP_LINK_RUNLOCK(); if (unp_unreachable == 0) return; /* * Allocate space for a local array of dead unpcbs. * TODO: can this path be simplified by instead using the local * dead list at unp_deadhead, after taking out references * on the file object and/or unpcb and dropping the link lock? */ unref = malloc(unp_unreachable * sizeof(struct file *), M_TEMP, M_WAITOK); /* * Iterate looking for sockets which have been specifically marked * as unreachable and store them locally. */ UNP_LINK_RLOCK(); total = 0; LIST_FOREACH(unp, &unp_deadhead, unp_dead) { KASSERT((unp->unp_gcflag & UNPGC_DEAD) != 0, ("%s: unp %p not marked UNPGC_DEAD", __func__, unp)); unp->unp_gcflag &= ~UNPGC_DEAD; f = unp->unp_file; if (unp->unp_msgcount == 0 || f == NULL || refcount_load(&f->f_count) != unp->unp_msgcount || !fhold(f)) continue; unref[total++] = f; KASSERT(total <= unp_unreachable, ("%s: incorrect unreachable count.", __func__)); } UNP_LINK_RUNLOCK(); /* * Now flush all sockets, free'ing rights. This will free the * struct files associated with these sockets but leave each socket * with one remaining ref. */ for (i = 0; i < total; i++) { struct socket *so; so = unref[i]->f_data; CURVNET_SET(so->so_vnet); sorflush(so); CURVNET_RESTORE(); } /* * And finally release the sockets so they can be reclaimed. */ for (i = 0; i < total; i++) fdrop(unref[i], NULL); unp_recycled += total; free(unref, M_TEMP); } /* * Synchronize against unp_gc, which can trip over data as we are freeing it. */ static void unp_dispose(struct socket *so) { struct sockbuf *sb; struct unpcb *unp; struct mbuf *m; MPASS(!SOLISTENING(so)); unp = sotounpcb(so); UNP_LINK_WLOCK(); unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS; UNP_LINK_WUNLOCK(); /* * Grab our special mbufs before calling sbrelease(). */ SOCK_RECVBUF_LOCK(so); switch (so->so_type) { case SOCK_DGRAM: while ((sb = TAILQ_FIRST(&so->so_rcv.uxdg_conns)) != NULL) { STAILQ_CONCAT(&so->so_rcv.uxdg_mb, &sb->uxdg_mb); TAILQ_REMOVE(&so->so_rcv.uxdg_conns, sb, uxdg_clist); /* Note: socket of sb may reconnect. */ sb->uxdg_cc = sb->uxdg_ctl = sb->uxdg_mbcnt = 0; } sb = &so->so_rcv; if (sb->uxdg_peeked != NULL) { STAILQ_INSERT_HEAD(&sb->uxdg_mb, sb->uxdg_peeked, m_stailqpkt); sb->uxdg_peeked = NULL; } m = STAILQ_FIRST(&sb->uxdg_mb); STAILQ_INIT(&sb->uxdg_mb); /* XXX: our shortened sbrelease() */ (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY); /* * XXXGL Mark sb with SBS_CANTRCVMORE. This is needed to * prevent uipc_sosend_dgram() or unp_disconnect() adding more * data to the socket. * We are now in dom_dispose and it could be a call from * soshutdown() or from the final sofree(). The sofree() case * is simple as it guarantees that no more sends will happen, * however we can race with unp_disconnect() from our peer. * The shutdown(2) case is more exotic. It would call into * dom_dispose() only if socket is SS_ISCONNECTED. This is * possible if we did connect(2) on this socket and we also * had it bound with bind(2) and receive connections from other * sockets. Because soshutdown() violates POSIX (see comment * there) we will end up here shutting down our receive side. 
 * Of course this will have an effect not only on the peer we
 * connect(2)ed to, but also on all of the peers who had
 * connect(2)ed to us.  Their sends would end up with ENOBUFS.
 */
sb->sb_state |= SBS_CANTRCVMORE;
break;
case SOCK_STREAM:
case SOCK_SEQPACKET:
	sb = &so->so_rcv;
	m = sbcut_locked(sb, sb->sb_ccc);
	KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
	    ("%s: ccc %u mb %p mbcnt %u", __func__,
	    sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
	sbrelease_locked(so, SO_RCV);
	break;
}
SOCK_RECVBUF_UNLOCK(so);
if (SOCK_IO_RECV_OWNED(so))
	SOCK_IO_RECV_UNLOCK(so);
if (m != NULL) {
	unp_scan(m, unp_freerights);
	m_freem(m);
}
}

static void
unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
{
	struct mbuf *m;
	struct cmsghdr *cm;
	void *data;
	socklen_t clen, datalen;

	while (m0 != NULL) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type != MT_CONTROL)
				continue;

			cm = mtod(m, struct cmsghdr *);
			clen = m->m_len;
			while (cm != NULL) {
				if (sizeof(*cm) > clen || cm->cmsg_len > clen)
					break;

				data = CMSG_DATA(cm);
				datalen = (caddr_t)cm + cm->cmsg_len -
				    (caddr_t)data;

				if (cm->cmsg_level == SOL_SOCKET &&
				    cm->cmsg_type == SCM_RIGHTS) {
					(*op)(data, datalen /
					    sizeof(struct filedescent *));
				}

				if (CMSG_SPACE(datalen) < clen) {
					clen -= CMSG_SPACE(datalen);
					cm = (struct cmsghdr *)
					    ((caddr_t)cm +
					    CMSG_SPACE(datalen));
				} else {
					clen = 0;
					cm = NULL;
				}
			}
		}
		m0 = m0->m_nextpkt;
	}
}

/*
 * Definitions of protocols supported in the LOCAL domain.
 */
static struct protosw streamproto = {
	.pr_type =		SOCK_STREAM,
	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS|
				    PR_CAPATTACH,
	.pr_ctloutput =		&uipc_ctloutput,
	.pr_abort =		uipc_abort,
	.pr_accept =		uipc_accept,
	.pr_attach =		uipc_attach,
	.pr_bind =		uipc_bind,
	.pr_bindat =		uipc_bindat,
	.pr_connect =		uipc_connect,
	.pr_connectat =		uipc_connectat,
	.pr_connect2 =		uipc_connect2,
	.pr_detach =		uipc_detach,
	.pr_disconnect =	uipc_disconnect,
	.pr_listen =		uipc_listen,
	.pr_peeraddr =		uipc_peeraddr,
	.pr_rcvd =		uipc_rcvd,
	.pr_send =		uipc_send,
	.pr_ready =		uipc_ready,
	.pr_sense =		uipc_sense,
	.pr_shutdown =		uipc_shutdown,
	.pr_sockaddr =		uipc_sockaddr,
	.pr_soreceive =		soreceive_generic,
	.pr_close =		uipc_close,
};

static struct protosw dgramproto = {
	.pr_type =		SOCK_DGRAM,
	.pr_flags =		PR_ATOMIC | PR_ADDR | PR_RIGHTS |
				    PR_CAPATTACH | PR_SOCKBUF,
	.pr_ctloutput =		&uipc_ctloutput,
	.pr_abort =		uipc_abort,
	.pr_accept =		uipc_accept,
	.pr_attach =		uipc_attach,
	.pr_bind =		uipc_bind,
	.pr_bindat =		uipc_bindat,
	.pr_connect =		uipc_connect,
	.pr_connectat =		uipc_connectat,
	.pr_connect2 =		uipc_connect2,
	.pr_detach =		uipc_detach,
	.pr_disconnect =	uipc_disconnect,
	.pr_peeraddr =		uipc_peeraddr,
	.pr_sosend =		uipc_sosend_dgram,
	.pr_sense =		uipc_sense,
	.pr_shutdown =		uipc_shutdown,
	.pr_sockaddr =		uipc_sockaddr,
	.pr_soreceive =		uipc_soreceive_dgram,
	.pr_close =		uipc_close,
};

static struct protosw seqpacketproto = {
	.pr_type =		SOCK_SEQPACKET,
	/*
	 * XXXRW: For now, PR_ADDR because soreceive will bump into them
	 * due to our use of sbappendaddr.  A new sbappend variant is needed
	 * that supports both atomic record writes and control data.
	 */
*/ .pr_flags = PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED| PR_WANTRCVD|PR_RIGHTS|PR_CAPATTACH, .pr_ctloutput = &uipc_ctloutput, .pr_abort = uipc_abort, .pr_accept = uipc_accept, .pr_attach = uipc_attach, .pr_bind = uipc_bind, .pr_bindat = uipc_bindat, .pr_connect = uipc_connect, .pr_connectat = uipc_connectat, .pr_connect2 = uipc_connect2, .pr_detach = uipc_detach, .pr_disconnect = uipc_disconnect, .pr_listen = uipc_listen, .pr_peeraddr = uipc_peeraddr, .pr_rcvd = uipc_rcvd, .pr_send = uipc_send, .pr_sense = uipc_sense, .pr_shutdown = uipc_shutdown, .pr_sockaddr = uipc_sockaddr, .pr_soreceive = soreceive_generic, /* XXX: or...? */ .pr_close = uipc_close, }; static struct domain localdomain = { .dom_family = AF_LOCAL, .dom_name = "local", .dom_externalize = unp_externalize, .dom_dispose = unp_dispose, .dom_nprotosw = 3, .dom_protosw = { &streamproto, &dgramproto, &seqpacketproto, } }; DOMAIN_SET(local); /* * A helper function called by VFS before socket-type vnode reclamation. * For an active vnode it clears unp_vnode pointer and decrements unp_vnode * use count. */ void vfs_unp_reclaim(struct vnode *vp) { struct unpcb *unp; int active; struct mtx *vplock; ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim"); KASSERT(vp->v_type == VSOCK, ("vfs_unp_reclaim: vp->v_type != VSOCK")); active = 0; vplock = mtx_pool_find(mtxpool_sleep, vp); mtx_lock(vplock); VOP_UNP_CONNECT(vp, &unp); if (unp == NULL) goto done; UNP_PCB_LOCK(unp); if (unp->unp_vnode == vp) { VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; active = 1; } UNP_PCB_UNLOCK(unp); done: mtx_unlock(vplock); if (active) vunref(vp); } #ifdef DDB static void db_print_indent(int indent) { int i; for (i = 0; i < indent; i++) db_printf(" "); } static void db_print_unpflags(int unp_flags) { int comma; comma = 0; if (unp_flags & UNP_HAVEPC) { db_printf("%sUNP_HAVEPC", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_WANTCRED_ALWAYS) { db_printf("%sUNP_WANTCRED_ALWAYS", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_WANTCRED_ONESHOT) { db_printf("%sUNP_WANTCRED_ONESHOT", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_CONNWAIT) { db_printf("%sUNP_CONNWAIT", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_CONNECTING) { db_printf("%sUNP_CONNECTING", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_BINDING) { db_printf("%sUNP_BINDING", comma ? ", " : ""); comma = 1; } } static void db_print_xucred(int indent, struct xucred *xu) { int comma, i; db_print_indent(indent); db_printf("cr_version: %u cr_uid: %u cr_pid: %d cr_ngroups: %d\n", xu->cr_version, xu->cr_uid, xu->cr_pid, xu->cr_ngroups); db_print_indent(indent); db_printf("cr_groups: "); comma = 0; for (i = 0; i < xu->cr_ngroups; i++) { db_printf("%s%u", comma ? ", " : "", xu->cr_groups[i]); comma = 1; } db_printf("\n"); } static void db_print_unprefs(int indent, struct unp_head *uh) { struct unpcb *unp; int counter; counter = 0; LIST_FOREACH(unp, uh, unp_reflink) { if (counter % 4 == 0) db_print_indent(indent); db_printf("%p ", unp); if (counter % 4 == 3) db_printf("\n"); counter++; } if (counter != 0 && counter % 4 != 0) db_printf("\n"); } DB_SHOW_COMMAND(unpcb, db_show_unpcb) { struct unpcb *unp; if (!have_addr) { db_printf("usage: show unpcb \n"); return; } unp = (struct unpcb *)addr; db_printf("unp_socket: %p unp_vnode: %p\n", unp->unp_socket, unp->unp_vnode); db_printf("unp_ino: %ju unp_conn: %p\n", (uintmax_t)unp->unp_ino, unp->unp_conn); db_printf("unp_refs:\n"); db_print_unprefs(2, &unp->unp_refs); /* XXXRW: Would be nice to print the full address, if any. 
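	 *
	 * Usage from ddb(4), with an illustrative address:
	 *
	 *	db> show unpcb 0xfffff80012345678
	 *
	 * which dumps the fields printed below.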
*/ db_printf("unp_addr: %p\n", unp->unp_addr); db_printf("unp_gencnt: %llu\n", (unsigned long long)unp->unp_gencnt); db_printf("unp_flags: %x (", unp->unp_flags); db_print_unpflags(unp->unp_flags); db_printf(")\n"); db_printf("unp_peercred:\n"); db_print_xucred(2, &unp->unp_peercred); db_printf("unp_refcount: %u\n", unp->unp_refcount); } #endif diff --git a/sys/netgraph/bluetooth/include/ng_btsocket_l2cap.h b/sys/netgraph/bluetooth/include/ng_btsocket_l2cap.h index baaef9abb736..b512112f8b7d 100644 --- a/sys/netgraph/bluetooth/include/ng_btsocket_l2cap.h +++ b/sys/netgraph/bluetooth/include/ng_btsocket_l2cap.h @@ -1,214 +1,214 @@ /* * ng_btsocket_l2cap.h */ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001-2002 Maksim Yevmenkin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
 *
 * $Id: ng_btsocket_l2cap.h,v 1.4 2003/03/25 23:53:33 max Exp $
 */

#ifndef _NETGRAPH_BTSOCKET_L2CAP_H_
#define _NETGRAPH_BTSOCKET_L2CAP_H_

/*
 * L2CAP routing entry
 */

struct ng_hook;
struct ng_message;

struct ng_btsocket_l2cap_rtentry {
	bdaddr_t		 src;  /* source BD_ADDR */
	struct ng_hook		*hook; /* downstream hook */
	LIST_ENTRY(ng_btsocket_l2cap_rtentry)
				 next; /* link to next */
};
typedef struct ng_btsocket_l2cap_rtentry	ng_btsocket_l2cap_rtentry_t;
typedef struct ng_btsocket_l2cap_rtentry *	ng_btsocket_l2cap_rtentry_p;

/*****************************************************************************
 *****************************************************************************
 **                         SOCK_RAW L2CAP sockets                          **
 *****************************************************************************
 *****************************************************************************/

#define NG_BTSOCKET_L2CAP_RAW_SENDSPACE	NG_L2CAP_MTU_DEFAULT
#define NG_BTSOCKET_L2CAP_RAW_RECVSPACE	NG_L2CAP_MTU_DEFAULT

/*
 * Bluetooth raw L2CAP socket PCB
 */

struct ng_btsocket_l2cap_raw_pcb {
	struct socket			*so;	  /* socket */
	u_int32_t			 flags;	  /* flags */
#define NG_BTSOCKET_L2CAP_RAW_PRIVILEGED	(1 << 0)
	bdaddr_t			 src;	  /* source address */
	bdaddr_t			 dst;	  /* dest address */
	uint8_t				 srctype; /* source addr type */
	uint8_t				 dsttype; /* dest addr type */
	ng_btsocket_l2cap_rtentry_p	 rt;	  /* routing info */
	u_int32_t			 token;	  /* message token */
	struct ng_mesg			*msg;	  /* message */
	struct mtx			 pcb_mtx; /* pcb mutex */
	LIST_ENTRY(ng_btsocket_l2cap_raw_pcb)
					 next;	  /* link to next PCB */
};
typedef struct ng_btsocket_l2cap_raw_pcb	ng_btsocket_l2cap_raw_pcb_t;
typedef struct ng_btsocket_l2cap_raw_pcb *	ng_btsocket_l2cap_raw_pcb_p;

#define so2l2cap_raw_pcb(so) \
	((struct ng_btsocket_l2cap_raw_pcb *)((so)->so_pcb))

/*
 * Bluetooth raw L2CAP socket methods
 */

#ifdef _KERNEL
void ng_btsocket_l2cap_raw_abort      (struct socket *);
void ng_btsocket_l2cap_raw_close      (struct socket *);
int  ng_btsocket_l2cap_raw_attach     (struct socket *, int, struct thread *);
int  ng_btsocket_l2cap_raw_bind       (struct socket *, struct sockaddr *,
				       struct thread *);
int  ng_btsocket_l2cap_raw_connect    (struct socket *, struct sockaddr *,
				       struct thread *);
int  ng_btsocket_l2cap_raw_control    (struct socket *, u_long, void *,
				       struct ifnet *, struct thread *);
void ng_btsocket_l2cap_raw_detach     (struct socket *);
int  ng_btsocket_l2cap_raw_disconnect (struct socket *);
int  ng_btsocket_l2cap_raw_peeraddr   (struct socket *, struct sockaddr **);
int  ng_btsocket_l2cap_raw_send       (struct socket *, int, struct mbuf *,
				       struct sockaddr *, struct mbuf *,
				       struct thread *);
int  ng_btsocket_l2cap_raw_sockaddr   (struct socket *, struct sockaddr **);
#endif /* _KERNEL */

/*****************************************************************************
 *****************************************************************************
 **                      SOCK_SEQPACKET L2CAP sockets                       **
 *****************************************************************************
 *****************************************************************************/

#define NG_BTSOCKET_L2CAP_SENDSPACE	NG_L2CAP_MTU_DEFAULT /* (64 * 1024) */
#define NG_BTSOCKET_L2CAP_RECVSPACE	(64 * 1024)

/*
 * Bluetooth L2CAP socket PCB
 */

struct ng_btsocket_l2cap_pcb {
	struct socket		*so;	   /* Pointer to socket */
	bdaddr_t		 src;	   /* Source address */
	bdaddr_t		 dst;	   /* Destination address */
	uint8_t			 srctype;  /* source addr type */
	uint8_t			 dsttype;  /* dest addr type */
	u_int16_t		 psm;	   /* PSM */
	u_int16_t		 cid;	   /* Local channel ID */
	uint8_t			 idtype;
	u_int16_t		 flags;	   /* socket flags */
#define
NG_BTSOCKET_L2CAP_CLIENT (1 << 0) /* socket is client */ #define NG_BTSOCKET_L2CAP_TIMO (1 << 1) /* timeout pending */ u_int8_t state; /* socket state */ #define NG_BTSOCKET_L2CAP_CLOSED 0 /* socket closed */ #define NG_BTSOCKET_L2CAP_CONNECTING 1 /* wait for connect */ #define NG_BTSOCKET_L2CAP_CONFIGURING 2 /* wait for config */ #define NG_BTSOCKET_L2CAP_OPEN 3 /* socket open */ #define NG_BTSOCKET_L2CAP_DISCONNECTING 4 /* wait for disconnect */ #define NG_BTSOCKET_L2CAP_W4_ENC_CHANGE 5 u_int8_t cfg_state; /* config state */ #define NG_BTSOCKET_L2CAP_CFG_IN (1 << 0) /* incoming path done */ #define NG_BTSOCKET_L2CAP_CFG_OUT (1 << 1) /* outgoing path done */ #define NG_BTSOCKET_L2CAP_CFG_BOTH \ (NG_BTSOCKET_L2CAP_CFG_IN | NG_BTSOCKET_L2CAP_CFG_OUT) #define NG_BTSOCKET_L2CAP_CFG_IN_SENT (1 << 2) /* L2CAP ConfigReq sent */ #define NG_BTSOCKET_L2CAP_CFG_OUT_SENT (1 << 3) /* ---/--- */ uint8_t encryption; u_int16_t imtu; /* Incoming MTU */ ng_l2cap_flow_t iflow; /* Input flow spec */ u_int16_t omtu; /* Outgoing MTU */ ng_l2cap_flow_t oflow; /* Outgoing flow spec */ u_int16_t flush_timo; /* flush timeout */ u_int16_t link_timo; /* link timeout */ struct callout timo; /* timeout */ u_int32_t token; /* message token */ ng_btsocket_l2cap_rtentry_p rt; /* routing info */ struct mtx pcb_mtx; /* pcb mutex */ uint16_t need_encrypt; /*encryption needed*/ LIST_ENTRY(ng_btsocket_l2cap_pcb) next; /* link to next PCB */ }; typedef struct ng_btsocket_l2cap_pcb ng_btsocket_l2cap_pcb_t; typedef struct ng_btsocket_l2cap_pcb * ng_btsocket_l2cap_pcb_p; #define so2l2cap_pcb(so) \ ((struct ng_btsocket_l2cap_pcb *)((so)->so_pcb)) /* * Bluetooth L2CAP socket methods */ #ifdef _KERNEL void ng_btsocket_l2cap_abort (struct socket *); void ng_btsocket_l2cap_close (struct socket *); -int ng_btsocket_l2cap_accept (struct socket *, struct sockaddr **); +int ng_btsocket_l2cap_accept (struct socket *, struct sockaddr *); int ng_btsocket_l2cap_attach (struct socket *, int, struct thread *); int ng_btsocket_l2cap_bind (struct socket *, struct sockaddr *, struct thread *); int ng_btsocket_l2cap_connect (struct socket *, struct sockaddr *, struct thread *); int ng_btsocket_l2cap_control (struct socket *, u_long, void *, struct ifnet *, struct thread *); int ng_btsocket_l2cap_ctloutput (struct socket *, struct sockopt *); void ng_btsocket_l2cap_detach (struct socket *); int ng_btsocket_l2cap_disconnect (struct socket *); int ng_btsocket_l2cap_listen (struct socket *, int, struct thread *); int ng_btsocket_l2cap_peeraddr (struct socket *, struct sockaddr **); int ng_btsocket_l2cap_send (struct socket *, int, struct mbuf *, struct sockaddr *, struct mbuf *, struct thread *); int ng_btsocket_l2cap_sockaddr (struct socket *, struct sockaddr **); #endif /* _KERNEL */ #endif /* _NETGRAPH_BTSOCKET_L2CAP_H_ */ diff --git a/sys/netgraph/bluetooth/include/ng_btsocket_rfcomm.h b/sys/netgraph/bluetooth/include/ng_btsocket_rfcomm.h index 88c0f8988587..d40b694ece14 100644 --- a/sys/netgraph/bluetooth/include/ng_btsocket_rfcomm.h +++ b/sys/netgraph/bluetooth/include/ng_btsocket_rfcomm.h @@ -1,339 +1,339 @@ /* * ng_btsocket_rfcomm.h */ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001-2003 Maksim Yevmenkin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: ng_btsocket_rfcomm.h,v 1.10 2003/03/29 22:27:42 max Exp $ */ #ifndef _NETGRAPH_BTSOCKET_RFCOMM_H_ #define _NETGRAPH_BTSOCKET_RFCOMM_H_ /***************************************************************************** ***************************************************************************** ** RFCOMM ** ***************************************************************************** *****************************************************************************/ /* XXX FIXME this does not belong here */ #define RFCOMM_DEFAULT_MTU 667 #define RFCOMM_MAX_MTU 1024 #define RFCOMM_DEFAULT_CREDITS 7 #define RFCOMM_MAX_CREDITS 40 /* RFCOMM frame types */ #define RFCOMM_FRAME_SABM 0x2f #define RFCOMM_FRAME_DISC 0x43 #define RFCOMM_FRAME_UA 0x63 #define RFCOMM_FRAME_DM 0x0f #define RFCOMM_FRAME_UIH 0xef /* RFCOMM MCC commands */ #define RFCOMM_MCC_TEST 0x08 /* Test */ #define RFCOMM_MCC_FCON 0x28 /* Flow Control on */ #define RFCOMM_MCC_FCOFF 0x18 /* Flow Control off */ #define RFCOMM_MCC_MSC 0x38 /* Modem Status Command */ #define RFCOMM_MCC_RPN 0x24 /* Remote Port Negotiation */ #define RFCOMM_MCC_RLS 0x14 /* Remote Line Status */ #define RFCOMM_MCC_PN 0x20 /* Port Negotiation */ #define RFCOMM_MCC_NSC 0x04 /* Non Supported Command */ /* RFCOMM modem signals */ #define RFCOMM_MODEM_FC 0x02 /* Flow Control asserted */ #define RFCOMM_MODEM_RTC 0x04 /* Ready To Communicate */ #define RFCOMM_MODEM_RTR 0x08 /* Ready To Receive */ #define RFCOMM_MODEM_IC 0x40 /* Incoming Call */ #define RFCOMM_MODEM_DV 0x80 /* Data Valid */ /* RPN parameters - baud rate */ #define RFCOMM_RPN_BR_2400 0x0 #define RFCOMM_RPN_BR_4800 0x1 #define RFCOMM_RPN_BR_7200 0x2 #define RFCOMM_RPN_BR_9600 0x3 #define RFCOMM_RPN_BR_19200 0x4 #define RFCOMM_RPN_BR_38400 0x5 #define RFCOMM_RPN_BR_57600 0x6 #define RFCOMM_RPN_BR_115200 0x7 #define RFCOMM_RPN_BR_230400 0x8 /* RPN parameters - data bits */ #define RFCOMM_RPN_DATA_5 0x0 #define RFCOMM_RPN_DATA_6 0x2 #define RFCOMM_RPN_DATA_7 0x1 #define RFCOMM_RPN_DATA_8 0x3 /* RPN parameters - stop bit */ #define RFCOMM_RPN_STOP_1 0 #define RFCOMM_RPN_STOP_15 1 /* RPN parameters - parity */ #define RFCOMM_RPN_PARITY_NONE 0x0 #define RFCOMM_RPN_PARITY_ODD 0x4 #define RFCOMM_RPN_PARITY_EVEN 0x5 #define RFCOMM_RPN_PARITY_MARK 0x6 #define RFCOMM_RPN_PARITY_SPACE 0x7 /* RPN parameters - flow control */ #define RFCOMM_RPN_FLOW_NONE 0x00 #define RFCOMM_RPN_XON_CHAR 0x11 #define RFCOMM_RPN_XOFF_CHAR 0x13 /* RPN parameters - mask */ #define RFCOMM_RPN_PM_BITRATE 0x0001 #define RFCOMM_RPN_PM_DATA 0x0002 #define RFCOMM_RPN_PM_STOP 0x0004 #define 
RFCOMM_RPN_PM_PARITY 0x0008 #define RFCOMM_RPN_PM_PARITY_TYPE 0x0010 #define RFCOMM_RPN_PM_XON 0x0020 #define RFCOMM_RPN_PM_XOFF 0x0040 #define RFCOMM_RPN_PM_FLOW 0x3F00 #define RFCOMM_RPN_PM_ALL 0x3F7F /* RFCOMM frame header */ struct rfcomm_frame_hdr { u_int8_t address; u_int8_t control; u_int8_t length; /* Actual size could be 2 bytes */ } __attribute__ ((packed)); /* RFCOMM command frame header */ struct rfcomm_cmd_hdr { u_int8_t address; u_int8_t control; u_int8_t length; u_int8_t fcs; } __attribute__ ((packed)); /* RFCOMM MCC command header */ struct rfcomm_mcc_hdr { u_int8_t type; u_int8_t length; /* XXX FIXME Can actual size be 2 bytes?? */ } __attribute__ ((packed)); /* RFCOMM MSC command */ struct rfcomm_mcc_msc { u_int8_t address; u_int8_t modem; } __attribute__ ((packed)); /* RFCOMM RPN command */ struct rfcomm_mcc_rpn { u_int8_t dlci; u_int8_t bit_rate; u_int8_t line_settings; u_int8_t flow_control; u_int8_t xon_char; u_int8_t xoff_char; u_int16_t param_mask; } __attribute__ ((packed)); /* RFCOMM RLS command */ struct rfcomm_mcc_rls { u_int8_t address; u_int8_t status; } __attribute__ ((packed)); /* RFCOMM PN command */ struct rfcomm_mcc_pn { u_int8_t dlci; u_int8_t flow_control; u_int8_t priority; u_int8_t ack_timer; u_int16_t mtu; u_int8_t max_retrans; u_int8_t credits; } __attribute__ ((packed)); /* RFCOMM frame parsing macros */ #define RFCOMM_DLCI(b) (((b) & 0xfc) >> 2) #define RFCOMM_CHANNEL(b) (((b) & 0xf8) >> 3) #define RFCOMM_DIRECTION(b) (((b) & 0x04) >> 2) #define RFCOMM_TYPE(b) (((b) & 0xef)) #define RFCOMM_EA(b) (((b) & 0x01)) #define RFCOMM_CR(b) (((b) & 0x02) >> 1) #define RFCOMM_PF(b) (((b) & 0x10) >> 4) #define RFCOMM_SRVCHANNEL(dlci) ((dlci) >> 1) #define RFCOMM_MKADDRESS(cr, dlci) \ ((((dlci) & 0x3f) << 2) | ((cr) << 1) | 0x01) #define RFCOMM_MKCONTROL(type, pf) ((((type) & 0xef) | ((pf) << 4))) #define RFCOMM_MKDLCI(dir, channel) ((((channel) & 0x1f) << 1) | (dir)) #define RFCOMM_MKLEN8(len) (((len) << 1) | 1) #define RFCOMM_MKLEN16(len) ((len) << 1) /* RFCOMM MCC macros */ #define RFCOMM_MCC_TYPE(b) (((b) & 0xfc) >> 2) #define RFCOMM_MCC_LENGTH(b) (((b) & 0xfe) >> 1) #define RFCOMM_MKMCC_TYPE(cr, type) ((((type) << 2) | ((cr) << 1) | 0x01)) /* RPN macros */ #define RFCOMM_RPN_DATA_BITS(line) ((line) & 0x3) #define RFCOMM_RPN_STOP_BITS(line) (((line) >> 2) & 0x1) #define RFCOMM_RPN_PARITY(line) (((line) >> 3) & 0x3) #define RFCOMM_MKRPN_LINE_SETTINGS(data, stop, parity) \ (((data) & 0x3) | (((stop) & 0x1) << 2) | (((parity) & 0x3) << 3)) /***************************************************************************** ***************************************************************************** ** SOCK_STREAM RFCOMM sockets ** ***************************************************************************** *****************************************************************************/ #define NG_BTSOCKET_RFCOMM_SENDSPACE \ (RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 2) #define NG_BTSOCKET_RFCOMM_RECVSPACE \ (RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 2) /* * Bluetooth RFCOMM session. 
One L2CAP connection == one RFCOMM session */ struct ng_btsocket_rfcomm_pcb; struct ng_btsocket_rfcomm_session; struct ng_btsocket_rfcomm_session { struct socket *l2so; /* L2CAP socket */ u_int16_t state; /* session state */ #define NG_BTSOCKET_RFCOMM_SESSION_CLOSED 0 #define NG_BTSOCKET_RFCOMM_SESSION_LISTENING 1 #define NG_BTSOCKET_RFCOMM_SESSION_CONNECTING 2 #define NG_BTSOCKET_RFCOMM_SESSION_CONNECTED 3 #define NG_BTSOCKET_RFCOMM_SESSION_OPEN 4 #define NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING 5 u_int16_t flags; /* session flags */ #define NG_BTSOCKET_RFCOMM_SESSION_INITIATOR (1 << 0) /* initiator */ #define NG_BTSOCKET_RFCOMM_SESSION_LFC (1 << 1) /* local flow */ #define NG_BTSOCKET_RFCOMM_SESSION_RFC (1 << 2) /* remote flow */ #define INITIATOR(s) \ (((s)->flags & NG_BTSOCKET_RFCOMM_SESSION_INITIATOR)? 1 : 0) u_int16_t mtu; /* default MTU */ struct ng_bt_mbufq outq; /* outgoing queue */ struct mtx session_mtx; /* session lock */ LIST_HEAD(, ng_btsocket_rfcomm_pcb) dlcs; /* active DLC */ LIST_ENTRY(ng_btsocket_rfcomm_session) next; /* link to next */ }; typedef struct ng_btsocket_rfcomm_session ng_btsocket_rfcomm_session_t; typedef struct ng_btsocket_rfcomm_session * ng_btsocket_rfcomm_session_p; /* * Bluetooth RFCOMM socket PCB (DLC) */ struct ng_btsocket_rfcomm_pcb { struct socket *so; /* RFCOMM socket */ struct ng_btsocket_rfcomm_session *session; /* RFCOMM session */ u_int16_t flags; /* DLC flags */ #define NG_BTSOCKET_RFCOMM_DLC_TIMO (1 << 0) /* timeout pending */ #define NG_BTSOCKET_RFCOMM_DLC_CFC (1 << 1) /* credit flow ctrl */ #define NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT (1 << 2) /* timeout happened */ #define NG_BTSOCKET_RFCOMM_DLC_DETACHED (1 << 3) /* DLC detached */ #define NG_BTSOCKET_RFCOMM_DLC_SENDING (1 << 4) /* send pending */ u_int16_t state; /* DLC state */ #define NG_BTSOCKET_RFCOMM_DLC_CLOSED 0 #define NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT 1 #define NG_BTSOCKET_RFCOMM_DLC_CONFIGURING 2 #define NG_BTSOCKET_RFCOMM_DLC_CONNECTING 3 #define NG_BTSOCKET_RFCOMM_DLC_CONNECTED 4 #define NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING 5 bdaddr_t src; /* source address */ bdaddr_t dst; /* dest. 
address */ u_int8_t channel; /* RFCOMM channel */ u_int8_t dlci; /* RFCOMM DLCI */ u_int8_t lmodem; /* local mdm signls */ u_int8_t rmodem; /* remote -/- */ u_int16_t mtu; /* MTU */ int16_t rx_cred; /* RX credits */ int16_t tx_cred; /* TX credits */ struct mtx pcb_mtx; /* PCB lock */ struct callout timo; /* timeout */ LIST_ENTRY(ng_btsocket_rfcomm_pcb) session_next;/* link to next */ LIST_ENTRY(ng_btsocket_rfcomm_pcb) next; /* link to next */ }; typedef struct ng_btsocket_rfcomm_pcb ng_btsocket_rfcomm_pcb_t; typedef struct ng_btsocket_rfcomm_pcb * ng_btsocket_rfcomm_pcb_p; #define so2rfcomm_pcb(so) \ ((struct ng_btsocket_rfcomm_pcb *)((so)->so_pcb)) /* * Bluetooth RFCOMM socket methods */ #ifdef _KERNEL void ng_btsocket_rfcomm_abort (struct socket *); void ng_btsocket_rfcomm_close (struct socket *); -int ng_btsocket_rfcomm_accept (struct socket *, struct sockaddr **); +int ng_btsocket_rfcomm_accept (struct socket *, struct sockaddr *); int ng_btsocket_rfcomm_attach (struct socket *, int, struct thread *); int ng_btsocket_rfcomm_bind (struct socket *, struct sockaddr *, struct thread *); int ng_btsocket_rfcomm_connect (struct socket *, struct sockaddr *, struct thread *); int ng_btsocket_rfcomm_control (struct socket *, u_long, void *, struct ifnet *, struct thread *); int ng_btsocket_rfcomm_ctloutput (struct socket *, struct sockopt *); void ng_btsocket_rfcomm_detach (struct socket *); int ng_btsocket_rfcomm_disconnect (struct socket *); int ng_btsocket_rfcomm_listen (struct socket *, int, struct thread *); int ng_btsocket_rfcomm_peeraddr (struct socket *, struct sockaddr **); int ng_btsocket_rfcomm_send (struct socket *, int, struct mbuf *, struct sockaddr *, struct mbuf *, struct thread *); int ng_btsocket_rfcomm_sockaddr (struct socket *, struct sockaddr **); #endif /* _KERNEL */ #endif /* _NETGRAPH_BTSOCKET_RFCOMM_H_ */ diff --git a/sys/netgraph/bluetooth/include/ng_btsocket_sco.h b/sys/netgraph/bluetooth/include/ng_btsocket_sco.h index 071ddb8d80ef..282980cce881 100644 --- a/sys/netgraph/bluetooth/include/ng_btsocket_sco.h +++ b/sys/netgraph/bluetooth/include/ng_btsocket_sco.h @@ -1,129 +1,129 @@ /* * ng_btsocket_sco.h */ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001-2002 Maksim Yevmenkin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $Id: ng_btsocket_sco.h,v 1.3 2005/10/31 18:08:52 max Exp $ */ #ifndef _NETGRAPH_BTSOCKET_SCO_H_ #define _NETGRAPH_BTSOCKET_SCO_H_ /* * SCO routing entry */ struct ng_hook; struct ng_message; struct ng_btsocket_sco_rtentry { bdaddr_t src; /* source BD_ADDR */ u_int16_t pkt_size; /* mtu */ u_int16_t num_pkts; /* buffer size */ int32_t pending; /* pending packets */ struct ng_hook *hook; /* downstream hook */ LIST_ENTRY(ng_btsocket_sco_rtentry) next; /* link to next */ }; typedef struct ng_btsocket_sco_rtentry ng_btsocket_sco_rtentry_t; typedef struct ng_btsocket_sco_rtentry * ng_btsocket_sco_rtentry_p; /***************************************************************************** ***************************************************************************** ** SOCK_SEQPACKET SCO sockets ** ***************************************************************************** *****************************************************************************/ #define NG_BTSOCKET_SCO_SENDSPACE 1024 #define NG_BTSOCKET_SCO_RECVSPACE (64 * 1024) /* * Bluetooth SCO socket PCB */ struct ng_btsocket_sco_pcb { struct socket *so; /* Pointer to socket */ bdaddr_t src; /* Source address */ bdaddr_t dst; /* Destination address */ u_int16_t con_handle; /* connection handle */ u_int16_t flags; /* socket flags */ #define NG_BTSOCKET_SCO_CLIENT (1 << 0) /* socket is client */ #define NG_BTSOCKET_SCO_TIMO (1 << 1) /* timeout pending */ u_int8_t state; /* socket state */ #define NG_BTSOCKET_SCO_CLOSED 0 /* socket closed */ #define NG_BTSOCKET_SCO_CONNECTING 1 /* wait for connect */ #define NG_BTSOCKET_SCO_OPEN 2 /* socket open */ #define NG_BTSOCKET_SCO_DISCONNECTING 3 /* wait for disconnect */ struct callout timo; /* timeout */ ng_btsocket_sco_rtentry_p rt; /* routing info */ struct mtx pcb_mtx; /* pcb mutex */ LIST_ENTRY(ng_btsocket_sco_pcb) next; /* link to next PCB */ }; typedef struct ng_btsocket_sco_pcb ng_btsocket_sco_pcb_t; typedef struct ng_btsocket_sco_pcb * ng_btsocket_sco_pcb_p; #define so2sco_pcb(so) \ ((struct ng_btsocket_sco_pcb *)((so)->so_pcb)) /* * Bluetooth SCO socket methods */ #ifdef _KERNEL void ng_btsocket_sco_abort (struct socket *); void ng_btsocket_sco_close (struct socket *); -int ng_btsocket_sco_accept (struct socket *, struct sockaddr **); +int ng_btsocket_sco_accept (struct socket *, struct sockaddr *); int ng_btsocket_sco_attach (struct socket *, int, struct thread *); int ng_btsocket_sco_bind (struct socket *, struct sockaddr *, struct thread *); int ng_btsocket_sco_connect (struct socket *, struct sockaddr *, struct thread *); int ng_btsocket_sco_control (struct socket *, u_long, void *, struct ifnet *, struct thread *); int ng_btsocket_sco_ctloutput (struct socket *, struct sockopt *); void ng_btsocket_sco_detach (struct socket *); int ng_btsocket_sco_disconnect (struct socket *); int ng_btsocket_sco_listen (struct socket *, int, struct thread *); int ng_btsocket_sco_peeraddr (struct socket *, struct sockaddr **); int ng_btsocket_sco_send (struct socket *, int, struct mbuf *, struct sockaddr *, struct mbuf *, struct thread *); int ng_btsocket_sco_sockaddr (struct socket *, struct sockaddr **); #endif /* _KERNEL */ #endif /* _NETGRAPH_BTSOCKET_SCO_H_ */ diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c index 8fc64bb70929..d221cc34d36a 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c @@ -1,2964 +1,2976 @@ /* * ng_btsocket_l2cap.c */ /*- * 
SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001-2002 Maksim Yevmenkin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: ng_btsocket_l2cap.c,v 1.16 2003/09/14 23:29:06 max Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* MALLOC define */ #ifdef NG_SEPARATE_MALLOC static MALLOC_DEFINE(M_NETGRAPH_BTSOCKET_L2CAP, "netgraph_btsocks_l2cap", "Netgraph Bluetooth L2CAP sockets"); #else #define M_NETGRAPH_BTSOCKET_L2CAP M_NETGRAPH #endif /* NG_SEPARATE_MALLOC */ /* Netgraph node methods */ static ng_constructor_t ng_btsocket_l2cap_node_constructor; static ng_rcvmsg_t ng_btsocket_l2cap_node_rcvmsg; static ng_shutdown_t ng_btsocket_l2cap_node_shutdown; static ng_newhook_t ng_btsocket_l2cap_node_newhook; static ng_connect_t ng_btsocket_l2cap_node_connect; static ng_rcvdata_t ng_btsocket_l2cap_node_rcvdata; static ng_disconnect_t ng_btsocket_l2cap_node_disconnect; static void ng_btsocket_l2cap_input (void *, int); static void ng_btsocket_l2cap_rtclean (void *, int); /* Netgraph type descriptor */ static struct ng_type typestruct = { .version = NG_ABI_VERSION, .name = NG_BTSOCKET_L2CAP_NODE_TYPE, .constructor = ng_btsocket_l2cap_node_constructor, .rcvmsg = ng_btsocket_l2cap_node_rcvmsg, .shutdown = ng_btsocket_l2cap_node_shutdown, .newhook = ng_btsocket_l2cap_node_newhook, .connect = ng_btsocket_l2cap_node_connect, .rcvdata = ng_btsocket_l2cap_node_rcvdata, .disconnect = ng_btsocket_l2cap_node_disconnect, }; /* Globals */ extern int ifqmaxlen; static u_int32_t ng_btsocket_l2cap_debug_level; static node_p ng_btsocket_l2cap_node; static struct ng_bt_itemq ng_btsocket_l2cap_queue; static struct mtx ng_btsocket_l2cap_queue_mtx; static struct task ng_btsocket_l2cap_queue_task; static LIST_HEAD(, ng_btsocket_l2cap_pcb) ng_btsocket_l2cap_sockets; static struct mtx ng_btsocket_l2cap_sockets_mtx; static LIST_HEAD(, ng_btsocket_l2cap_rtentry) ng_btsocket_l2cap_rt; static struct mtx ng_btsocket_l2cap_rt_mtx; static struct task ng_btsocket_l2cap_rt_task; static struct timeval ng_btsocket_l2cap_lasttime; static int ng_btsocket_l2cap_curpps; /* Sysctl tree */ 
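/*
 * The globals above implement a deferred-input scheme: the netgraph entry
 * points enqueue items on ng_btsocket_l2cap_queue under
 * ng_btsocket_l2cap_queue_mtx and kick a task on taskqueue_swi_giant, and
 * ng_btsocket_l2cap_input() later drains the queue in task context. A
 * minimal, illustrative sketch of the producer half of that pattern (not
 * compiled; the example_* names are placeholders, not part of this file):
 */
#if 0
static void
example_producer(item_p item)
{
	mtx_lock(&example_queue_mtx);
	if (NG_BT_ITEMQ_FULL(&example_queue)) {
		NG_BT_ITEMQ_DROP(&example_queue);	/* account the drop */
		NG_FREE_ITEM(item);
	} else {
		NG_BT_ITEMQ_ENQUEUE(&example_queue, item);
		taskqueue_enqueue(taskqueue_swi_giant, &example_task);
	}
	mtx_unlock(&example_queue_mtx);
}
#endif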
SYSCTL_DECL(_net_bluetooth_l2cap_sockets); static SYSCTL_NODE(_net_bluetooth_l2cap_sockets, OID_AUTO, seq, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Bluetooth SEQPACKET L2CAP sockets family"); SYSCTL_UINT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, debug_level, CTLFLAG_RW, &ng_btsocket_l2cap_debug_level, NG_BTSOCKET_WARN_LEVEL, "Bluetooth SEQPACKET L2CAP sockets debug level"); SYSCTL_UINT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, queue_len, CTLFLAG_RD, &ng_btsocket_l2cap_queue.len, 0, "Bluetooth SEQPACKET L2CAP sockets input queue length"); SYSCTL_UINT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, queue_maxlen, CTLFLAG_RD, &ng_btsocket_l2cap_queue.maxlen, 0, "Bluetooth SEQPACKET L2CAP sockets input queue max. length"); SYSCTL_UINT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, queue_drops, CTLFLAG_RD, &ng_btsocket_l2cap_queue.drops, 0, "Bluetooth SEQPACKET L2CAP sockets input queue drops"); /* Debug */ #define NG_BTSOCKET_L2CAP_INFO \ if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_INFO_LEVEL && \ ppsratecheck(&ng_btsocket_l2cap_lasttime, &ng_btsocket_l2cap_curpps, 1)) \ printf #define NG_BTSOCKET_L2CAP_WARN \ if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_WARN_LEVEL && \ ppsratecheck(&ng_btsocket_l2cap_lasttime, &ng_btsocket_l2cap_curpps, 1)) \ printf #define NG_BTSOCKET_L2CAP_ERR \ if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_ERR_LEVEL && \ ppsratecheck(&ng_btsocket_l2cap_lasttime, &ng_btsocket_l2cap_curpps, 1)) \ printf #define NG_BTSOCKET_L2CAP_ALERT \ if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_ALERT_LEVEL && \ ppsratecheck(&ng_btsocket_l2cap_lasttime, &ng_btsocket_l2cap_curpps, 1)) \ printf /* * Netgraph message processing routines */ static int ng_btsocket_l2cap_process_l2ca_con_req_rsp (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_con_rsp_rsp (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_con_ind (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_cfg_req_rsp (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_cfg_ind (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_discon_rsp (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_discon_ind (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_write_rsp (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); /* * Send L2CA_xxx messages to the lower layer */ static int ng_btsocket_l2cap_send_l2ca_con_req (ng_btsocket_l2cap_pcb_p); static int ng_btsocket_l2cap_send_l2ca_con_rsp_req (u_int32_t, ng_btsocket_l2cap_rtentry_p, bdaddr_p, int, int, int, int); static int ng_btsocket_l2cap_send_l2ca_cfg_req (ng_btsocket_l2cap_pcb_p); static int ng_btsocket_l2cap_send_l2ca_cfg_rsp (ng_btsocket_l2cap_pcb_p); static int ng_btsocket_l2cap_send_l2ca_discon_req (u_int32_t, ng_btsocket_l2cap_pcb_p); static int ng_btsocket_l2cap_send2 (ng_btsocket_l2cap_pcb_p); /* * Timeout processing routines */ static void ng_btsocket_l2cap_timeout (ng_btsocket_l2cap_pcb_p); static void ng_btsocket_l2cap_untimeout (ng_btsocket_l2cap_pcb_p); static void ng_btsocket_l2cap_process_timeout (void *); /* * Other stuff */ static ng_btsocket_l2cap_pcb_p ng_btsocket_l2cap_pcb_by_addr(bdaddr_p, int); static ng_btsocket_l2cap_pcb_p ng_btsocket_l2cap_pcb_by_token(u_int32_t); static ng_btsocket_l2cap_pcb_p 
ng_btsocket_l2cap_pcb_by_cid (bdaddr_p, int,int); static int ng_btsocket_l2cap_result2errno(int); static int ng_btsock_l2cap_addrtype_to_linktype(int addrtype); #define ng_btsocket_l2cap_wakeup_input_task() \ taskqueue_enqueue(taskqueue_swi_giant, &ng_btsocket_l2cap_queue_task) #define ng_btsocket_l2cap_wakeup_route_task() \ taskqueue_enqueue(taskqueue_swi_giant, &ng_btsocket_l2cap_rt_task) int ng_btsock_l2cap_addrtype_to_linktype(int addrtype) { switch(addrtype){ case BDADDR_LE_PUBLIC: return NG_HCI_LINK_LE_PUBLIC; case BDADDR_LE_RANDOM: return NG_HCI_LINK_LE_RANDOM; default: return NG_HCI_LINK_ACL; } } /***************************************************************************** ***************************************************************************** ** Netgraph node interface ***************************************************************************** *****************************************************************************/ /* * Netgraph node constructor. Do not allow creation of nodes of this type. */ static int ng_btsocket_l2cap_node_constructor(node_p node) { return (EINVAL); } /* ng_btsocket_l2cap_node_constructor */ /* * Do local shutdown processing. Let the old node go and create a fresh new one. */ static int ng_btsocket_l2cap_node_shutdown(node_p node) { int error = 0; NG_NODE_UNREF(node); /* Create new node */ error = ng_make_node_common(&typestruct, &ng_btsocket_l2cap_node); if (error != 0) { NG_BTSOCKET_L2CAP_ALERT( "%s: Could not create Netgraph node, error=%d\n", __func__, error); ng_btsocket_l2cap_node = NULL; return (error); } error = ng_name_node(ng_btsocket_l2cap_node, NG_BTSOCKET_L2CAP_NODE_TYPE); if (error != 0) { NG_BTSOCKET_L2CAP_ALERT( "%s: Could not name Netgraph node, error=%d\n", __func__, error); NG_NODE_UNREF(ng_btsocket_l2cap_node); ng_btsocket_l2cap_node = NULL; return (error); } return (0); } /* ng_btsocket_l2cap_node_shutdown */ /* * We allow any hook to be connected to the node. */ static int ng_btsocket_l2cap_node_newhook(node_p node, hook_p hook, char const *name) { return (0); } /* ng_btsocket_l2cap_node_newhook */ /* * Just say "YEP, that's OK by me!" */ static int ng_btsocket_l2cap_node_connect(hook_p hook) { NG_HOOK_SET_PRIVATE(hook, NULL); NG_HOOK_REF(hook); /* Keep extra reference to the hook */ #if 0 NG_HOOK_FORCE_QUEUE(NG_HOOK_PEER(hook)); NG_HOOK_FORCE_QUEUE(hook); #endif return (0); } /* ng_btsocket_l2cap_node_connect */ /* * Hook disconnection. Schedule the route cleanup task. */ static int ng_btsocket_l2cap_node_disconnect(hook_p hook) { /* * If the hook has private information then we must have this hook in * the routing table and must schedule cleaning for the routing table. * Otherwise the hook was connected but we never got a "hook_info" message, * so we have never added this hook to the routing table and it is safe * to just delete it.
*/ if (NG_HOOK_PRIVATE(hook) != NULL) return (ng_btsocket_l2cap_wakeup_route_task()); NG_HOOK_UNREF(hook); /* Remove extra reference */ return (0); } /* ng_btsocket_l2cap_node_disconnect */ /* * Process incoming messages */ static int ng_btsocket_l2cap_node_rcvmsg(node_p node, item_p item, hook_p hook) { struct ng_mesg *msg = NGI_MSG(item); /* item still has message */ int error = 0; if (msg != NULL && msg->header.typecookie == NGM_L2CAP_COOKIE) { mtx_lock(&ng_btsocket_l2cap_queue_mtx); if (NG_BT_ITEMQ_FULL(&ng_btsocket_l2cap_queue)) { NG_BTSOCKET_L2CAP_ERR( "%s: Input queue is full (msg)\n", __func__); NG_BT_ITEMQ_DROP(&ng_btsocket_l2cap_queue); NG_FREE_ITEM(item); error = ENOBUFS; } else { if (hook != NULL) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_l2cap_queue, item); error = ng_btsocket_l2cap_wakeup_input_task(); } mtx_unlock(&ng_btsocket_l2cap_queue_mtx); } else { NG_FREE_ITEM(item); error = EINVAL; } return (error); } /* ng_btsocket_l2cap_node_rcvmsg */ /* * Receive data on a hook */ static int ng_btsocket_l2cap_node_rcvdata(hook_p hook, item_p item) { int error = 0; mtx_lock(&ng_btsocket_l2cap_queue_mtx); if (NG_BT_ITEMQ_FULL(&ng_btsocket_l2cap_queue)) { NG_BTSOCKET_L2CAP_ERR( "%s: Input queue is full (data)\n", __func__); NG_BT_ITEMQ_DROP(&ng_btsocket_l2cap_queue); NG_FREE_ITEM(item); error = ENOBUFS; } else { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_l2cap_queue, item); error = ng_btsocket_l2cap_wakeup_input_task(); } mtx_unlock(&ng_btsocket_l2cap_queue_mtx); return (error); } /* ng_btsocket_l2cap_node_rcvdata */ /* * Process L2CA_Connect response. The socket layer must have initiated the * connection, so we must have a socket associated with the message token. */ static int ng_btsocket_l2cap_process_l2ca_con_req_rsp(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_con_op *op = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; int error = 0; if (msg->header.arglen != sizeof(*op)) return (EMSGSIZE); op = (ng_l2cap_l2ca_con_op *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* Look for the socket with the token */ pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_Connect response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, status=%d, " \ "state=%d\n", __func__, msg->header.token, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, op->lcid, op->result, op->status, pcb->state); if (pcb->state != NG_BTSOCKET_L2CAP_CONNECTING) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } ng_btsocket_l2cap_untimeout(pcb); if (op->result == NG_L2CAP_PENDING) { ng_btsocket_l2cap_timeout(pcb); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } if (op->result == NG_L2CAP_SUCCESS){ if((pcb->idtype == NG_L2CAP_L2CA_IDTYPE_ATT)|| (pcb->idtype == NG_L2CAP_L2CA_IDTYPE_SMP)){ pcb->encryption = op->encryption; pcb->cid = op->lcid; if(pcb->need_encrypt && !(pcb->encryption)){ ng_btsocket_l2cap_timeout(pcb); pcb->state = NG_BTSOCKET_L2CAP_W4_ENC_CHANGE; }else{ pcb->state = NG_BTSOCKET_L2CAP_OPEN; soisconnected(pcb->so); } }else{ /* * Channel is now open, so update local channel ID and * start configuration
process. Source and destination * addresses as well as route must be already set. */ pcb->cid = op->lcid; pcb->encryption = op->encryption; error = ng_btsocket_l2cap_send_l2ca_cfg_req(pcb); if (error != 0) { /* Send disconnect request with "zero" token */ ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb); /* ... and close the socket */ pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); } else { pcb->cfg_state = NG_BTSOCKET_L2CAP_CFG_IN_SENT; pcb->state = NG_BTSOCKET_L2CAP_CONFIGURING; ng_btsocket_l2cap_timeout(pcb); } } } else { /* * We have failed to open connection, so convert result * code to "errno" code and disconnect the socket. Channel * already has been closed. */ pcb->so->so_error = ng_btsocket_l2cap_result2errno(op->result); pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (error); } /* ng_btsocket_l2cap_process_l2ca_con_req_rsp */ /* * Process L2CA_ConnectRsp response */ static int ng_btsocket_l2cap_process_l2ca_con_rsp_rsp(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_con_rsp_op *op = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; if (msg->header.arglen != sizeof(*op)) return (EMSGSIZE); op = (ng_l2cap_l2ca_con_rsp_op *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* Look for the socket with the token */ pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_ConnectRsp response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d\n", __func__, msg->header.token, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, pcb->cid, op->result, pcb->state); if (pcb->state != NG_BTSOCKET_L2CAP_CONNECTING) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } ng_btsocket_l2cap_untimeout(pcb); /* Check the result and disconnect the socket on failure */ if (op->result != NG_L2CAP_SUCCESS) { /* Close the socket - channel already closed */ pcb->so->so_error = ng_btsocket_l2cap_result2errno(op->result); pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); } else { /* Move to CONFIGURING state and wait for CONFIG_IND */ pcb->cfg_state = 0; pcb->state = NG_BTSOCKET_L2CAP_CONFIGURING; ng_btsocket_l2cap_timeout(pcb); } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } /* ng_btsocket_process_l2ca_con_rsp_rsp */ /* * Process L2CA_Connect indicator. Find socket that listens on address * and PSM. Find exact or closest match. Create new socket and initiate * connection. 
*/ static int ng_btsocket_l2cap_process_l2ca_con_ind(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_con_ind_ip *ip = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL, *pcb1 = NULL; int error = 0; u_int32_t token = 0; u_int16_t result = 0; if (msg->header.arglen != sizeof(*ip)) return (EMSGSIZE); ip = (ng_l2cap_l2ca_con_ind_ip *)(msg->data); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_Connect indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, ident=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], ip->bdaddr.b[5], ip->bdaddr.b[4], ip->bdaddr.b[3], ip->bdaddr.b[2], ip->bdaddr.b[1], ip->bdaddr.b[0], ip->psm, ip->lcid, ip->ident); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); pcb = ng_btsocket_l2cap_pcb_by_addr(&rt->src, ip->psm); if (pcb != NULL) { struct socket *so1; mtx_lock(&pcb->pcb_mtx); CURVNET_SET(pcb->so->so_vnet); so1 = sonewconn(pcb->so, 0); CURVNET_RESTORE(); if (so1 == NULL) { result = NG_L2CAP_NO_RESOURCES; goto respond; } /* * If we got here then we have created a new socket. So complete the * connection. If we were listening on a specific address then copy the * source address from the listening socket, otherwise copy the source * address from the hook's routing information. */ pcb1 = so2l2cap_pcb(so1); KASSERT((pcb1 != NULL), ("%s: pcb1 == NULL\n", __func__)); mtx_lock(&pcb1->pcb_mtx); if (bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(pcb->src)) != 0) bcopy(&pcb->src, &pcb1->src, sizeof(pcb1->src)); else bcopy(&rt->src, &pcb1->src, sizeof(pcb1->src)); pcb1->flags &= ~NG_BTSOCKET_L2CAP_CLIENT; bcopy(&ip->bdaddr, &pcb1->dst, sizeof(pcb1->dst)); pcb1->psm = ip->psm; pcb1->cid = ip->lcid; pcb1->rt = rt; /* Copy socket settings */ pcb1->imtu = pcb->imtu; bcopy(&pcb->oflow, &pcb1->oflow, sizeof(pcb1->oflow)); pcb1->flush_timo = pcb->flush_timo; token = pcb1->token; } else /* Nobody listens on requested BDADDR/PSM */ result = NG_L2CAP_PSM_NOT_SUPPORTED; respond: error = ng_btsocket_l2cap_send_l2ca_con_rsp_req(token, rt, &ip->bdaddr, ip->ident, ip->lcid, result,ip->linktype); if (pcb1 != NULL) { if (error != 0) { pcb1->so->so_error = error; pcb1->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb1->so); } else { pcb1->state = NG_BTSOCKET_L2CAP_CONNECTING; soisconnecting(pcb1->so); ng_btsocket_l2cap_timeout(pcb1); } mtx_unlock(&pcb1->pcb_mtx); } if (pcb != NULL) mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (error); } /* ng_btsocket_l2cap_process_l2ca_con_ind */ /* Encryption Change */ static int ng_btsocket_l2cap_process_l2ca_enc_change(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_enc_chg_op *op = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; if (msg->header.arglen != sizeof(*op)) return (EMSGSIZE); op = (ng_l2cap_l2ca_enc_chg_op *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); pcb = ng_btsocket_l2cap_pcb_by_cid(&rt->src, op->lcid, op->idtype); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); pcb->encryption = op->result; if(pcb->need_encrypt){ ng_btsocket_l2cap_untimeout(pcb); if(pcb->state != NG_BTSOCKET_L2CAP_W4_ENC_CHANGE){ NG_BTSOCKET_L2CAP_WARN("%s: Invalid pcb status %d", __func__, pcb->state); }else if(pcb->encryption){ pcb->state = NG_BTSOCKET_L2CAP_OPEN; soisconnected(pcb->so); }else{ pcb->so->so_error = EPERM; ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb); pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); } } mtx_unlock(&pcb->pcb_mtx);
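/*
 * All of the L2CA handlers in this block resolve a pcb and then follow the
 * same locking discipline: take the global ng_btsocket_l2cap_sockets_mtx
 * first, then the per-socket pcb_mtx, and release them in reverse order.
 * A condensed, illustrative sketch of that pattern (not compiled):
 */
#if 0
	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
	pcb = ng_btsocket_l2cap_pcb_by_token(token);	/* or _by_cid() */
	if (pcb != NULL) {
		mtx_lock(&pcb->pcb_mtx);
		/* ... inspect and update pcb->state ... */
		mtx_unlock(&pcb->pcb_mtx);
	}
	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
#endif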
mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return 0; } /* * Process L2CA_Config response */ static int ng_btsocket_l2cap_process_l2ca_cfg_req_rsp(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_cfg_op *op = NULL; ng_btsocket_l2cap_pcb_p pcb = NULL; if (msg->header.arglen != sizeof(*op)) return (EMSGSIZE); op = (ng_l2cap_l2ca_cfg_op *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* * Socket must have issued a Configure request, so we must have a * socket that wants to be configured. Use the Netgraph message token * to find it. */ pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token); if (pcb == NULL) { /* * XXX FIXME what to do here? We could not find a * socket with the requested token. We cannot even send * Disconnect, because we do not know the channel ID */ mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_Config response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d, " \ "cfg_state=%x\n", __func__, msg->header.token, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, pcb->cid, op->result, pcb->state, pcb->cfg_state); if (pcb->state != NG_BTSOCKET_L2CAP_CONFIGURING) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } if (op->result == NG_L2CAP_SUCCESS) { /* * XXX FIXME Actually set flush and link timeout. * Set QoS here if required. Resolve conflicts (flush_timo). * Save incoming MTU (peer's outgoing MTU) and outgoing flow * spec. */ pcb->imtu = op->imtu; bcopy(&op->oflow, &pcb->oflow, sizeof(pcb->oflow)); pcb->flush_timo = op->flush_timo; /* * We have configured the incoming side, so record it and check * if configuration is complete. If complete then mark socket * as connected, otherwise wait for the peer. */ pcb->cfg_state &= ~NG_BTSOCKET_L2CAP_CFG_IN_SENT; pcb->cfg_state |= NG_BTSOCKET_L2CAP_CFG_IN; if (pcb->cfg_state == NG_BTSOCKET_L2CAP_CFG_BOTH) { /* Configuration complete - mark socket as open */ ng_btsocket_l2cap_untimeout(pcb); pcb->state = NG_BTSOCKET_L2CAP_OPEN; soisconnected(pcb->so); } } else { /* * Something went wrong. Could be unacceptable parameters, * reject or unknown option. That's too bad, but we will * not negotiate. Send Disconnect and close the channel. */ ng_btsocket_l2cap_untimeout(pcb); switch (op->result) { case NG_L2CAP_UNACCEPTABLE_PARAMS: case NG_L2CAP_UNKNOWN_OPTION: pcb->so->so_error = EINVAL; break; default: pcb->so->so_error = ECONNRESET; break; } /* Send disconnect with "zero" token */ ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb); /* ...
and close the socket */ pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } /* ng_btsocket_l2cap_process_l2ca_cfg_req_rsp */ /* * Process L2CA_ConfigRsp response */ static int ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_cfg_rsp_op *op = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; int error = 0; if (msg->header.arglen != sizeof(*op)) return (EMSGSIZE); op = (ng_l2cap_l2ca_cfg_rsp_op *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* Look for the socket with the token */ pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_ConfigRsp response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d, " \ "cfg_state=%x\n", __func__, msg->header.token, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, pcb->cid, op->result, pcb->state, pcb->cfg_state); if (pcb->state != NG_BTSOCKET_L2CAP_CONFIGURING) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } /* Check the result and disconnect the socket on failure */ if (op->result != NG_L2CAP_SUCCESS) goto disconnect; /* * Now we are done with the remote side configuration. Configure the local * side if we have not done it yet. */ pcb->cfg_state &= ~NG_BTSOCKET_L2CAP_CFG_OUT_SENT; pcb->cfg_state |= NG_BTSOCKET_L2CAP_CFG_OUT; if (pcb->cfg_state == NG_BTSOCKET_L2CAP_CFG_BOTH) { /* Configuration complete - mark socket as open */ ng_btsocket_l2cap_untimeout(pcb); pcb->state = NG_BTSOCKET_L2CAP_OPEN; soisconnected(pcb->so); } else { if (!(pcb->cfg_state & NG_BTSOCKET_L2CAP_CFG_IN_SENT)) { /* Send L2CA_Config request - incoming path */ error = ng_btsocket_l2cap_send_l2ca_cfg_req(pcb); if (error != 0) goto disconnect; pcb->cfg_state |= NG_BTSOCKET_L2CAP_CFG_IN_SENT; } } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (error); disconnect: ng_btsocket_l2cap_untimeout(pcb); /* Send disconnect with "zero" token */ ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb); /* ...
and close the socket */ pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (error); } /* ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp */ /* * Process L2CA_Config indicator */ static int ng_btsocket_l2cap_process_l2ca_cfg_ind(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_cfg_ind_ip *ip = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; int error = 0; if (msg->header.arglen != sizeof(*ip)) return (EMSGSIZE); ip = (ng_l2cap_l2ca_cfg_ind_ip *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* Check for the open socket that has the given channel ID */ pcb = ng_btsocket_l2cap_pcb_by_cid(&rt->src, ip->lcid, NG_L2CAP_L2CA_IDTYPE_BREDR); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_Config indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, state=%d, cfg_state=%x\n", __func__, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, pcb->cid, pcb->state, pcb->cfg_state); /* XXX FIXME re-configuration on open socket */ if (pcb->state != NG_BTSOCKET_L2CAP_CONFIGURING) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } /* * XXX FIXME Actually set flush and link timeout. Set QoS here if * required. Resolve conflicts (flush_timo). Note outgoing MTU (peer's * incoming MTU) and incoming flow spec. */ pcb->omtu = ip->omtu; bcopy(&ip->iflow, &pcb->iflow, sizeof(pcb->iflow)); pcb->flush_timo = ip->flush_timo; /* * Send L2CA_Config response to our peer and check for errors; * if any, send disconnect to close the channel. */ if (!(pcb->cfg_state & NG_BTSOCKET_L2CAP_CFG_OUT_SENT)) { error = ng_btsocket_l2cap_send_l2ca_cfg_rsp(pcb); if (error != 0) { ng_btsocket_l2cap_untimeout(pcb); pcb->so->so_error = error; /* Send disconnect with "zero" token */ ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb); /* ... and close the socket */ pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); } else pcb->cfg_state |= NG_BTSOCKET_L2CAP_CFG_OUT_SENT; } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (error); } /* ng_btsocket_l2cap_process_l2ca_cfg_ind */ /* * Process L2CA_Disconnect response */ static int ng_btsocket_l2cap_process_l2ca_discon_rsp(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_discon_op *op = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; /* Check message */ if (msg->header.arglen != sizeof(*op)) return (EMSGSIZE); op = (ng_l2cap_l2ca_discon_op *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* * Socket layer must have issued L2CA_Disconnect request, so there * must be a socket that wants to be disconnected. Use Netgraph * message token to find it.
*/ pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } mtx_lock(&pcb->pcb_mtx); /* XXX Close socket no matter what op->result says */ if (pcb->state != NG_BTSOCKET_L2CAP_CLOSED) { NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_Disconnect response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d\n", __func__, msg->header.token, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, pcb->cid, op->result, pcb->state); ng_btsocket_l2cap_untimeout(pcb); pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } /* ng_btsocket_l2cap_process_l2ca_discon_rsp */ /* * Process L2CA_Disconnect indicator */ static int ng_btsocket_l2cap_process_l2ca_discon_ind(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_discon_ind_ip *ip = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; /* Check message */ if (msg->header.arglen != sizeof(*ip)) return (EMSGSIZE); ip = (ng_l2cap_l2ca_discon_ind_ip *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* Look for the socket with given channel ID */ pcb = ng_btsocket_l2cap_pcb_by_cid(&rt->src, ip->lcid, ip->idtype); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } /* * Channel has already been destroyed, so disconnect the socket * and be done with it. If there was any pending request we can * not do anything here anyway. */ mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_Disconnect indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, state=%d\n", __func__, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, pcb->cid, pcb->state); if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO) ng_btsocket_l2cap_untimeout(pcb); pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } /* ng_btsocket_l2cap_process_l2ca_discon_ind */ /* * Process L2CA_Write response */ static int ng_btsocket_l2cap_process_l2ca_write_rsp(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_write_op *op = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; /* Check message */ if (msg->header.arglen != sizeof(*op)) return (EMSGSIZE); op = (ng_l2cap_l2ca_write_op *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* Look for the socket with given token */ pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_Write response, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, length=%d, " \ "state=%d\n", __func__, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, pcb->cid, op->result, op->length, pcb->state); if (pcb->state != NG_BTSOCKET_L2CAP_OPEN) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } ng_btsocket_l2cap_untimeout(pcb); /* * Check if we have 
more data to send */ sbdroprecord(&pcb->so->so_snd); if (sbavail(&pcb->so->so_snd) > 0) { if (ng_btsocket_l2cap_send2(pcb) == 0) ng_btsocket_l2cap_timeout(pcb); else sbdroprecord(&pcb->so->so_snd); /* XXX */ } /* * Now set the result, drop packet from the socket send queue and * ask for more (wakeup sender) */ pcb->so->so_error = ng_btsocket_l2cap_result2errno(op->result); sowwakeup(pcb->so); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } /* ng_btsocket_l2cap_process_l2ca_write_rsp */ /* * Send L2CA_Connect request */ static int ng_btsocket_l2cap_send_l2ca_con_req(ng_btsocket_l2cap_pcb_p pcb) { struct ng_mesg *msg = NULL; ng_l2cap_l2ca_con_ip *ip = NULL; int error = 0; mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->rt == NULL || pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) return (ENETDOWN); NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CON, sizeof(*ip), M_NOWAIT); if (msg == NULL) return (ENOMEM); msg->header.token = pcb->token; ip = (ng_l2cap_l2ca_con_ip *)(msg->data); bcopy(&pcb->dst, &ip->bdaddr, sizeof(ip->bdaddr)); ip->psm = pcb->psm; ip->linktype = ng_btsock_l2cap_addrtype_to_linktype(pcb->dsttype); ip->idtype = pcb->idtype; NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg,pcb->rt->hook, 0); return (error); } /* ng_btsocket_l2cap_send_l2ca_con_req */ /* * Send L2CA_Connect response */ static int ng_btsocket_l2cap_send_l2ca_con_rsp_req(u_int32_t token, ng_btsocket_l2cap_rtentry_p rt, bdaddr_p dst, int ident, int lcid, int result, int linktype) { struct ng_mesg *msg = NULL; ng_l2cap_l2ca_con_rsp_ip *ip = NULL; int error = 0; if (rt == NULL || rt->hook == NULL || NG_HOOK_NOT_VALID(rt->hook)) return (ENETDOWN); NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CON_RSP, sizeof(*ip), M_NOWAIT); if (msg == NULL) return (ENOMEM); msg->header.token = token; ip = (ng_l2cap_l2ca_con_rsp_ip *)(msg->data); bcopy(dst, &ip->bdaddr, sizeof(ip->bdaddr)); ip->ident = ident; ip->lcid = lcid; ip->linktype = linktype; ip->result = result; ip->status = 0; NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg, rt->hook, 0); return (error); } /* ng_btsocket_l2cap_send_l2ca_con_rsp_req */ /* * Send L2CA_Config request */ static int ng_btsocket_l2cap_send_l2ca_cfg_req(ng_btsocket_l2cap_pcb_p pcb) { struct ng_mesg *msg = NULL; ng_l2cap_l2ca_cfg_ip *ip = NULL; int error = 0; mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->rt == NULL || pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) return (ENETDOWN); NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CFG, sizeof(*ip), M_NOWAIT); if (msg == NULL) return (ENOMEM); msg->header.token = pcb->token; ip = (ng_l2cap_l2ca_cfg_ip *)(msg->data); ip->lcid = pcb->cid; ip->imtu = pcb->imtu; bcopy(&pcb->oflow, &ip->oflow, sizeof(ip->oflow)); ip->flush_timo = pcb->flush_timo; ip->link_timo = pcb->link_timo; NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg,pcb->rt->hook, 0); return (error); } /* ng_btsocket_l2cap_send_l2ca_cfg_req */ /* * Send L2CA_Config response */ static int ng_btsocket_l2cap_send_l2ca_cfg_rsp(ng_btsocket_l2cap_pcb_p pcb) { struct ng_mesg *msg = NULL; ng_l2cap_l2ca_cfg_rsp_ip *ip = NULL; int error = 0; mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->rt == NULL || pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) return (ENETDOWN); NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CFG_RSP, sizeof(*ip), M_NOWAIT); if (msg == NULL) return (ENOMEM); msg->header.token = pcb->token; ip = (ng_l2cap_l2ca_cfg_rsp_ip *)(msg->data); ip->lcid = pcb->cid; ip->omtu = pcb->omtu; 
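/*
 * The L2CA_* senders in this block all share one shape: NG_MKMESSAGE()
 * allocates the control message, header.token is set so the asynchronous
 * response can later be matched back to this socket by
 * ng_btsocket_l2cap_pcb_by_token(), and NG_SEND_MSG_HOOK() ships the
 * message down the routing hook. A condensed, illustrative sketch (not
 * compiled; "cmd" and "ip" stand for the particular request and payload):
 */
#if 0
	NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, cmd, sizeof(*ip), M_NOWAIT);
	if (msg == NULL)
		return (ENOMEM);
	msg->header.token = pcb->token;	/* response is matched by token */
	/* ... fill in the ng_l2cap_l2ca_*_ip payload ... */
	NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg, pcb->rt->hook, 0);
#endif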
bcopy(&pcb->iflow, &ip->iflow, sizeof(ip->iflow)); NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg, pcb->rt->hook, 0); return (error); } /* ng_btsocket_l2cap_send_l2ca_cfg_rsp */ /* * Send L2CA_Disconnect request */ static int ng_btsocket_l2cap_send_l2ca_discon_req(u_int32_t token, ng_btsocket_l2cap_pcb_p pcb) { struct ng_mesg *msg = NULL; ng_l2cap_l2ca_discon_ip *ip = NULL; int error = 0; mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->rt == NULL || pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) return (ENETDOWN); NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_DISCON, sizeof(*ip), M_NOWAIT); if (msg == NULL) return (ENOMEM); msg->header.token = token; ip = (ng_l2cap_l2ca_discon_ip *)(msg->data); ip->lcid = pcb->cid; ip->idtype = pcb->idtype; NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg,pcb->rt->hook, 0); return (error); } /* ng_btsocket_l2cap_send_l2ca_discon_req */ /***************************************************************************** ***************************************************************************** ** Socket interface ***************************************************************************** *****************************************************************************/ /* * L2CAP sockets data input routine */ static void ng_btsocket_l2cap_data_input(struct mbuf *m, hook_p hook) { ng_l2cap_hdr_t *hdr = NULL; ng_l2cap_clt_hdr_t *clt_hdr = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; ng_btsocket_l2cap_rtentry_t *rt = NULL; uint16_t idtype; if (hook == NULL) { NG_BTSOCKET_L2CAP_ALERT( "%s: Invalid source hook for L2CAP data packet\n", __func__); goto drop; } rt = (ng_btsocket_l2cap_rtentry_t *) NG_HOOK_PRIVATE(hook); if (rt == NULL) { NG_BTSOCKET_L2CAP_ALERT( "%s: Could not find out source bdaddr for L2CAP data packet\n", __func__); goto drop; } m = m_pullup(m, sizeof(uint16_t)); if (m == NULL) /* m_pullup() may fail and free the chain */ goto drop; idtype = *mtod(m, uint16_t *); m_adj(m, sizeof(uint16_t)); /* Make sure we can access header */ if (m->m_pkthdr.len < sizeof(*hdr)) { NG_BTSOCKET_L2CAP_ERR( "%s: L2CAP data packet too small, len=%d\n", __func__, m->m_pkthdr.len); goto drop; } if (m->m_len < sizeof(*hdr)) { m = m_pullup(m, sizeof(*hdr)); if (m == NULL) goto drop; } /* Strip L2CAP packet header and verify packet length */ hdr = mtod(m, ng_l2cap_hdr_t *); m_adj(m, sizeof(*hdr)); if (hdr->length != m->m_pkthdr.len) { NG_BTSOCKET_L2CAP_ERR( "%s: Bad L2CAP data packet length, len=%d, length=%d\n", __func__, m->m_pkthdr.len, hdr->length); goto drop; } /* * Now process packet. Two cases: * * 1) Normal packet (cid != 2) then find connected socket and append * mbuf to the socket queue. Wakeup socket. * * 2) Broadcast packet (cid == 2) then find all sockets that are connected * to the given PSM and have SO_BROADCAST bit set and append mbuf * to the socket queue. Wakeup socket.
*/ NG_BTSOCKET_L2CAP_INFO( "%s: Received L2CAP data packet: src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dcid=%d, length=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], hdr->dcid, hdr->length); if ((hdr->dcid >= NG_L2CAP_FIRST_CID) || (idtype == NG_L2CAP_L2CA_IDTYPE_ATT)|| (idtype == NG_L2CAP_L2CA_IDTYPE_SMP) ){ mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* Normal packet: find connected socket */ pcb = ng_btsocket_l2cap_pcb_by_cid(&rt->src, hdr->dcid,idtype); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); goto drop; } mtx_lock(&pcb->pcb_mtx); if (pcb->state != NG_BTSOCKET_L2CAP_OPEN) { NG_BTSOCKET_L2CAP_ERR( "%s: No connected socket found, src bdaddr=%x:%x:%x:%x:%x:%x, dcid=%d, " \ "state=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], hdr->dcid, pcb->state); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); goto drop; } /* Check packet size against socket's incoming MTU */ if (hdr->length > pcb->imtu) { NG_BTSOCKET_L2CAP_ERR( "%s: L2CAP data packet too big, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dcid=%d, length=%d, imtu=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], hdr->dcid, hdr->length, pcb->imtu); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); goto drop; } /* Check if we have enough space in socket receive queue */ if (m->m_pkthdr.len > sbspace(&pcb->so->so_rcv)) { /* * This is really bad. Receive queue on socket does * not have enough space for the packet. We do not * have any other choice but drop the packet. L2CAP * does not provide any flow control. */ NG_BTSOCKET_L2CAP_ERR( "%s: Not enough space in socket receive queue. Dropping L2CAP data packet, " \ "src bdaddr=%x:%x:%x:%x:%x:%x, dcid=%d, len=%d, space=%ld\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], hdr->dcid, m->m_pkthdr.len, sbspace(&pcb->so->so_rcv)); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); goto drop; } /* Append packet to the socket receive queue and wakeup */ sbappendrecord(&pcb->so->so_rcv, m); m = NULL; sorwakeup(pcb->so); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); } else if (hdr->dcid == NG_L2CAP_CLT_CID) { /* Broadcast packet: give packet to all sockets */ /* Check packet size against connectionless MTU */ if (hdr->length > NG_L2CAP_MTU_DEFAULT) { NG_BTSOCKET_L2CAP_ERR( "%s: Connectionless L2CAP data packet too big, " \ "src bdaddr=%x:%x:%x:%x:%x:%x, length=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], hdr->length); goto drop; } /* Make sure we can access connectionless header */ if (m->m_pkthdr.len < sizeof(*clt_hdr)) { NG_BTSOCKET_L2CAP_ERR( "%s: Can not get L2CAP connectionless packet header, " \ "src bdaddr=%x:%x:%x:%x:%x:%x, length=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], hdr->length); goto drop; } if (m->m_len < sizeof(*clt_hdr)) { m = m_pullup(m, sizeof(*clt_hdr)); if (m == NULL) goto drop; } /* Strip connectionless header and deliver packet */ clt_hdr = mtod(m, ng_l2cap_clt_hdr_t *); m_adj(m, sizeof(*clt_hdr)); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CAP connectionless data packet, " \ "src bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, length=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], clt_hdr->psm, hdr->length); 
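/*
 * The loop below fans one connectionless packet out to every socket whose
 * source address matches the receiving adapter, that is open on the PSM
 * from the CLT header, and that has SO_BROADCAST set. From userland the
 * receiving side would look roughly like this hypothetical sketch (error
 * handling omitted; struct sockaddr_l2cap as declared in ng_btsocket.h):
 */
#if 0
	struct sockaddr_l2cap sa;
	int s, on = 1;

	s = socket(PF_BLUETOOTH, SOCK_SEQPACKET, BLUETOOTH_PROTO_L2CAP);
	setsockopt(s, SOL_SOCKET, SO_BROADCAST, &on, sizeof(on));
	/* ... bind() to the local bdaddr/PSM and connect() as usual; */
	/* the pcb must be in the OPEN state to receive CLT traffic.  */
#endif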
mtx_lock(&ng_btsocket_l2cap_sockets_mtx); LIST_FOREACH(pcb, &ng_btsocket_l2cap_sockets, next) { struct mbuf *copy = NULL; mtx_lock(&pcb->pcb_mtx); if (bcmp(&rt->src, &pcb->src, sizeof(pcb->src)) != 0 || pcb->psm != clt_hdr->psm || pcb->state != NG_BTSOCKET_L2CAP_OPEN || (pcb->so->so_options & SO_BROADCAST) == 0 || m->m_pkthdr.len > sbspace(&pcb->so->so_rcv)) goto next; /* * Create a copy of the packet and append it to the * socket's queue. If m_dup() failed - no big deal, * it is broadcast traffic after all */ copy = m_dup(m, M_NOWAIT); if (copy != NULL) { sbappendrecord(&pcb->so->so_rcv, copy); sorwakeup(pcb->so); } next: mtx_unlock(&pcb->pcb_mtx); } mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); } drop: NG_FREE_M(m); /* checks for m != NULL */ } /* ng_btsocket_l2cap_data_input */ /* * L2CAP sockets default message input routine */ static void ng_btsocket_l2cap_default_msg_input(struct ng_mesg *msg, hook_p hook) { switch (msg->header.cmd) { case NGM_L2CAP_NODE_HOOK_INFO: { ng_btsocket_l2cap_rtentry_t *rt = NULL; ng_l2cap_node_hook_info_ep *ep = (ng_l2cap_node_hook_info_ep *)msg->data; if (hook == NULL || msg->header.arglen != sizeof(*ep)) break; if (bcmp(&ep->addr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0) break; mtx_lock(&ng_btsocket_l2cap_rt_mtx); rt = (ng_btsocket_l2cap_rtentry_t *) NG_HOOK_PRIVATE(hook); if (rt == NULL) { rt = malloc(sizeof(*rt), M_NETGRAPH_BTSOCKET_L2CAP, M_NOWAIT|M_ZERO); if (rt == NULL) { mtx_unlock(&ng_btsocket_l2cap_rt_mtx); break; } LIST_INSERT_HEAD(&ng_btsocket_l2cap_rt, rt, next); NG_HOOK_SET_PRIVATE(hook, rt); } bcopy(&ep->addr, &rt->src, sizeof(rt->src)); rt->hook = hook; mtx_unlock(&ng_btsocket_l2cap_rt_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Updating hook \"%s\", src bdaddr=%x:%x:%x:%x:%x:%x\n", __func__, NG_HOOK_NAME(hook), rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0]); } break; default: NG_BTSOCKET_L2CAP_WARN( "%s: Unknown message, cmd=%d\n", __func__, msg->header.cmd); break; } NG_FREE_MSG(msg); /* Checks for msg != NULL */ } /* ng_btsocket_l2cap_default_msg_input */ /* * L2CAP sockets L2CA message input routine */ static void ng_btsocket_l2cap_l2ca_msg_input(struct ng_mesg *msg, hook_p hook) { ng_btsocket_l2cap_rtentry_p rt = NULL; if (hook == NULL) { NG_BTSOCKET_L2CAP_ALERT( "%s: Invalid source hook for L2CA message\n", __func__); goto drop; } rt = (ng_btsocket_l2cap_rtentry_p) NG_HOOK_PRIVATE(hook); if (rt == NULL) { NG_BTSOCKET_L2CAP_ALERT( "%s: Could not find out source bdaddr for L2CA message\n", __func__); goto drop; } switch (msg->header.cmd) { case NGM_L2CAP_L2CA_CON: /* L2CA_Connect response */ ng_btsocket_l2cap_process_l2ca_con_req_rsp(msg, rt); break; case NGM_L2CAP_L2CA_CON_RSP: /* L2CA_ConnectRsp response */ ng_btsocket_l2cap_process_l2ca_con_rsp_rsp(msg, rt); break; case NGM_L2CAP_L2CA_CON_IND: /* L2CA_Connect indicator */ ng_btsocket_l2cap_process_l2ca_con_ind(msg, rt); break; case NGM_L2CAP_L2CA_CFG: /* L2CA_Config response */ ng_btsocket_l2cap_process_l2ca_cfg_req_rsp(msg, rt); break; case NGM_L2CAP_L2CA_CFG_RSP: /* L2CA_ConfigRsp response */ ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp(msg, rt); break; case NGM_L2CAP_L2CA_CFG_IND: /* L2CA_Config indicator */ ng_btsocket_l2cap_process_l2ca_cfg_ind(msg, rt); break; case NGM_L2CAP_L2CA_DISCON: /* L2CA_Disconnect response */ ng_btsocket_l2cap_process_l2ca_discon_rsp(msg, rt); break; case NGM_L2CAP_L2CA_DISCON_IND: /* L2CA_Disconnect indicator */ ng_btsocket_l2cap_process_l2ca_discon_ind(msg, rt); break; case NGM_L2CAP_L2CA_WRITE: /* L2CA_Write response */
ng_btsocket_l2cap_process_l2ca_write_rsp(msg, rt); break; case NGM_L2CAP_L2CA_ENC_CHANGE: ng_btsocket_l2cap_process_l2ca_enc_change(msg, rt); break; /* XXX FIXME add other L2CA messages */ default: NG_BTSOCKET_L2CAP_WARN( "%s: Unknown L2CA message, cmd=%d\n", __func__, msg->header.cmd); break; } drop: NG_FREE_MSG(msg); } /* ng_btsocket_l2cap_l2ca_msg_input */ /* * L2CAP sockets input routine */ static void ng_btsocket_l2cap_input(void *context, int pending) { item_p item = NULL; hook_p hook = NULL; for (;;) { mtx_lock(&ng_btsocket_l2cap_queue_mtx); NG_BT_ITEMQ_DEQUEUE(&ng_btsocket_l2cap_queue, item); mtx_unlock(&ng_btsocket_l2cap_queue_mtx); if (item == NULL) break; NGI_GET_HOOK(item, hook); if (hook != NULL && NG_HOOK_NOT_VALID(hook)) goto drop; switch(item->el_flags & NGQF_TYPE) { case NGQF_DATA: { struct mbuf *m = NULL; NGI_GET_M(item, m); ng_btsocket_l2cap_data_input(m, hook); } break; case NGQF_MESG: { struct ng_mesg *msg = NULL; NGI_GET_MSG(item, msg); switch (msg->header.cmd) { case NGM_L2CAP_L2CA_CON: case NGM_L2CAP_L2CA_CON_RSP: case NGM_L2CAP_L2CA_CON_IND: case NGM_L2CAP_L2CA_CFG: case NGM_L2CAP_L2CA_CFG_RSP: case NGM_L2CAP_L2CA_CFG_IND: case NGM_L2CAP_L2CA_DISCON: case NGM_L2CAP_L2CA_DISCON_IND: case NGM_L2CAP_L2CA_WRITE: case NGM_L2CAP_L2CA_ENC_CHANGE: /* XXX FIXME add other L2CA messages */ ng_btsocket_l2cap_l2ca_msg_input(msg, hook); break; default: ng_btsocket_l2cap_default_msg_input(msg, hook); break; } } break; default: KASSERT(0, ("%s: invalid item type=%ld\n", __func__, (item->el_flags & NGQF_TYPE))); break; } drop: if (hook != NULL) NG_HOOK_UNREF(hook); NG_FREE_ITEM(item); } } /* ng_btsocket_l2cap_input */ /* * Route cleanup task. Gets scheduled when hook is disconnected. Here we * will find all sockets that use "invalid" hook and disconnect them. 
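The input routine above follows a common drain-queue discipline: hold the queue mutex only for the dequeue itself, then process the dequeued item with the lock dropped, so handlers never run under the queue lock. A small standalone sketch of the same pattern using <sys/queue.h>; all names here are illustrative only:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct item {
	STAILQ_ENTRY(item) link;
	int payload;
};

static STAILQ_HEAD(, item) q = STAILQ_HEAD_INITIALIZER(q);
static pthread_mutex_t q_mtx = PTHREAD_MUTEX_INITIALIZER;

static void
drain(void)
{
	struct item *it;

	for (;;) {
		/* Hold the queue lock only while dequeueing */
		pthread_mutex_lock(&q_mtx);
		it = STAILQ_FIRST(&q);
		if (it != NULL)
			STAILQ_REMOVE_HEAD(&q, link);
		pthread_mutex_unlock(&q_mtx);
		if (it == NULL)
			break;
		/* Process outside the lock, as the task handler does */
		printf("item %d\n", it->payload);
		free(it);
	}
}

int
main(void)
{
	struct item *it = malloc(sizeof(*it));

	if (it == NULL)
		return (1);
	it->payload = 42;
	STAILQ_INSERT_TAIL(&q, it, link);
	drain();
	return (0);
}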
*/ static void ng_btsocket_l2cap_rtclean(void *context, int pending) { ng_btsocket_l2cap_pcb_p pcb = NULL, pcb_next = NULL; ng_btsocket_l2cap_rtentry_p rt = NULL; mtx_lock(&ng_btsocket_l2cap_rt_mtx); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* * First disconnect all sockets that use "invalid" hook */ for (pcb = LIST_FIRST(&ng_btsocket_l2cap_sockets); pcb != NULL; ) { mtx_lock(&pcb->pcb_mtx); pcb_next = LIST_NEXT(pcb, next); if (pcb->rt != NULL && pcb->rt->hook != NULL && NG_HOOK_NOT_VALID(pcb->rt->hook)) { if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO) ng_btsocket_l2cap_untimeout(pcb); pcb->so->so_error = ENETDOWN; pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); pcb->token = 0; pcb->cid = 0; pcb->rt = NULL; } mtx_unlock(&pcb->pcb_mtx); pcb = pcb_next; } /* * Now cleanup routing table */ for (rt = LIST_FIRST(&ng_btsocket_l2cap_rt); rt != NULL; ) { ng_btsocket_l2cap_rtentry_p rt_next = LIST_NEXT(rt, next); if (rt->hook != NULL && NG_HOOK_NOT_VALID(rt->hook)) { LIST_REMOVE(rt, next); NG_HOOK_SET_PRIVATE(rt->hook, NULL); NG_HOOK_UNREF(rt->hook); /* Remove extra reference */ bzero(rt, sizeof(*rt)); free(rt, M_NETGRAPH_BTSOCKET_L2CAP); } rt = rt_next; } mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); mtx_unlock(&ng_btsocket_l2cap_rt_mtx); } /* ng_btsocket_l2cap_rtclean */ /* * Initialize everything */ static void ng_btsocket_l2cap_init(void *arg __unused) { int error = 0; ng_btsocket_l2cap_node = NULL; ng_btsocket_l2cap_debug_level = NG_BTSOCKET_WARN_LEVEL; /* Register Netgraph node type */ error = ng_newtype(&typestruct); if (error != 0) { NG_BTSOCKET_L2CAP_ALERT( "%s: Could not register Netgraph node type, error=%d\n", __func__, error); return; } /* Create Netgraph node */ error = ng_make_node_common(&typestruct, &ng_btsocket_l2cap_node); if (error != 0) { NG_BTSOCKET_L2CAP_ALERT( "%s: Could not create Netgraph node, error=%d\n", __func__, error); ng_btsocket_l2cap_node = NULL; return; } error = ng_name_node(ng_btsocket_l2cap_node, NG_BTSOCKET_L2CAP_NODE_TYPE); if (error != 0) { NG_BTSOCKET_L2CAP_ALERT( "%s: Could not name Netgraph node, error=%d\n", __func__, error); NG_NODE_UNREF(ng_btsocket_l2cap_node); ng_btsocket_l2cap_node = NULL; return; } /* Create input queue */ NG_BT_ITEMQ_INIT(&ng_btsocket_l2cap_queue, ifqmaxlen); mtx_init(&ng_btsocket_l2cap_queue_mtx, "btsocks_l2cap_queue_mtx", NULL, MTX_DEF); TASK_INIT(&ng_btsocket_l2cap_queue_task, 0, ng_btsocket_l2cap_input, NULL); /* Create list of sockets */ LIST_INIT(&ng_btsocket_l2cap_sockets); mtx_init(&ng_btsocket_l2cap_sockets_mtx, "btsocks_l2cap_sockets_mtx", NULL, MTX_DEF); /* Routing table */ LIST_INIT(&ng_btsocket_l2cap_rt); mtx_init(&ng_btsocket_l2cap_rt_mtx, "btsocks_l2cap_rt_mtx", NULL, MTX_DEF); TASK_INIT(&ng_btsocket_l2cap_rt_task, 0, ng_btsocket_l2cap_rtclean, NULL); } /* ng_btsocket_l2cap_init */ SYSINIT(ng_btsocket_l2cap_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ng_btsocket_l2cap_init, NULL); /* * Abort connection on socket */ void ng_btsocket_l2cap_abort(struct socket *so) { so->so_error = ECONNABORTED; (void)ng_btsocket_l2cap_disconnect(so); } /* ng_btsocket_l2cap_abort */ void ng_btsocket_l2cap_close(struct socket *so) { (void)ng_btsocket_l2cap_disconnect(so); } /* ng_btsocket_l2cap_close */ -/* - * Accept connection on socket. Nothing to do here, socket must be connected - * and ready, so just return peer address and be done with it.
- */ - -int -ng_btsocket_l2cap_accept(struct socket *so, struct sockaddr **nam) -{ - if (ng_btsocket_l2cap_node == NULL) - return (EINVAL); - - return (ng_btsocket_l2cap_peeraddr(so, nam)); -} /* ng_btsocket_l2cap_accept */ - /* * Create and attach new socket */ int ng_btsocket_l2cap_attach(struct socket *so, int proto, struct thread *td) { static u_int32_t token = 0; ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); int error; /* Check socket and protocol */ if (ng_btsocket_l2cap_node == NULL) return (EPROTONOSUPPORT); if (so->so_type != SOCK_SEQPACKET) return (ESOCKTNOSUPPORT); #if 0 /* XXX sonewconn() calls "pru_attach" with proto == 0 */ if (proto != 0) if (proto != BLUETOOTH_PROTO_L2CAP) return (EPROTONOSUPPORT); #endif /* XXX */ if (pcb != NULL) return (EISCONN); /* Reserve send and receive space if it is not reserved yet */ if ((so->so_snd.sb_hiwat == 0) || (so->so_rcv.sb_hiwat == 0)) { error = soreserve(so, NG_BTSOCKET_L2CAP_SENDSPACE, NG_BTSOCKET_L2CAP_RECVSPACE); if (error != 0) return (error); } /* Allocate the PCB */ pcb = malloc(sizeof(*pcb), M_NETGRAPH_BTSOCKET_L2CAP, M_NOWAIT | M_ZERO); if (pcb == NULL) return (ENOMEM); /* Link the PCB and the socket */ so->so_pcb = (caddr_t) pcb; pcb->so = so; pcb->state = NG_BTSOCKET_L2CAP_CLOSED; /* Initialize PCB */ pcb->imtu = pcb->omtu = NG_L2CAP_MTU_DEFAULT; /* Default flow */ pcb->iflow.flags = 0x0; pcb->iflow.service_type = NG_HCI_SERVICE_TYPE_BEST_EFFORT; pcb->iflow.token_rate = 0xffffffff; /* maximum */ pcb->iflow.token_bucket_size = 0xffffffff; /* maximum */ pcb->iflow.peak_bandwidth = 0x00000000; /* maximum */ pcb->iflow.latency = 0xffffffff; /* don't care */ pcb->iflow.delay_variation = 0xffffffff; /* don't care */ bcopy(&pcb->iflow, &pcb->oflow, sizeof(pcb->oflow)); pcb->flush_timo = NG_L2CAP_FLUSH_TIMO_DEFAULT; pcb->link_timo = NG_L2CAP_LINK_TIMO_DEFAULT; /* * XXX Mark PCB mutex as DUPOK to prevent "duplicated lock of * the same type" message. When accepting new L2CAP connection * ng_btsocket_l2cap_process_l2ca_con_ind() holds both PCB mutexes * for "old" (accepting) PCB and "new" (created) PCB. */ mtx_init(&pcb->pcb_mtx, "btsocks_l2cap_pcb_mtx", NULL, MTX_DEF|MTX_DUPOK); callout_init_mtx(&pcb->timo, &pcb->pcb_mtx, 0); /* * Add the PCB to the list * * XXX FIXME VERY IMPORTANT! * * This is totally FUBAR. We could get here in two cases: * * 1) When user calls socket() * 2) When we need to accept new incoming connection and call * sonewconn() * * In the first case we must acquire ng_btsocket_l2cap_sockets_mtx. * In the second case we hold ng_btsocket_l2cap_sockets_mtx already. * So we now need to distinguish between these cases. From reading * /sys/kern/uipc_socket.c we can find out that sonewconn() calls * pru_attach with proto == 0 and td == NULL. For now use this fact * to figure out if we were called from socket() or from sonewconn(). */ if (td != NULL) mtx_lock(&ng_btsocket_l2cap_sockets_mtx); else mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED); /* Set PCB token. 
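The token assignment just below ("if (++token == 0) token++;") guarantees a PCB token is never zero, since zero is reserved as "no token": ng_btsocket_l2cap_pcb_by_token() later returns NULL for it. A tiny standalone sketch isolating that skip-zero wraparound rule; next_token() is a hypothetical name used only for illustration:

#include <assert.h>
#include <stdint.h>

static uint32_t token;

static uint32_t
next_token(void)
{
	if (++token == 0)	/* on 2^32 wrap, skip the reserved value 0 */
		token++;
	return (token);
}

int
main(void)
{
	token = UINT32_MAX;	/* force the wraparound case */
	assert(next_token() == 1);
	assert(next_token() == 2);
	return (0);
}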
Use ng_btsocket_l2cap_sockets_mtx for protection */ if (++token == 0) token++; pcb->token = token; LIST_INSERT_HEAD(&ng_btsocket_l2cap_sockets, pcb, next); if (td != NULL) mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } /* ng_btsocket_l2cap_attach */ /* * Bind socket */ int ng_btsocket_l2cap_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { ng_btsocket_l2cap_pcb_t *pcb = NULL; struct sockaddr_l2cap *sa = (struct sockaddr_l2cap *) nam; int psm, error = 0; if (ng_btsocket_l2cap_node == NULL) return (EINVAL); /* Verify address */ if (sa == NULL) return (EINVAL); if (sa->l2cap_family != AF_BLUETOOTH) return (EAFNOSUPPORT); /* For the time being, LE binding is not supported. */ if ((sa->l2cap_len != sizeof(*sa)) && (sa->l2cap_len != sizeof(struct sockaddr_l2cap_compat))) return (EINVAL); psm = le16toh(sa->l2cap_psm); /* * Check if other socket has this address already (look for exact * match PSM and bdaddr) and assign socket address if it's available. * * Note: socket can be bound to ANY PSM (zero) thus allowing several * channels with the same PSM between the same pair of BD_ADDR'es. */ mtx_lock(&ng_btsocket_l2cap_sockets_mtx); LIST_FOREACH(pcb, &ng_btsocket_l2cap_sockets, next) if (psm != 0 && psm == pcb->psm && bcmp(&pcb->src, &sa->l2cap_bdaddr, sizeof(bdaddr_t)) == 0) break; if (pcb == NULL) { /* Set socket address */ pcb = so2l2cap_pcb(so); if (pcb != NULL) { bcopy(&sa->l2cap_bdaddr, &pcb->src, sizeof(pcb->src)); pcb->psm = psm; } else error = EINVAL; } else error = EADDRINUSE; mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (error); } /* ng_btsocket_l2cap_bind */ /* * Connect socket */ int ng_btsocket_l2cap_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { ng_btsocket_l2cap_pcb_t *pcb = so2l2cap_pcb(so); struct sockaddr_l2cap_compat *sal = (struct sockaddr_l2cap_compat *) nam; struct sockaddr_l2cap *sa = (struct sockaddr_l2cap *)nam; struct sockaddr_l2cap ba; ng_btsocket_l2cap_rtentry_t *rt = NULL; int have_src, error = 0; int idtype = NG_L2CAP_L2CA_IDTYPE_BREDR; /* Check socket */ if (pcb == NULL) return (EINVAL); if (ng_btsocket_l2cap_node == NULL) return (EINVAL); if (pcb->state == NG_BTSOCKET_L2CAP_CONNECTING) return (EINPROGRESS); /* Verify address */ if (sa == NULL) return (EINVAL); if (sa->l2cap_family != AF_BLUETOOTH) return (EAFNOSUPPORT); if (sa->l2cap_len == sizeof(*sal)) { bcopy(sal, &ba, sizeof(*sal)); sa = &ba; sa->l2cap_len = sizeof(*sa); sa->l2cap_bdaddr_type = BDADDR_BREDR; } if (sa->l2cap_len != sizeof(*sa)) return (EINVAL); if (sa->l2cap_psm && sa->l2cap_cid) return (EINVAL); if (bcmp(&sa->l2cap_bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0) return (EDESTADDRREQ); if ((sa->l2cap_bdaddr_type == BDADDR_BREDR) && (sa->l2cap_psm == 0)) return (EDESTADDRREQ); if (sa->l2cap_bdaddr_type != BDADDR_BREDR) { if (sa->l2cap_cid == NG_L2CAP_ATT_CID) { idtype = NG_L2CAP_L2CA_IDTYPE_ATT; } else if (sa->l2cap_cid == NG_L2CAP_SMP_CID) { idtype = NG_L2CAP_L2CA_IDTYPE_SMP; } else { /* cid == 0 would be plain LE, which is not supported yet */ idtype = NG_L2CAP_L2CA_IDTYPE_LE; return (EINVAL); } } if (pcb->psm != 0 && pcb->psm != le16toh(sa->l2cap_psm)) return (EINVAL); /* * Routing. Socket should be bound to some source address. The source * address can be ANY. Destination address must be set and it must not * be ANY. If source address is ANY then find first rtentry that has * src != dst.
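Taken together, the address checks above mean that a BR/EDR connect(2) needs a non-wildcard bdaddr plus a non-zero PSM, while LE connects select ATT or SMP through a fixed CID and must not set a PSM. A hedged userland sketch of the BR/EDR case, assuming <bluetooth.h> provides struct sockaddr_l2cap and bt_aton() (the address and PSM are example values; link with -lbluetooth):

#include <sys/types.h>
#include <sys/endian.h>
#include <sys/socket.h>
#include <string.h>
#include <bluetooth.h>	/* assumption: sockaddr_l2cap, bt_aton() */
#include <err.h>

int
main(void)
{
	struct sockaddr_l2cap sa;
	int s;

	s = socket(PF_BLUETOOTH, SOCK_SEQPACKET, BLUETOOTH_PROTO_L2CAP);
	if (s < 0)
		err(1, "socket");
	memset(&sa, 0, sizeof(sa));
	sa.l2cap_len = sizeof(sa);
	sa.l2cap_family = AF_BLUETOOTH;
	sa.l2cap_psm = htole16(1);		/* example PSM (SDP) */
	sa.l2cap_bdaddr_type = BDADDR_BREDR;	/* BR/EDR requires a PSM */
	if (!bt_aton("00:11:22:33:44:55", &sa.l2cap_bdaddr))
		errx(1, "bt_aton");
	if (connect(s, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		err(1, "connect");
	return (0);
}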
*/ mtx_lock(&ng_btsocket_l2cap_rt_mtx); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); mtx_lock(&pcb->pcb_mtx); /* Send destination address and PSM */ bcopy(&sa->l2cap_bdaddr, &pcb->dst, sizeof(pcb->dst)); pcb->psm = le16toh(sa->l2cap_psm); pcb->dsttype = sa->l2cap_bdaddr_type; pcb->cid = 0; pcb->idtype = idtype; pcb->rt = NULL; have_src = bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(pcb->src)); LIST_FOREACH(rt, &ng_btsocket_l2cap_rt, next) { if (rt->hook == NULL || NG_HOOK_NOT_VALID(rt->hook)) continue; /* Match src and dst */ if (have_src) { if (bcmp(&pcb->src, &rt->src, sizeof(rt->src)) == 0) break; } else { if (bcmp(&pcb->dst, &rt->src, sizeof(rt->src)) != 0) break; } } if (rt != NULL) { pcb->rt = rt; if (!have_src){ bcopy(&rt->src, &pcb->src, sizeof(pcb->src)); pcb->srctype = (sa->l2cap_bdaddr_type == BDADDR_BREDR)? BDADDR_BREDR : BDADDR_LE_PUBLIC; } } else error = EHOSTUNREACH; /* * Send L2CA_Connect request */ if (error == 0) { error = ng_btsocket_l2cap_send_l2ca_con_req(pcb); if (error == 0) { pcb->flags |= NG_BTSOCKET_L2CAP_CLIENT; pcb->state = NG_BTSOCKET_L2CAP_CONNECTING; soisconnecting(pcb->so); ng_btsocket_l2cap_timeout(pcb); } } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); mtx_unlock(&ng_btsocket_l2cap_rt_mtx); return (error); } /* ng_btsocket_l2cap_connect */ /* * Process ioctl's calls on socket */ int ng_btsocket_l2cap_control(struct socket *so, u_long cmd, void *data, struct ifnet *ifp, struct thread *td) { return (EINVAL); } /* ng_btsocket_l2cap_control */ /* * Process getsockopt/setsockopt system calls */ int ng_btsocket_l2cap_ctloutput(struct socket *so, struct sockopt *sopt) { ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); int error = 0; ng_l2cap_cfg_opt_val_t v; if (pcb == NULL) return (EINVAL); if (ng_btsocket_l2cap_node == NULL) return (EINVAL); if (sopt->sopt_level != SOL_L2CAP) return (0); mtx_lock(&pcb->pcb_mtx); switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case SO_L2CAP_IMTU: /* get incoming MTU */ error = sooptcopyout(sopt, &pcb->imtu, sizeof(pcb->imtu)); break; case SO_L2CAP_OMTU: /* get outgoing (peer incoming) MTU */ error = sooptcopyout(sopt, &pcb->omtu, sizeof(pcb->omtu)); break; case SO_L2CAP_IFLOW: /* get incoming flow spec. */ error = sooptcopyout(sopt, &pcb->iflow, sizeof(pcb->iflow)); break; case SO_L2CAP_OFLOW: /* get outgoing flow spec. */ error = sooptcopyout(sopt, &pcb->oflow, sizeof(pcb->oflow)); break; case SO_L2CAP_FLUSH: /* get flush timeout */ error = sooptcopyout(sopt, &pcb->flush_timo, sizeof(pcb->flush_timo)); break; case SO_L2CAP_ENCRYPTED: /* get encrypt required */ error = sooptcopyout(sopt, &pcb->need_encrypt, sizeof(pcb->need_encrypt)); break; default: error = ENOPROTOOPT; break; } break; case SOPT_SET: /* * XXX * We do not allow to change these parameters while socket is * connected or we are in the process of creating a connection. * May be this should indicate re-configuration of the open * channel? */ if (pcb->state != NG_BTSOCKET_L2CAP_CLOSED) { error = EACCES; break; } switch (sopt->sopt_name) { case SO_L2CAP_IMTU: /* set incoming MTU */ error = sooptcopyin(sopt, &v, sizeof(v), sizeof(v.mtu)); if (error == 0) pcb->imtu = v.mtu; break; case SO_L2CAP_OFLOW: /* set outgoing flow spec. 
*/ error = sooptcopyin(sopt, &v, sizeof(v),sizeof(v.flow)); if (error == 0) bcopy(&v.flow, &pcb->oflow, sizeof(pcb->oflow)); break; case SO_L2CAP_FLUSH: /* set flush timeout */ error = sooptcopyin(sopt, &v, sizeof(v), sizeof(v.flush_timo)); if (error == 0) pcb->flush_timo = v.flush_timo; break; case SO_L2CAP_ENCRYPTED: /*set connect encryption opt*/ if((pcb->state != NG_BTSOCKET_L2CAP_OPEN) && (pcb->state != NG_BTSOCKET_L2CAP_W4_ENC_CHANGE)){ error = sooptcopyin(sopt, &v, sizeof(v), sizeof(v.encryption)); if(error == 0) pcb->need_encrypt = (v.encryption)?1:0; }else{ error = EINVAL; } break; default: error = ENOPROTOOPT; break; } break; default: error = EINVAL; break; } mtx_unlock(&pcb->pcb_mtx); return (error); } /* ng_btsocket_l2cap_ctloutput */ /* * Detach and destroy socket */ void ng_btsocket_l2cap_detach(struct socket *so) { ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); KASSERT(pcb != NULL, ("ng_btsocket_l2cap_detach: pcb == NULL")); if (ng_btsocket_l2cap_node == NULL) return; mtx_lock(&ng_btsocket_l2cap_sockets_mtx); mtx_lock(&pcb->pcb_mtx); /* XXX what to do with pending request? */ if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO) ng_btsocket_l2cap_untimeout(pcb); if (pcb->state != NG_BTSOCKET_L2CAP_CLOSED && pcb->state != NG_BTSOCKET_L2CAP_DISCONNECTING) /* Send disconnect request with "zero" token */ ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb); pcb->state = NG_BTSOCKET_L2CAP_CLOSED; LIST_REMOVE(pcb, next); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); mtx_destroy(&pcb->pcb_mtx); bzero(pcb, sizeof(*pcb)); free(pcb, M_NETGRAPH_BTSOCKET_L2CAP); soisdisconnected(so); so->so_pcb = NULL; } /* ng_btsocket_l2cap_detach */ /* * Disconnect socket */ int ng_btsocket_l2cap_disconnect(struct socket *so) { ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); int error = 0; if (pcb == NULL) return (EINVAL); if (ng_btsocket_l2cap_node == NULL) return (EINVAL); mtx_lock(&pcb->pcb_mtx); if (pcb->state == NG_BTSOCKET_L2CAP_DISCONNECTING) { mtx_unlock(&pcb->pcb_mtx); return (EINPROGRESS); } if (pcb->state != NG_BTSOCKET_L2CAP_CLOSED) { /* XXX FIXME what to do with pending request? 
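The ctloutput() handlers above copy option values straight from the PCB, and SET requests are refused unless the socket is closed (or, for SO_L2CAP_ENCRYPTED, not yet open). A hedged sketch of reading the incoming MTU from userland; the 16-bit width of the option value is an assumption here, and headers/linkage are as in the earlier examples:

#include <sys/types.h>
#include <sys/socket.h>
#include <stdint.h>
#include <stdio.h>
#include <bluetooth.h>	/* assumption: defines SOL_L2CAP, SO_L2CAP_IMTU */
#include <err.h>

int
main(void)
{
	uint16_t imtu;		/* assumption: the MTU option is 16 bits */
	socklen_t len = sizeof(imtu);
	int s;

	s = socket(PF_BLUETOOTH, SOCK_SEQPACKET, BLUETOOTH_PROTO_L2CAP);
	if (s < 0)
		err(1, "socket");
	if (getsockopt(s, SOL_L2CAP, SO_L2CAP_IMTU, &imtu, &len) < 0)
		err(1, "getsockopt(SO_L2CAP_IMTU)");
	printf("incoming MTU: %u\n", imtu);
	return (0);
}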
*/ if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO) ng_btsocket_l2cap_untimeout(pcb); error = ng_btsocket_l2cap_send_l2ca_discon_req(pcb->token, pcb); if (error == 0) { pcb->state = NG_BTSOCKET_L2CAP_DISCONNECTING; soisdisconnecting(so); ng_btsocket_l2cap_timeout(pcb); } /* XXX FIXME what to do if error != 0 */ } mtx_unlock(&pcb->pcb_mtx); return (error); } /* ng_btsocket_l2cap_disconnect */ /* * Listen on socket */ int ng_btsocket_l2cap_listen(struct socket *so, int backlog, struct thread *td) { ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); int error; SOCK_LOCK(so); error = solisten_proto_check(so); if (error != 0) goto out; if (pcb == NULL) { solisten_proto_abort(so); error = EINVAL; goto out; } if (ng_btsocket_l2cap_node == NULL) { solisten_proto_abort(so); error = EINVAL; goto out; } if (pcb->psm == 0) { solisten_proto_abort(so); error = EADDRNOTAVAIL; goto out; } solisten_proto(so, backlog); out: SOCK_UNLOCK(so); return (error); } /* ng_btsocket_listen */ -/* - * Get peer address - */ - -int -ng_btsocket_l2cap_peeraddr(struct socket *so, struct sockaddr **nam) +static int +ng_btsocket_l2cap_peeraddr1(struct socket *so, struct sockaddr_l2cap *sa) { ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); - struct sockaddr_l2cap sa; if (pcb == NULL) return (EINVAL); if (ng_btsocket_l2cap_node == NULL) return (EINVAL); - bcopy(&pcb->dst, &sa.l2cap_bdaddr, sizeof(sa.l2cap_bdaddr)); - sa.l2cap_psm = htole16(pcb->psm); - sa.l2cap_len = sizeof(sa); - sa.l2cap_family = AF_BLUETOOTH; + *sa = (struct sockaddr_l2cap ){ + .l2cap_len = sizeof(struct sockaddr_l2cap), + .l2cap_family = AF_BLUETOOTH, + .l2cap_psm = htole16(pcb->psm), + }; + bcopy(&pcb->dst, &sa->l2cap_bdaddr, sizeof(sa->l2cap_bdaddr)); switch(pcb->idtype){ case NG_L2CAP_L2CA_IDTYPE_ATT: - sa.l2cap_cid = NG_L2CAP_ATT_CID; + sa->l2cap_cid = NG_L2CAP_ATT_CID; break; case NG_L2CAP_L2CA_IDTYPE_SMP: - sa.l2cap_cid = NG_L2CAP_SMP_CID; + sa->l2cap_cid = NG_L2CAP_SMP_CID; break; default: - sa.l2cap_cid = 0; + sa->l2cap_cid = 0; break; } - sa.l2cap_bdaddr_type = pcb->dsttype; + sa->l2cap_bdaddr_type = pcb->dsttype; + + return (0); +} + +/* + * Get peer address + */ +int +ng_btsocket_l2cap_peeraddr(struct socket *so, struct sockaddr **nam) +{ + struct sockaddr_l2cap sa; + int error; + + error = ng_btsocket_l2cap_peeraddr1(so, &sa); + if (error != 0) + return (error); *nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT); return ((*nam == NULL)? ENOMEM : 0); -} /* ng_btsocket_l2cap_peeraddr */ +} + +/* + * Accept connection on socket. Nothing to do here, socket must be connected + * and ready, so just return peer address and be done with it. 
+ */ +int +ng_btsocket_l2cap_accept(struct socket *so, struct sockaddr *sa) +{ + if (ng_btsocket_l2cap_node == NULL) + return (EINVAL); + + return (ng_btsocket_l2cap_peeraddr1(so, (struct sockaddr_l2cap *)sa)); +} /* * Send data to socket */ int ng_btsocket_l2cap_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { ng_btsocket_l2cap_pcb_t *pcb = so2l2cap_pcb(so); int error = 0; if (ng_btsocket_l2cap_node == NULL) { error = ENETDOWN; goto drop; } /* Check socket and input */ if (pcb == NULL || m == NULL || control != NULL) { error = EINVAL; goto drop; } mtx_lock(&pcb->pcb_mtx); /* Make sure socket is connected */ if (pcb->state != NG_BTSOCKET_L2CAP_OPEN) { mtx_unlock(&pcb->pcb_mtx); error = ENOTCONN; goto drop; } /* Check route */ if (pcb->rt == NULL || pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) { mtx_unlock(&pcb->pcb_mtx); error = ENETDOWN; goto drop; } /* Check packet size against outgoing (peer's incoming) MTU */ if (m->m_pkthdr.len > pcb->omtu) { NG_BTSOCKET_L2CAP_ERR( "%s: Packet too big, len=%d, omtu=%d\n", __func__, m->m_pkthdr.len, pcb->omtu); mtx_unlock(&pcb->pcb_mtx); error = EMSGSIZE; goto drop; } /* * First put packet on socket send queue. Then check if we have * pending timeout. If we do not have timeout then we must send * packet and schedule timeout. Otherwise do nothing and wait for * L2CA_WRITE_RSP. */ sbappendrecord(&pcb->so->so_snd, m); m = NULL; if (!(pcb->flags & NG_BTSOCKET_L2CAP_TIMO)) { error = ng_btsocket_l2cap_send2(pcb); if (error == 0) ng_btsocket_l2cap_timeout(pcb); else sbdroprecord(&pcb->so->so_snd); /* XXX */ } mtx_unlock(&pcb->pcb_mtx); drop: NG_FREE_M(m); /* checks for != NULL */ NG_FREE_M(control); return (error); } /* ng_btsocket_l2cap_send */ /* * Send first packet in the socket queue to the L2CAP layer */ static int ng_btsocket_l2cap_send2(ng_btsocket_l2cap_pcb_p pcb) { struct mbuf *m = NULL; ng_l2cap_l2ca_hdr_t *hdr = NULL; int error = 0; mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (sbavail(&pcb->so->so_snd) == 0) return (EINVAL); /* XXX */ m = m_dup(pcb->so->so_snd.sb_mb, M_NOWAIT); if (m == NULL) return (ENOBUFS); /* Create L2CA packet header */ M_PREPEND(m, sizeof(*hdr), M_NOWAIT); if (m != NULL) if (m->m_len < sizeof(*hdr)) m = m_pullup(m, sizeof(*hdr)); if (m == NULL) { NG_BTSOCKET_L2CAP_ERR( "%s: Failed to create L2CA packet header\n", __func__); return (ENOBUFS); } hdr = mtod(m, ng_l2cap_l2ca_hdr_t *); hdr->token = pcb->token; hdr->length = m->m_pkthdr.len - sizeof(*hdr); hdr->lcid = pcb->cid; hdr->idtype = pcb->idtype; NG_BTSOCKET_L2CAP_INFO( "%s: Sending packet: len=%d, length=%d, lcid=%d, token=%d, state=%d\n", __func__, m->m_pkthdr.len, hdr->length, hdr->lcid, hdr->token, pcb->state); /* * If we got here then we have successfully created a new L2CAP * data packet and now we can send it to the L2CAP layer */ NG_SEND_DATA_ONLY(error, pcb->rt->hook, m); return (error); } /* ng_btsocket_l2cap_send2 */ /* * Get socket address */ int ng_btsocket_l2cap_sockaddr(struct socket *so, struct sockaddr **nam) { ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); struct sockaddr_l2cap sa; if (pcb == NULL) return (EINVAL); if (ng_btsocket_l2cap_node == NULL) return (EINVAL); bcopy(&pcb->src, &sa.l2cap_bdaddr, sizeof(sa.l2cap_bdaddr)); sa.l2cap_psm = htole16(pcb->psm); sa.l2cap_len = sizeof(sa); sa.l2cap_family = AF_BLUETOOTH; sa.l2cap_cid = 0; sa.l2cap_bdaddr_type = pcb->srctype; *nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT); return ((*nam == NULL)?
ENOMEM : 0); } /* ng_btsocket_l2cap_sockaddr */ /***************************************************************************** ***************************************************************************** ** Misc. functions ***************************************************************************** *****************************************************************************/ /* * Look for the socket that listens on given PSM and bdaddr. Returns exact or * close match (if any). Caller must hold ng_btsocket_l2cap_sockets_mtx. */ static ng_btsocket_l2cap_pcb_p ng_btsocket_l2cap_pcb_by_addr(bdaddr_p bdaddr, int psm) { ng_btsocket_l2cap_pcb_p p = NULL, p1 = NULL; mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED); LIST_FOREACH(p, &ng_btsocket_l2cap_sockets, next) { if (p->so == NULL || !SOLISTENING(p->so) || p->psm != psm) continue; if (bcmp(&p->src, bdaddr, sizeof(p->src)) == 0) break; if (bcmp(&p->src, NG_HCI_BDADDR_ANY, sizeof(p->src)) == 0) p1 = p; } return ((p != NULL)? p : p1); } /* ng_btsocket_l2cap_pcb_by_addr */ /* * Look for the socket that has given token. * Caller must hold ng_btsocket_l2cap_sockets_mtx. */ static ng_btsocket_l2cap_pcb_p ng_btsocket_l2cap_pcb_by_token(u_int32_t token) { ng_btsocket_l2cap_pcb_p p = NULL; if (token == 0) return (NULL); mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED); LIST_FOREACH(p, &ng_btsocket_l2cap_sockets, next) if (p->token == token) break; return (p); } /* ng_btsocket_l2cap_pcb_by_token */ /* * Look for the socket that assigned to given source address and channel ID. * Caller must hold ng_btsocket_l2cap_sockets_mtx */ static ng_btsocket_l2cap_pcb_p ng_btsocket_l2cap_pcb_by_cid(bdaddr_p src, int cid, int idtype) { ng_btsocket_l2cap_pcb_p p = NULL; mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED); LIST_FOREACH(p, &ng_btsocket_l2cap_sockets, next){ if (p->cid == cid && bcmp(src, &p->src, sizeof(p->src)) == 0&& p->idtype == idtype) break; } return (p); } /* ng_btsocket_l2cap_pcb_by_cid */ /* * Set timeout on socket */ static void ng_btsocket_l2cap_timeout(ng_btsocket_l2cap_pcb_p pcb) { mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (!(pcb->flags & NG_BTSOCKET_L2CAP_TIMO)) { pcb->flags |= NG_BTSOCKET_L2CAP_TIMO; callout_reset(&pcb->timo, bluetooth_l2cap_ertx_timeout(), ng_btsocket_l2cap_process_timeout, pcb); } else KASSERT(0, ("%s: Duplicated socket timeout?!\n", __func__)); } /* ng_btsocket_l2cap_timeout */ /* * Unset timeout on socket */ static void ng_btsocket_l2cap_untimeout(ng_btsocket_l2cap_pcb_p pcb) { mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO) { callout_stop(&pcb->timo); pcb->flags &= ~NG_BTSOCKET_L2CAP_TIMO; } else KASSERT(0, ("%s: No socket timeout?!\n", __func__)); } /* ng_btsocket_l2cap_untimeout */ /* * Process timeout on socket */ static void ng_btsocket_l2cap_process_timeout(void *xpcb) { ng_btsocket_l2cap_pcb_p pcb = (ng_btsocket_l2cap_pcb_p) xpcb; mtx_assert(&pcb->pcb_mtx, MA_OWNED); pcb->flags &= ~NG_BTSOCKET_L2CAP_TIMO; pcb->so->so_error = ETIMEDOUT; switch (pcb->state) { case NG_BTSOCKET_L2CAP_CONNECTING: case NG_BTSOCKET_L2CAP_CONFIGURING: case NG_BTSOCKET_L2CAP_W4_ENC_CHANGE: /* Send disconnect request with "zero" token */ if (pcb->cid != 0) ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb); /* ... 
and close the socket */ pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); break; case NG_BTSOCKET_L2CAP_OPEN: /* Send timeout - drop packet and wakeup sender */ sbdroprecord(&pcb->so->so_snd); sowwakeup(pcb->so); break; case NG_BTSOCKET_L2CAP_DISCONNECTING: /* Disconnect timeout - disconnect the socket anyway */ pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); break; default: NG_BTSOCKET_L2CAP_ERR( "%s: Invalid socket state=%d\n", __func__, pcb->state); break; } } /* ng_btsocket_l2cap_process_timeout */ /* * Translate HCI/L2CAP error code into "errno" code * XXX Note: Some L2CAP and HCI error codes have the same value, but * different meaning */ static int ng_btsocket_l2cap_result2errno(int result) { switch (result) { case 0x00: /* No error */ return (0); case 0x01: /* Unknown HCI command */ return (ENODEV); case 0x02: /* No connection */ return (ENOTCONN); case 0x03: /* Hardware failure */ return (EIO); case 0x04: /* Page timeout */ return (EHOSTDOWN); case 0x05: /* Authentication failure */ case 0x06: /* Key missing */ case 0x18: /* Pairing not allowed */ case 0x21: /* Role change not allowed */ case 0x24: /* LMP PDU not allowed */ case 0x25: /* Encryption mode not acceptable */ case 0x26: /* Unit key used */ return (EACCES); case 0x07: /* Memory full */ return (ENOMEM); case 0x08: /* Connection timeout */ case 0x10: /* Host timeout */ case 0x22: /* LMP response timeout */ case 0xee: /* HCI timeout */ case 0xeeee: /* L2CAP timeout */ return (ETIMEDOUT); case 0x09: /* Max number of connections */ case 0x0a: /* Max number of SCO connections to a unit */ return (EMLINK); case 0x0b: /* ACL connection already exists */ return (EEXIST); case 0x0c: /* Command disallowed */ return (EBUSY); case 0x0d: /* Host rejected due to limited resources */ case 0x0e: /* Host rejected due to security reasons */ case 0x0f: /* Host rejected due to remote unit is a personal unit */ case 0x1b: /* SCO offset rejected */ case 0x1c: /* SCO interval rejected */ case 0x1d: /* SCO air mode rejected */ return (ECONNREFUSED); case 0x11: /* Unsupported feature or parameter value */ case 0x19: /* Unknown LMP PDU */ case 0x1a: /* Unsupported remote feature */ case 0x20: /* Unsupported LMP parameter value */ case 0x27: /* QoS is not supported */ case 0x29: /* Pairing with unit key not supported */ return (EOPNOTSUPP); case 0x12: /* Invalid HCI command parameter */ case 0x1e: /* Invalid LMP parameters */ return (EINVAL); case 0x13: /* Other end terminated connection: User ended connection */ case 0x14: /* Other end terminated connection: Low resources */ case 0x15: /* Other end terminated connection: About to power off */ return (ECONNRESET); case 0x16: /* Connection terminated by local host */ return (ECONNABORTED); #if 0 /* XXX not yet */ case 0x17: /* Repeated attempts */ case 0x1f: /* Unspecified error */ case 0x23: /* LMP error transaction collision */ case 0x28: /* Instant passed */ #endif } return (ENOSYS); } /* ng_btsocket_l2cap_result2errno */ diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c index 00225f8240e9..af542f3c258b 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c @@ -1,3557 +1,3569 @@ /* * ng_btsocket_rfcomm.c */ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001-2003 Maksim Yevmenkin * All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: ng_btsocket_rfcomm.c,v 1.28 2003/09/14 23:29:06 max Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* MALLOC define */ #ifdef NG_SEPARATE_MALLOC static MALLOC_DEFINE(M_NETGRAPH_BTSOCKET_RFCOMM, "netgraph_btsocks_rfcomm", "Netgraph Bluetooth RFCOMM sockets"); #else #define M_NETGRAPH_BTSOCKET_RFCOMM M_NETGRAPH #endif /* NG_SEPARATE_MALLOC */ /* Debug */ #define NG_BTSOCKET_RFCOMM_INFO \ if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_INFO_LEVEL && \ ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \ printf #define NG_BTSOCKET_RFCOMM_WARN \ if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_WARN_LEVEL && \ ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \ printf #define NG_BTSOCKET_RFCOMM_ERR \ if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_ERR_LEVEL && \ ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \ printf #define NG_BTSOCKET_RFCOMM_ALERT \ if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_ALERT_LEVEL && \ ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \ printf #define ALOT 0x7fff /* Local prototypes */ static int ng_btsocket_rfcomm_upcall (struct socket *so, void *arg, int waitflag); static void ng_btsocket_rfcomm_sessions_task (void *ctx, int pending); static void ng_btsocket_rfcomm_session_task (ng_btsocket_rfcomm_session_p s); #define ng_btsocket_rfcomm_task_wakeup() \ taskqueue_enqueue(taskqueue_swi_giant, &ng_btsocket_rfcomm_task) static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_connect_ind (ng_btsocket_rfcomm_session_p s, int channel); static void ng_btsocket_rfcomm_connect_cfm (ng_btsocket_rfcomm_session_p s); static int ng_btsocket_rfcomm_session_create (ng_btsocket_rfcomm_session_p *sp, struct socket *l2so, bdaddr_p src, bdaddr_p dst, struct thread *td); static int ng_btsocket_rfcomm_session_accept (ng_btsocket_rfcomm_session_p s0); static int ng_btsocket_rfcomm_session_connect (ng_btsocket_rfcomm_session_p s); static int ng_btsocket_rfcomm_session_receive (ng_btsocket_rfcomm_session_p s); static 
int ng_btsocket_rfcomm_session_send (ng_btsocket_rfcomm_session_p s); static void ng_btsocket_rfcomm_session_clean (ng_btsocket_rfcomm_session_p s); static void ng_btsocket_rfcomm_session_process_pcb (ng_btsocket_rfcomm_session_p s); static ng_btsocket_rfcomm_session_p ng_btsocket_rfcomm_session_by_addr (bdaddr_p src, bdaddr_p dst); static int ng_btsocket_rfcomm_receive_frame (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_sabm (ng_btsocket_rfcomm_session_p s, int dlci); static int ng_btsocket_rfcomm_receive_disc (ng_btsocket_rfcomm_session_p s, int dlci); static int ng_btsocket_rfcomm_receive_ua (ng_btsocket_rfcomm_session_p s, int dlci); static int ng_btsocket_rfcomm_receive_dm (ng_btsocket_rfcomm_session_p s, int dlci); static int ng_btsocket_rfcomm_receive_uih (ng_btsocket_rfcomm_session_p s, int dlci, int pf, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_mcc (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_test (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_fc (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_msc (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_rpn (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_rls (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_pn (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static void ng_btsocket_rfcomm_set_pn (ng_btsocket_rfcomm_pcb_p pcb, u_int8_t cr, u_int8_t flow_control, u_int8_t credits, u_int16_t mtu); static int ng_btsocket_rfcomm_send_command (ng_btsocket_rfcomm_session_p s, u_int8_t type, u_int8_t dlci); static int ng_btsocket_rfcomm_send_uih (ng_btsocket_rfcomm_session_p s, u_int8_t address, u_int8_t pf, u_int8_t credits, struct mbuf *data); static int ng_btsocket_rfcomm_send_msc (ng_btsocket_rfcomm_pcb_p pcb); static int ng_btsocket_rfcomm_send_pn (ng_btsocket_rfcomm_pcb_p pcb); static int ng_btsocket_rfcomm_send_credits (ng_btsocket_rfcomm_pcb_p pcb); static int ng_btsocket_rfcomm_pcb_send (ng_btsocket_rfcomm_pcb_p pcb, int limit); static void ng_btsocket_rfcomm_pcb_kill (ng_btsocket_rfcomm_pcb_p pcb, int error); static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_pcb_by_dlci (ng_btsocket_rfcomm_session_p s, int dlci); static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_pcb_listener (bdaddr_p src, int channel); static void ng_btsocket_rfcomm_timeout (ng_btsocket_rfcomm_pcb_p pcb); static void ng_btsocket_rfcomm_untimeout (ng_btsocket_rfcomm_pcb_p pcb); static void ng_btsocket_rfcomm_process_timeout (void *xpcb); static struct mbuf * ng_btsocket_rfcomm_prepare_packet (struct sockbuf *sb, int length); /* Globals */ extern int ifqmaxlen; static u_int32_t ng_btsocket_rfcomm_debug_level; static u_int32_t ng_btsocket_rfcomm_timo; struct task ng_btsocket_rfcomm_task; static LIST_HEAD(, ng_btsocket_rfcomm_session) ng_btsocket_rfcomm_sessions; static struct mtx ng_btsocket_rfcomm_sessions_mtx; static LIST_HEAD(, ng_btsocket_rfcomm_pcb) ng_btsocket_rfcomm_sockets; static struct mtx ng_btsocket_rfcomm_sockets_mtx; static struct timeval ng_btsocket_rfcomm_lasttime; static int ng_btsocket_rfcomm_curpps; /* Sysctl tree */ SYSCTL_DECL(_net_bluetooth_rfcomm_sockets); static SYSCTL_NODE(_net_bluetooth_rfcomm_sockets, OID_AUTO, stream, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Bluetooth STREAM RFCOMM sockets family"); SYSCTL_UINT(_net_bluetooth_rfcomm_sockets_stream, 
OID_AUTO, debug_level, CTLFLAG_RW, &ng_btsocket_rfcomm_debug_level, NG_BTSOCKET_INFO_LEVEL, "Bluetooth STREAM RFCOMM sockets debug level"); SYSCTL_UINT(_net_bluetooth_rfcomm_sockets_stream, OID_AUTO, timeout, CTLFLAG_RW, &ng_btsocket_rfcomm_timo, 60, "Bluetooth STREAM RFCOMM sockets timeout"); /***************************************************************************** ***************************************************************************** ** RFCOMM CRC ***************************************************************************** *****************************************************************************/ static u_int8_t ng_btsocket_rfcomm_crc_table[256] = { 0x00, 0x91, 0xe3, 0x72, 0x07, 0x96, 0xe4, 0x75, 0x0e, 0x9f, 0xed, 0x7c, 0x09, 0x98, 0xea, 0x7b, 0x1c, 0x8d, 0xff, 0x6e, 0x1b, 0x8a, 0xf8, 0x69, 0x12, 0x83, 0xf1, 0x60, 0x15, 0x84, 0xf6, 0x67, 0x38, 0xa9, 0xdb, 0x4a, 0x3f, 0xae, 0xdc, 0x4d, 0x36, 0xa7, 0xd5, 0x44, 0x31, 0xa0, 0xd2, 0x43, 0x24, 0xb5, 0xc7, 0x56, 0x23, 0xb2, 0xc0, 0x51, 0x2a, 0xbb, 0xc9, 0x58, 0x2d, 0xbc, 0xce, 0x5f, 0x70, 0xe1, 0x93, 0x02, 0x77, 0xe6, 0x94, 0x05, 0x7e, 0xef, 0x9d, 0x0c, 0x79, 0xe8, 0x9a, 0x0b, 0x6c, 0xfd, 0x8f, 0x1e, 0x6b, 0xfa, 0x88, 0x19, 0x62, 0xf3, 0x81, 0x10, 0x65, 0xf4, 0x86, 0x17, 0x48, 0xd9, 0xab, 0x3a, 0x4f, 0xde, 0xac, 0x3d, 0x46, 0xd7, 0xa5, 0x34, 0x41, 0xd0, 0xa2, 0x33, 0x54, 0xc5, 0xb7, 0x26, 0x53, 0xc2, 0xb0, 0x21, 0x5a, 0xcb, 0xb9, 0x28, 0x5d, 0xcc, 0xbe, 0x2f, 0xe0, 0x71, 0x03, 0x92, 0xe7, 0x76, 0x04, 0x95, 0xee, 0x7f, 0x0d, 0x9c, 0xe9, 0x78, 0x0a, 0x9b, 0xfc, 0x6d, 0x1f, 0x8e, 0xfb, 0x6a, 0x18, 0x89, 0xf2, 0x63, 0x11, 0x80, 0xf5, 0x64, 0x16, 0x87, 0xd8, 0x49, 0x3b, 0xaa, 0xdf, 0x4e, 0x3c, 0xad, 0xd6, 0x47, 0x35, 0xa4, 0xd1, 0x40, 0x32, 0xa3, 0xc4, 0x55, 0x27, 0xb6, 0xc3, 0x52, 0x20, 0xb1, 0xca, 0x5b, 0x29, 0xb8, 0xcd, 0x5c, 0x2e, 0xbf, 0x90, 0x01, 0x73, 0xe2, 0x97, 0x06, 0x74, 0xe5, 0x9e, 0x0f, 0x7d, 0xec, 0x99, 0x08, 0x7a, 0xeb, 0x8c, 0x1d, 0x6f, 0xfe, 0x8b, 0x1a, 0x68, 0xf9, 0x82, 0x13, 0x61, 0xf0, 0x85, 0x14, 0x66, 0xf7, 0xa8, 0x39, 0x4b, 0xda, 0xaf, 0x3e, 0x4c, 0xdd, 0xa6, 0x37, 0x45, 0xd4, 0xa1, 0x30, 0x42, 0xd3, 0xb4, 0x25, 0x57, 0xc6, 0xb3, 0x22, 0x50, 0xc1, 0xba, 0x2b, 0x59, 0xc8, 0xbd, 0x2c, 0x5e, 0xcf }; /* CRC */ static u_int8_t ng_btsocket_rfcomm_crc(u_int8_t *data, int length) { u_int8_t crc = 0xff; while (length --) crc = ng_btsocket_rfcomm_crc_table[crc ^ *data++]; return (crc); } /* ng_btsocket_rfcomm_crc */ /* FCS on 2 bytes */ static u_int8_t ng_btsocket_rfcomm_fcs2(u_int8_t *data) { return (0xff - ng_btsocket_rfcomm_crc(data, 2)); } /* ng_btsocket_rfcomm_fcs2 */ /* FCS on 3 bytes */ static u_int8_t ng_btsocket_rfcomm_fcs3(u_int8_t *data) { return (0xff - ng_btsocket_rfcomm_crc(data, 3)); } /* ng_btsocket_rfcomm_fcs3 */ /* * Check FCS * * From Bluetooth spec * * "... In 07.10, the frame check sequence (FCS) is calculated on different * sets of fields for different frame types. These are the fields that the * FCS are calculated on: * * For SABM, DISC, UA, DM frames: on Address, Control and length field. * For UIH frames: on Address and Control field. * * (This is stated here for clarification, and to set the standard for RFCOMM; * the fields included in FCS calculation have actually changed in version * 7.0.0 of TS 07.10, but RFCOMM will not change the FCS calculation scheme * from the one above.) ..." 
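The lookup table above is the standard reflected CRC-8 of TS 07.10 (polynomial x^8 + x^2 + x + 1, i.e. 0x07, which is 0xe0 when reflected) with initial value 0xff, and the FCS is its one's complement (0xff - crc). A standalone sketch that regenerates table entries bit by bit and computes an fcs3()-style checksum, so the table-driven code can be cross-checked without copying all 256 entries; the header bytes are example values:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Bit-by-bit form of one entry of ng_btsocket_rfcomm_crc_table[] */
static uint8_t
crc_entry(uint8_t idx)
{
	uint8_t crc = idx;
	int i;

	for (i = 0; i < 8; i++)
		crc = (crc & 1) ? (crc >> 1) ^ 0xe0 : (crc >> 1);
	return (crc);
}

/* Equivalent of ng_btsocket_rfcomm_crc(): init 0xff, then table[crc ^ byte] */
static uint8_t
rfcomm_crc(const uint8_t *data, int length)
{
	uint8_t crc = 0xff;

	while (length--)
		crc = crc_entry(crc ^ *data++);
	return (crc);
}

int
main(void)
{
	uint8_t hdr[3] = { 0x03, 0x3f, 0x01 };	/* example addr/ctrl/length */

	assert(crc_entry(0x01) == 0x91);	/* matches table[1] above */
	assert(crc_entry(0x02) == 0xe3);	/* matches table[2] above */
	/* fcs3()-style FCS over a 3-byte SABM/DISC/UA/DM header */
	printf("fcs=%#x\n", 0xff - rfcomm_crc(hdr, 3));
	return (0);
}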
*/ static int ng_btsocket_rfcomm_check_fcs(u_int8_t *data, int type, u_int8_t fcs) { if (type != RFCOMM_FRAME_UIH) return (ng_btsocket_rfcomm_fcs3(data) != fcs); return (ng_btsocket_rfcomm_fcs2(data) != fcs); } /* ng_btsocket_rfcomm_check_fcs */ /***************************************************************************** ***************************************************************************** ** Socket interface ***************************************************************************** *****************************************************************************/ /* * Initialize everything */ static void ng_btsocket_rfcomm_init(void *arg __unused) { ng_btsocket_rfcomm_debug_level = NG_BTSOCKET_WARN_LEVEL; ng_btsocket_rfcomm_timo = 60; /* RFCOMM task */ TASK_INIT(&ng_btsocket_rfcomm_task, 0, ng_btsocket_rfcomm_sessions_task, NULL); /* RFCOMM sessions list */ LIST_INIT(&ng_btsocket_rfcomm_sessions); mtx_init(&ng_btsocket_rfcomm_sessions_mtx, "btsocks_rfcomm_sessions_mtx", NULL, MTX_DEF); /* RFCOMM sockets list */ LIST_INIT(&ng_btsocket_rfcomm_sockets); mtx_init(&ng_btsocket_rfcomm_sockets_mtx, "btsocks_rfcomm_sockets_mtx", NULL, MTX_DEF); } /* ng_btsocket_rfcomm_init */ SYSINIT(ng_btsocket_rfcomm_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ng_btsocket_rfcomm_init, NULL); /* * Abort connection on socket */ void ng_btsocket_rfcomm_abort(struct socket *so) { so->so_error = ECONNABORTED; (void)ng_btsocket_rfcomm_disconnect(so); } /* ng_btsocket_rfcomm_abort */ void ng_btsocket_rfcomm_close(struct socket *so) { (void)ng_btsocket_rfcomm_disconnect(so); } /* ng_btsocket_rfcomm_close */ -/* - * Accept connection on socket. Nothing to do here, socket must be connected - * and ready, so just return peer address and be done with it. - */ - -int -ng_btsocket_rfcomm_accept(struct socket *so, struct sockaddr **nam) -{ - return (ng_btsocket_rfcomm_peeraddr(so, nam)); -} /* ng_btsocket_rfcomm_accept */ - /* * Create and attach new socket */ int ng_btsocket_rfcomm_attach(struct socket *so, int proto, struct thread *td) { ng_btsocket_rfcomm_pcb_p pcb = so2rfcomm_pcb(so); int error; /* Check socket and protocol */ if (so->so_type != SOCK_STREAM) return (ESOCKTNOSUPPORT); #if 0 /* XXX sonewconn() calls "pru_attach" with proto == 0 */ if (proto != 0) if (proto != BLUETOOTH_PROTO_RFCOMM) return (EPROTONOSUPPORT); #endif /* XXX */ if (pcb != NULL) return (EISCONN); /* Reserve send and receive space if it is not reserved yet */ if ((so->so_snd.sb_hiwat == 0) || (so->so_rcv.sb_hiwat == 0)) { error = soreserve(so, NG_BTSOCKET_RFCOMM_SENDSPACE, NG_BTSOCKET_RFCOMM_RECVSPACE); if (error != 0) return (error); } /* Allocate the PCB */ pcb = malloc(sizeof(*pcb), M_NETGRAPH_BTSOCKET_RFCOMM, M_NOWAIT | M_ZERO); if (pcb == NULL) return (ENOMEM); /* Link the PCB and the socket */ so->so_pcb = (caddr_t) pcb; pcb->so = so; /* Initialize PCB */ pcb->state = NG_BTSOCKET_RFCOMM_DLC_CLOSED; pcb->flags = NG_BTSOCKET_RFCOMM_DLC_CFC; pcb->lmodem = pcb->rmodem = (RFCOMM_MODEM_RTC | RFCOMM_MODEM_RTR | RFCOMM_MODEM_DV); pcb->mtu = RFCOMM_DEFAULT_MTU; pcb->tx_cred = 0; pcb->rx_cred = RFCOMM_DEFAULT_CREDITS; mtx_init(&pcb->pcb_mtx, "btsocks_rfcomm_pcb_mtx", NULL, MTX_DEF); callout_init_mtx(&pcb->timo, &pcb->pcb_mtx, 0); /* Add the PCB to the list */ mtx_lock(&ng_btsocket_rfcomm_sockets_mtx); LIST_INSERT_HEAD(&ng_btsocket_rfcomm_sockets, pcb, next); mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx); return (0); } /* ng_btsocket_rfcomm_attach */ /* * Bind socket */ int ng_btsocket_rfcomm_bind(struct socket *so, struct sockaddr *nam, struct 
thread *td) { ng_btsocket_rfcomm_pcb_t *pcb = so2rfcomm_pcb(so), *pcb1; struct sockaddr_rfcomm *sa = (struct sockaddr_rfcomm *) nam; if (pcb == NULL) return (EINVAL); /* Verify address */ if (sa == NULL) return (EINVAL); if (sa->rfcomm_family != AF_BLUETOOTH) return (EAFNOSUPPORT); if (sa->rfcomm_len != sizeof(*sa)) return (EINVAL); if (sa->rfcomm_channel > 30) return (EINVAL); mtx_lock(&pcb->pcb_mtx); if (sa->rfcomm_channel != 0) { mtx_lock(&ng_btsocket_rfcomm_sockets_mtx); LIST_FOREACH(pcb1, &ng_btsocket_rfcomm_sockets, next) { if (pcb1->channel == sa->rfcomm_channel && bcmp(&pcb1->src, &sa->rfcomm_bdaddr, sizeof(pcb1->src)) == 0) { mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx); mtx_unlock(&pcb->pcb_mtx); return (EADDRINUSE); } } mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx); } bcopy(&sa->rfcomm_bdaddr, &pcb->src, sizeof(pcb->src)); pcb->channel = sa->rfcomm_channel; mtx_unlock(&pcb->pcb_mtx); return (0); } /* ng_btsocket_rfcomm_bind */ /* * Connect socket */ int ng_btsocket_rfcomm_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { ng_btsocket_rfcomm_pcb_t *pcb = so2rfcomm_pcb(so); struct sockaddr_rfcomm *sa = (struct sockaddr_rfcomm *) nam; ng_btsocket_rfcomm_session_t *s = NULL; struct socket *l2so = NULL; int dlci, error = 0; if (pcb == NULL) return (EINVAL); /* Verify address */ if (sa == NULL) return (EINVAL); if (sa->rfcomm_family != AF_BLUETOOTH) return (EAFNOSUPPORT); if (sa->rfcomm_len != sizeof(*sa)) return (EINVAL); if (sa->rfcomm_channel > 30) return (EINVAL); if (sa->rfcomm_channel == 0 || bcmp(&sa->rfcomm_bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0) return (EDESTADDRREQ); /* * Note that we will not check for errors in socreate() because * if we failed to create L2CAP socket at this point we still * might have already open session. */ error = socreate(PF_BLUETOOTH, &l2so, SOCK_SEQPACKET, BLUETOOTH_PROTO_L2CAP, td->td_ucred, td); /* * Look for session between "pcb->src" and "sa->rfcomm_bdaddr" (dst) */ mtx_lock(&ng_btsocket_rfcomm_sessions_mtx); s = ng_btsocket_rfcomm_session_by_addr(&pcb->src, &sa->rfcomm_bdaddr); if (s == NULL) { /* * We need to create new RFCOMM session. Check if we have L2CAP * socket. 
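Everything in connect() above is driven by an ordinary connect(2) on an RFCOMM socket. A hedged userland sketch, assuming <bluetooth.h> provides struct sockaddr_rfcomm and bt_aton(); per the checks earlier in this function the channel must be 1..30 and the bdaddr non-wildcard (both are example values here; link with -lbluetooth):

#include <sys/types.h>
#include <sys/socket.h>
#include <string.h>
#include <bluetooth.h>	/* assumption: sockaddr_rfcomm, bt_aton() */
#include <err.h>

int
main(void)
{
	struct sockaddr_rfcomm sa;
	int s;

	s = socket(PF_BLUETOOTH, SOCK_STREAM, BLUETOOTH_PROTO_RFCOMM);
	if (s < 0)
		err(1, "socket");
	memset(&sa, 0, sizeof(sa));
	sa.rfcomm_len = sizeof(sa);
	sa.rfcomm_family = AF_BLUETOOTH;
	sa.rfcomm_channel = 3;	/* example server channel, must be 1..30 */
	if (!bt_aton("00:11:22:33:44:55", &sa.rfcomm_bdaddr))
		errx(1, "bt_aton");
	if (connect(s, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		err(1, "connect");
	return (0);
}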
If l2so == NULL then error has the error code from * socreate() */ if (l2so == NULL) { mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx); return (error); } error = ng_btsocket_rfcomm_session_create(&s, l2so, &pcb->src, &sa->rfcomm_bdaddr, td); if (error != 0) { mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx); soclose(l2so); return (error); } } else if (l2so != NULL) soclose(l2so); /* we don't need a new L2CAP socket */ /* * Check if we already have the same DLCI on the same session */ mtx_lock(&s->session_mtx); mtx_lock(&pcb->pcb_mtx); dlci = RFCOMM_MKDLCI(!INITIATOR(s), sa->rfcomm_channel); if (ng_btsocket_rfcomm_pcb_by_dlci(s, dlci) != NULL) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&s->session_mtx); mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx); return (EBUSY); } /* * Check session state and if it is not acceptable then refuse connection */ switch (s->state) { case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING: case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED: case NG_BTSOCKET_RFCOMM_SESSION_OPEN: /* * Update destination address and channel and attach * DLC to the session */ bcopy(&sa->rfcomm_bdaddr, &pcb->dst, sizeof(pcb->dst)); pcb->channel = sa->rfcomm_channel; pcb->dlci = dlci; LIST_INSERT_HEAD(&s->dlcs, pcb, session_next); pcb->session = s; ng_btsocket_rfcomm_timeout(pcb); soisconnecting(pcb->so); if (s->state == NG_BTSOCKET_RFCOMM_SESSION_OPEN) { pcb->mtu = s->mtu; bcopy(&so2l2cap_pcb(s->l2so)->src, &pcb->src, sizeof(pcb->src)); pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONFIGURING; error = ng_btsocket_rfcomm_send_pn(pcb); if (error == 0) error = ng_btsocket_rfcomm_task_wakeup(); } else pcb->state = NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT; break; default: error = ECONNRESET; break; } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&s->session_mtx); mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx); return (error); } /* ng_btsocket_rfcomm_connect */ /* * Process ioctl calls on socket. * XXX FIXME this should provide an interface to the RFCOMM multiplexor channel */ int ng_btsocket_rfcomm_control(struct socket *so, u_long cmd, void *data, struct ifnet *ifp, struct thread *td) { return (EINVAL); } /* ng_btsocket_rfcomm_control */ /* * Process getsockopt/setsockopt system calls */ int ng_btsocket_rfcomm_ctloutput(struct socket *so, struct sockopt *sopt) { ng_btsocket_rfcomm_pcb_p pcb = so2rfcomm_pcb(so); struct ng_btsocket_rfcomm_fc_info fcinfo; int error = 0; if (pcb == NULL) return (EINVAL); if (sopt->sopt_level != SOL_RFCOMM) return (0); mtx_lock(&pcb->pcb_mtx); switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case SO_RFCOMM_MTU: error = sooptcopyout(sopt, &pcb->mtu, sizeof(pcb->mtu)); break; case SO_RFCOMM_FC_INFO: fcinfo.lmodem = pcb->lmodem; fcinfo.rmodem = pcb->rmodem; fcinfo.tx_cred = pcb->tx_cred; fcinfo.rx_cred = pcb->rx_cred; fcinfo.cfc = (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)?
1 : 0; fcinfo.reserved = 0; error = sooptcopyout(sopt, &fcinfo, sizeof(fcinfo)); break; default: error = ENOPROTOOPT; break; } break; case SOPT_SET: switch (sopt->sopt_name) { default: error = ENOPROTOOPT; break; } break; default: error = EINVAL; break; } mtx_unlock(&pcb->pcb_mtx); return (error); } /* ng_btsocket_rfcomm_ctloutput */ /* * Detach and destroy socket */ void ng_btsocket_rfcomm_detach(struct socket *so) { ng_btsocket_rfcomm_pcb_p pcb = so2rfcomm_pcb(so); KASSERT(pcb != NULL, ("ng_btsocket_rfcomm_detach: pcb == NULL")); mtx_lock(&pcb->pcb_mtx); switch (pcb->state) { case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT: case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING: case NG_BTSOCKET_RFCOMM_DLC_CONNECTING: case NG_BTSOCKET_RFCOMM_DLC_CONNECTED: /* XXX What to do with pending request? */ if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO) ng_btsocket_rfcomm_untimeout(pcb); if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT) pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_DETACHED; else pcb->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING; ng_btsocket_rfcomm_task_wakeup(); break; case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING: ng_btsocket_rfcomm_task_wakeup(); break; } while (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CLOSED) msleep(&pcb->state, &pcb->pcb_mtx, PZERO, "rf_det", 0); if (pcb->session != NULL) panic("%s: pcb->session != NULL\n", __func__); if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO) panic("%s: timeout on closed DLC, flags=%#x\n", __func__, pcb->flags); mtx_lock(&ng_btsocket_rfcomm_sockets_mtx); LIST_REMOVE(pcb, next); mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx); mtx_unlock(&pcb->pcb_mtx); mtx_destroy(&pcb->pcb_mtx); bzero(pcb, sizeof(*pcb)); free(pcb, M_NETGRAPH_BTSOCKET_RFCOMM); soisdisconnected(so); so->so_pcb = NULL; } /* ng_btsocket_rfcomm_detach */ /* * Disconnect socket */ int ng_btsocket_rfcomm_disconnect(struct socket *so) { ng_btsocket_rfcomm_pcb_p pcb = so2rfcomm_pcb(so); if (pcb == NULL) return (EINVAL); mtx_lock(&pcb->pcb_mtx); if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING) { mtx_unlock(&pcb->pcb_mtx); return (EINPROGRESS); } /* XXX What to do with pending request? */ if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO) ng_btsocket_rfcomm_untimeout(pcb); switch (pcb->state) { case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING: /* XXX can we get here? */ case NG_BTSOCKET_RFCOMM_DLC_CONNECTING: /* XXX can we get here? */ case NG_BTSOCKET_RFCOMM_DLC_CONNECTED: /* * Just change DLC state and enqueue RFCOMM task. It will * queue and send DISC on the DLC. */ pcb->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING; soisdisconnecting(so); ng_btsocket_rfcomm_task_wakeup(); break; case NG_BTSOCKET_RFCOMM_DLC_CLOSED: case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT: break; default: panic("%s: Invalid DLC state=%d, flags=%#x\n", __func__, pcb->state, pcb->flags); break; } mtx_unlock(&pcb->pcb_mtx); return (0); } /* ng_btsocket_rfcomm_disconnect */ /* * Listen on socket. 
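For the listen() implementation that follows: binding to channel 0 lets listen() auto-assign the highest free server channel, and getsockname(2) then reports what was picked. A hedged userland sketch relying on that behavior; headers, NG_HCI_BDADDR_ANY visibility, and linkage are assumptions carried over from the earlier examples:

#include <sys/types.h>
#include <sys/socket.h>
#include <stdio.h>
#include <string.h>
#include <bluetooth.h>	/* assumption: sockaddr_rfcomm, NG_HCI_BDADDR_ANY */
#include <err.h>

int
main(void)
{
	struct sockaddr_rfcomm sa;
	socklen_t len = sizeof(sa);
	int s;

	s = socket(PF_BLUETOOTH, SOCK_STREAM, BLUETOOTH_PROTO_RFCOMM);
	if (s < 0)
		err(1, "socket");
	memset(&sa, 0, sizeof(sa));
	sa.rfcomm_len = sizeof(sa);
	sa.rfcomm_family = AF_BLUETOOTH;
	sa.rfcomm_channel = 0;	/* 0 = let listen() pick a free channel */
	memcpy(&sa.rfcomm_bdaddr, NG_HCI_BDADDR_ANY, sizeof(sa.rfcomm_bdaddr));
	if (bind(s, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		err(1, "bind");
	if (listen(s, 5) < 0)
		err(1, "listen");
	if (getsockname(s, (struct sockaddr *)&sa, &len) < 0)
		err(1, "getsockname");
	printf("listening on channel %d\n", sa.rfcomm_channel);
	return (0);
}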
First call to listen() will create listening RFCOMM session */ int ng_btsocket_rfcomm_listen(struct socket *so, int backlog, struct thread *td) { ng_btsocket_rfcomm_pcb_p pcb = so2rfcomm_pcb(so), pcb1; ng_btsocket_rfcomm_session_p s = NULL; struct socket *l2so = NULL; int error, socreate_error, usedchannels; if (pcb == NULL) return (EINVAL); if (pcb->channel > 30) return (EADDRNOTAVAIL); usedchannels = 0; mtx_lock(&pcb->pcb_mtx); if (pcb->channel == 0) { mtx_lock(&ng_btsocket_rfcomm_sockets_mtx); LIST_FOREACH(pcb1, &ng_btsocket_rfcomm_sockets, next) if (pcb1->channel != 0 && bcmp(&pcb1->src, &pcb->src, sizeof(pcb->src)) == 0) usedchannels |= (1 << (pcb1->channel - 1)); for (pcb->channel = 30; pcb->channel > 0; pcb->channel --) if (!(usedchannels & (1 << (pcb->channel - 1)))) break; if (pcb->channel == 0) { mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx); mtx_unlock(&pcb->pcb_mtx); return (EADDRNOTAVAIL); } mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx); } mtx_unlock(&pcb->pcb_mtx); /* * Note that we will not check for errors in socreate() because * if we failed to create L2CAP socket at this point we still * might have already open session. */ socreate_error = socreate(PF_BLUETOOTH, &l2so, SOCK_SEQPACKET, BLUETOOTH_PROTO_L2CAP, td->td_ucred, td); /* * Transition the socket and session into the LISTENING state. Check * for collisions first, as there can only be one. */ mtx_lock(&ng_btsocket_rfcomm_sessions_mtx); SOCK_LOCK(so); error = solisten_proto_check(so); SOCK_UNLOCK(so); if (error != 0) goto out; LIST_FOREACH(s, &ng_btsocket_rfcomm_sessions, next) if (s->state == NG_BTSOCKET_RFCOMM_SESSION_LISTENING) break; if (s == NULL) { /* * We need to create default RFCOMM session. Check if we have * L2CAP socket. If l2so == NULL then error has the error code * from socreate() */ if (l2so == NULL) { solisten_proto_abort(so); error = socreate_error; goto out; } /* * Create default listen RFCOMM session. The default RFCOMM * session will listen on ANY address. * * XXX FIXME Note that currently there is no way to adjust MTU * for the default session. */ error = ng_btsocket_rfcomm_session_create(&s, l2so, NG_HCI_BDADDR_ANY, NULL, td); if (error != 0) { solisten_proto_abort(so); goto out; } l2so = NULL; } SOCK_LOCK(so); solisten_proto(so, backlog); SOCK_UNLOCK(so); out: mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx); /* * If we still have an l2so reference here, it's unneeded, so release * it. */ if (l2so != NULL) soclose(l2so); return (error); } /* ng_btsocket_listen */ -/* - * Get peer address - */ - -int -ng_btsocket_rfcomm_peeraddr(struct socket *so, struct sockaddr **nam) +static int +ng_btsocket_rfcomm_peeraddr1(struct socket *so, struct sockaddr_rfcomm *sa) { ng_btsocket_rfcomm_pcb_p pcb = so2rfcomm_pcb(so); - struct sockaddr_rfcomm sa; if (pcb == NULL) return (EINVAL); - bcopy(&pcb->dst, &sa.rfcomm_bdaddr, sizeof(sa.rfcomm_bdaddr)); - sa.rfcomm_channel = pcb->channel; - sa.rfcomm_len = sizeof(sa); - sa.rfcomm_family = AF_BLUETOOTH; + *sa = (struct sockaddr_rfcomm ){ + .rfcomm_len = sizeof(struct sockaddr_rfcomm), + .rfcomm_family = AF_BLUETOOTH, + .rfcomm_channel = pcb->channel, + }; + bcopy(&pcb->dst, &sa->rfcomm_bdaddr, sizeof(sa->rfcomm_bdaddr)); + return (0); +} + +/* + * Get peer address + */ +int +ng_btsocket_rfcomm_peeraddr(struct socket *so, struct sockaddr **nam) +{ + struct sockaddr_rfcomm sa; + int error; + + error = ng_btsocket_rfcomm_peeraddr1(so, &sa); + if (error != 0) + return (error); *nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT); return ((*nam == NULL)? 
ENOMEM : 0); -} /* ng_btsocket_rfcomm_peeraddr */ +} + +/* + * Accept connection on socket. Nothing to do here, socket must be connected + * and ready, so just return peer address and be done with it. + */ +int +ng_btsocket_rfcomm_accept(struct socket *so, struct sockaddr *sa) +{ + + return (ng_btsocket_rfcomm_peeraddr1(so, (struct sockaddr_rfcomm *)sa)); +} /* * Send data to socket */ int ng_btsocket_rfcomm_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { ng_btsocket_rfcomm_pcb_t *pcb = so2rfcomm_pcb(so); int error = 0; /* Check socket and input */ if (pcb == NULL || m == NULL || control != NULL) { error = EINVAL; goto drop; } mtx_lock(&pcb->pcb_mtx); /* Make sure DLC is connected */ if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTED) { mtx_unlock(&pcb->pcb_mtx); error = ENOTCONN; goto drop; } /* Put the packet on the socket's send queue and wakeup RFCOMM task */ sbappend(&pcb->so->so_snd, m, flags); m = NULL; if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_SENDING)) { pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_SENDING; error = ng_btsocket_rfcomm_task_wakeup(); } mtx_unlock(&pcb->pcb_mtx); drop: NG_FREE_M(m); /* checks for != NULL */ NG_FREE_M(control); return (error); } /* ng_btsocket_rfcomm_send */ /* * Get socket address */ int ng_btsocket_rfcomm_sockaddr(struct socket *so, struct sockaddr **nam) { ng_btsocket_rfcomm_pcb_p pcb = so2rfcomm_pcb(so); struct sockaddr_rfcomm sa; if (pcb == NULL) return (EINVAL); bcopy(&pcb->src, &sa.rfcomm_bdaddr, sizeof(sa.rfcomm_bdaddr)); sa.rfcomm_channel = pcb->channel; sa.rfcomm_len = sizeof(sa); sa.rfcomm_family = AF_BLUETOOTH; *nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT); return ((*nam == NULL)? ENOMEM : 0); } /* ng_btsocket_rfcomm_sockaddr */ /* * Upcall function for L2CAP sockets. Enqueue RFCOMM task. */ static int ng_btsocket_rfcomm_upcall(struct socket *so, void *arg, int waitflag) { int error; if (so == NULL) panic("%s: so == NULL\n", __func__); if ((error = ng_btsocket_rfcomm_task_wakeup()) != 0) NG_BTSOCKET_RFCOMM_ALERT( "%s: Could not enqueue RFCOMM task, error=%d\n", __func__, error); return (SU_OK); } /* ng_btsocket_rfcomm_upcall */ /* * RFCOMM task. Will handle all RFCOMM sessions in one pass. * XXX FIXME does not scale very well */ static void ng_btsocket_rfcomm_sessions_task(void *ctx, int pending) { ng_btsocket_rfcomm_session_p s = NULL, s_next = NULL; mtx_lock(&ng_btsocket_rfcomm_sessions_mtx); for (s = LIST_FIRST(&ng_btsocket_rfcomm_sessions); s != NULL; ) { mtx_lock(&s->session_mtx); s_next = LIST_NEXT(s, next); ng_btsocket_rfcomm_session_task(s); if (s->state == NG_BTSOCKET_RFCOMM_SESSION_CLOSED) { /* Unlink and clean the session */ LIST_REMOVE(s, next); NG_BT_MBUFQ_DRAIN(&s->outq); if (!LIST_EMPTY(&s->dlcs)) panic("%s: DLC list is not empty\n", __func__); /* Close L2CAP socket */ SOCKBUF_LOCK(&s->l2so->so_rcv); soupcall_clear(s->l2so, SO_RCV); SOCKBUF_UNLOCK(&s->l2so->so_rcv); SOCKBUF_LOCK(&s->l2so->so_snd); soupcall_clear(s->l2so, SO_SND); SOCKBUF_UNLOCK(&s->l2so->so_snd); soclose(s->l2so); mtx_unlock(&s->session_mtx); mtx_destroy(&s->session_mtx); bzero(s, sizeof(*s)); free(s, M_NETGRAPH_BTSOCKET_RFCOMM); } else mtx_unlock(&s->session_mtx); s = s_next; } mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx); } /* ng_btsocket_rfcomm_sessions_task */ /* * Process RFCOMM session. Will handle all RFCOMM sockets in one pass. 
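The per-session task below shuttles data subject to RFCOMM's credit-based flow control (tx_cred/rx_cred). The ctloutput() handler earlier exports a snapshot of that state as SO_RFCOMM_FC_INFO; a hedged sketch of polling it from userland, where the exact field widths of struct ng_btsocket_rfcomm_fc_info and its visibility via <bluetooth.h> are assumptions:

#include <sys/types.h>
#include <sys/socket.h>
#include <stdio.h>
#include <bluetooth.h>	/* assumption: SOL_RFCOMM, SO_RFCOMM_FC_INFO */
#include <err.h>

int
main(void)
{
	struct ng_btsocket_rfcomm_fc_info fc;
	socklen_t len = sizeof(fc);
	int s;

	s = socket(PF_BLUETOOTH, SOCK_STREAM, BLUETOOTH_PROTO_RFCOMM);
	if (s < 0)
		err(1, "socket");
	/* On an unconnected socket this simply reports the defaults */
	if (getsockopt(s, SOL_RFCOMM, SO_RFCOMM_FC_INFO, &fc, &len) < 0)
		err(1, "getsockopt(SO_RFCOMM_FC_INFO)");
	printf("cfc=%u tx_cred=%u rx_cred=%u\n", fc.cfc, fc.tx_cred,
	    fc.rx_cred);
	return (0);
}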
*/ static void ng_btsocket_rfcomm_session_task(ng_btsocket_rfcomm_session_p s) { mtx_assert(&s->session_mtx, MA_OWNED); if (s->l2so->so_rcv.sb_state & SBS_CANTRCVMORE) { NG_BTSOCKET_RFCOMM_INFO( "%s: L2CAP connection has been terminated, so=%p, so_state=%#x, so_count=%d, " \ "state=%d, flags=%#x\n", __func__, s->l2so, s->l2so->so_state, s->l2so->so_count, s->state, s->flags); s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; ng_btsocket_rfcomm_session_clean(s); } /* Now process upcall */ switch (s->state) { /* Try to accept new L2CAP connection(s) */ case NG_BTSOCKET_RFCOMM_SESSION_LISTENING: while (ng_btsocket_rfcomm_session_accept(s) == 0) ; break; /* Process the results of the L2CAP connect */ case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING: ng_btsocket_rfcomm_session_process_pcb(s); if (ng_btsocket_rfcomm_session_connect(s) != 0) { s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; ng_btsocket_rfcomm_session_clean(s); } break; /* Try to receive/send more data */ case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED: case NG_BTSOCKET_RFCOMM_SESSION_OPEN: case NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING: ng_btsocket_rfcomm_session_process_pcb(s); if (ng_btsocket_rfcomm_session_receive(s) != 0) { s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; ng_btsocket_rfcomm_session_clean(s); } else if (ng_btsocket_rfcomm_session_send(s) != 0) { s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; ng_btsocket_rfcomm_session_clean(s); } break; case NG_BTSOCKET_RFCOMM_SESSION_CLOSED: break; default: panic("%s: Invalid session state=%d, flags=%#x\n", __func__, s->state, s->flags); break; } } /* ng_btsocket_rfcomm_session_task */ /* * Process RFCOMM connection indicator. Caller must hold s->session_mtx */ static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_connect_ind(ng_btsocket_rfcomm_session_p s, int channel) { ng_btsocket_rfcomm_pcb_p pcb = NULL, pcb1 = NULL; ng_btsocket_l2cap_pcb_p l2pcb = NULL; struct socket *so1; mtx_assert(&s->session_mtx, MA_OWNED); /* * Try to find RFCOMM socket that listens on given source address * and channel. This will return the best possible match. */ l2pcb = so2l2cap_pcb(s->l2so); pcb = ng_btsocket_rfcomm_pcb_listener(&l2pcb->src, channel); if (pcb == NULL) return (NULL); /* * Check the pending connections queue and if we have space then * create a new socket and set proper source and destination address, * and channel. */ mtx_lock(&pcb->pcb_mtx); CURVNET_SET(pcb->so->so_vnet); so1 = sonewconn(pcb->so, 0); CURVNET_RESTORE(); mtx_unlock(&pcb->pcb_mtx); if (so1 == NULL) return (NULL); /* * If we got here then we have created a new socket. So complete the * connection. Set source and destination address from the session. */ pcb1 = so2rfcomm_pcb(so1); if (pcb1 == NULL) panic("%s: pcb1 == NULL\n", __func__); mtx_lock(&pcb1->pcb_mtx); bcopy(&l2pcb->src, &pcb1->src, sizeof(pcb1->src)); bcopy(&l2pcb->dst, &pcb1->dst, sizeof(pcb1->dst)); pcb1->channel = channel; /* Link new DLC to the session. We already hold s->session_mtx */ LIST_INSERT_HEAD(&s->dlcs, pcb1, session_next); pcb1->session = s; mtx_unlock(&pcb1->pcb_mtx); return (pcb1); } /* ng_btsocket_rfcomm_connect_ind */ /* * Process RFCOMM connect confirmation. Caller must hold s->session_mtx. */ static void ng_btsocket_rfcomm_connect_cfm(ng_btsocket_rfcomm_session_p s) { ng_btsocket_rfcomm_pcb_p pcb = NULL, pcb_next = NULL; int error; mtx_assert(&s->session_mtx, MA_OWNED); /* * Wake up all waiting sockets and send PN request for each of them.
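* Each DLC found in the W4_CONNECT state inherits the session MTU and local address and moves to CONFIGURING while the PN (parameter negotiation) exchange runs; the SABM for the DLC itself is sent only once the PN response arrives (see ng_btsocket_rfcomm_receive_pn()).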
* Note that the timeout has already been set in ng_btsocket_rfcomm_connect() * * Note: cannot use LIST_FOREACH because ng_btsocket_rfcomm_pcb_kill * will unlink DLC from the session */ for (pcb = LIST_FIRST(&s->dlcs); pcb != NULL; ) { mtx_lock(&pcb->pcb_mtx); pcb_next = LIST_NEXT(pcb, session_next); if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT) { pcb->mtu = s->mtu; bcopy(&so2l2cap_pcb(s->l2so)->src, &pcb->src, sizeof(pcb->src)); error = ng_btsocket_rfcomm_send_pn(pcb); if (error == 0) pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONFIGURING; else ng_btsocket_rfcomm_pcb_kill(pcb, error); } mtx_unlock(&pcb->pcb_mtx); pcb = pcb_next; } } /* ng_btsocket_rfcomm_connect_cfm */ /***************************************************************************** ***************************************************************************** ** RFCOMM sessions ***************************************************************************** *****************************************************************************/ /* * Create new RFCOMM session. That function WILL NOT take ownership over l2so. * Caller MUST free l2so if function failed. */ static int ng_btsocket_rfcomm_session_create(ng_btsocket_rfcomm_session_p *sp, struct socket *l2so, bdaddr_p src, bdaddr_p dst, struct thread *td) { ng_btsocket_rfcomm_session_p s = NULL; struct sockaddr_l2cap l2sa; struct sockopt l2sopt; int error; u_int16_t mtu; mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED); /* Allocate the RFCOMM session */ s = malloc(sizeof(*s), M_NETGRAPH_BTSOCKET_RFCOMM, M_NOWAIT | M_ZERO); if (s == NULL) return (ENOMEM); /* Set defaults */ s->mtu = RFCOMM_DEFAULT_MTU; s->flags = 0; s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; NG_BT_MBUFQ_INIT(&s->outq, ifqmaxlen); /* * XXX Mark session mutex as DUPOK to prevent "duplicated lock of * the same type" message. When accepting new L2CAP connection * ng_btsocket_rfcomm_session_accept() holds both session mutexes * for "old" (accepting) session and "new" (created) session. */ mtx_init(&s->session_mtx, "btsocks_rfcomm_session_mtx", NULL, MTX_DEF|MTX_DUPOK); LIST_INIT(&s->dlcs); /* Prepare L2CAP socket */ SOCKBUF_LOCK(&l2so->so_rcv); soupcall_set(l2so, SO_RCV, ng_btsocket_rfcomm_upcall, NULL); SOCKBUF_UNLOCK(&l2so->so_rcv); SOCKBUF_LOCK(&l2so->so_snd); soupcall_set(l2so, SO_SND, ng_btsocket_rfcomm_upcall, NULL); SOCKBUF_UNLOCK(&l2so->so_snd); l2so->so_state |= SS_NBIO; s->l2so = l2so; mtx_lock(&s->session_mtx); /* * "src" == NULL and "dst" == NULL means just create a session; * the caller must do the rest */ if (src == NULL && dst == NULL) goto done; /* * Set incoming MTU on L2CAP socket. It is RFCOMM session default MTU * plus 5 bytes: RFCOMM frame header, one extra byte for length and one * extra byte for credits. */ mtu = s->mtu + sizeof(struct rfcomm_frame_hdr) + 1 + 1; l2sopt.sopt_dir = SOPT_SET; l2sopt.sopt_level = SOL_L2CAP; l2sopt.sopt_name = SO_L2CAP_IMTU; l2sopt.sopt_val = (void *) &mtu; l2sopt.sopt_valsize = sizeof(mtu); l2sopt.sopt_td = NULL; error = sosetopt(s->l2so, &l2sopt); if (error != 0) goto bad; /* Bind socket to "src" address */ l2sa.l2cap_len = sizeof(l2sa); l2sa.l2cap_family = AF_BLUETOOTH; l2sa.l2cap_psm = (dst == NULL)?
htole16(NG_L2CAP_PSM_RFCOMM) : 0; bcopy(src, &l2sa.l2cap_bdaddr, sizeof(l2sa.l2cap_bdaddr)); l2sa.l2cap_cid = 0; l2sa.l2cap_bdaddr_type = BDADDR_BREDR; error = sobind(s->l2so, (struct sockaddr *) &l2sa, td); if (error != 0) goto bad; /* If "dst" is not NULL then initiate connect(), otherwise listen() */ if (dst == NULL) { s->flags = 0; s->state = NG_BTSOCKET_RFCOMM_SESSION_LISTENING; error = solisten(s->l2so, 10, td); if (error != 0) goto bad; } else { s->flags = NG_BTSOCKET_RFCOMM_SESSION_INITIATOR; s->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTING; l2sa.l2cap_len = sizeof(l2sa); l2sa.l2cap_family = AF_BLUETOOTH; l2sa.l2cap_psm = htole16(NG_L2CAP_PSM_RFCOMM); bcopy(dst, &l2sa.l2cap_bdaddr, sizeof(l2sa.l2cap_bdaddr)); l2sa.l2cap_cid = 0; l2sa.l2cap_bdaddr_type = BDADDR_BREDR; error = soconnect(s->l2so, (struct sockaddr *) &l2sa, td); if (error != 0) goto bad; } done: LIST_INSERT_HEAD(&ng_btsocket_rfcomm_sessions, s, next); *sp = s; mtx_unlock(&s->session_mtx); return (0); bad: mtx_unlock(&s->session_mtx); /* Return L2CAP socket back to its original state */ SOCKBUF_LOCK(&l2so->so_rcv); soupcall_clear(s->l2so, SO_RCV); SOCKBUF_UNLOCK(&l2so->so_rcv); SOCKBUF_LOCK(&l2so->so_snd); soupcall_clear(s->l2so, SO_SND); SOCKBUF_UNLOCK(&l2so->so_snd); l2so->so_state &= ~SS_NBIO; mtx_destroy(&s->session_mtx); bzero(s, sizeof(*s)); free(s, M_NETGRAPH_BTSOCKET_RFCOMM); return (error); } /* ng_btsocket_rfcomm_session_create */ /* * Process accept() on RFCOMM session * XXX FIXME locking for "l2so"? */ static int ng_btsocket_rfcomm_session_accept(ng_btsocket_rfcomm_session_p s0) { struct socket *l2so; - struct sockaddr_l2cap *l2sa = NULL; + struct sockaddr_l2cap l2sa = { .l2cap_len = sizeof(l2sa) }; ng_btsocket_l2cap_pcb_t *l2pcb = NULL; ng_btsocket_rfcomm_session_p s = NULL; int error; mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED); mtx_assert(&s0->session_mtx, MA_OWNED); SOLISTEN_LOCK(s0->l2so); error = solisten_dequeue(s0->l2so, &l2so, 0); if (error == EWOULDBLOCK) return (error); if (error) { NG_BTSOCKET_RFCOMM_ERR( "%s: Could not accept connection on L2CAP socket, error=%d\n", __func__, error); return (error); } - error = soaccept(l2so, (struct sockaddr **) &l2sa); + error = soaccept(l2so, (struct sockaddr *)&l2sa); if (error != 0) { NG_BTSOCKET_RFCOMM_ERR( "%s: soaccept() on L2CAP socket failed, error=%d\n", __func__, error); soclose(l2so); return (error); } /* * Check if there is already an active RFCOMM session between the two * devices. If so then close the L2CAP connection. We only support one * RFCOMM session between each pair of devices. Note that here the * session may be in any state; it could even be in the middle of * disconnecting. */ l2pcb = so2l2cap_pcb(l2so); s = ng_btsocket_rfcomm_session_by_addr(&l2pcb->src, &l2pcb->dst); if (s == NULL) { /* Create a new RFCOMM session */ error = ng_btsocket_rfcomm_session_create(&s, l2so, NULL, NULL, curthread /* XXX */); if (error == 0) { mtx_lock(&s->session_mtx); s->flags = 0; s->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTED; /* * Adjust MTU on incoming connection. Reserve 5 bytes: * RFCOMM frame header, one extra byte for length and * one extra byte for credits.
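* For example, with the common L2CAP default MTU of 672 bytes in both directions this yields 672 - 3 - 1 - 1 = 667 bytes: struct rfcomm_frame_hdr carries the address, control and 8-bit length octets (3 bytes), and the two extra bytes cover the extended length octet and the credits octet.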
*/ s->mtu = min(l2pcb->imtu, l2pcb->omtu) - sizeof(struct rfcomm_frame_hdr) - 1 - 1; mtx_unlock(&s->session_mtx); } else { NG_BTSOCKET_RFCOMM_ALERT( "%s: Failed to create new RFCOMM session, error=%d\n", __func__, error); soclose(l2so); } } else { NG_BTSOCKET_RFCOMM_WARN( "%s: Rejecting duplicate RFCOMM session between src=%x:%x:%x:%x:%x:%x and " \ "dst=%x:%x:%x:%x:%x:%x, state=%d, flags=%#x\n", __func__, l2pcb->src.b[5], l2pcb->src.b[4], l2pcb->src.b[3], l2pcb->src.b[2], l2pcb->src.b[1], l2pcb->src.b[0], l2pcb->dst.b[5], l2pcb->dst.b[4], l2pcb->dst.b[3], l2pcb->dst.b[2], l2pcb->dst.b[1], l2pcb->dst.b[0], s->state, s->flags); error = EBUSY; soclose(l2so); } return (error); } /* ng_btsocket_rfcomm_session_accept */ /* * Process connect() on RFCOMM session * XXX FIXME locking for "l2so"? */ static int ng_btsocket_rfcomm_session_connect(ng_btsocket_rfcomm_session_p s) { ng_btsocket_l2cap_pcb_p l2pcb = so2l2cap_pcb(s->l2so); int error; mtx_assert(&s->session_mtx, MA_OWNED); /* First check if connection has failed */ if ((error = s->l2so->so_error) != 0) { s->l2so->so_error = 0; NG_BTSOCKET_RFCOMM_ERR( "%s: Could not connect RFCOMM session, error=%d, state=%d, flags=%#x\n", __func__, error, s->state, s->flags); return (error); } /* Is connection still in progress? */ if (s->l2so->so_state & SS_ISCONNECTING) return (0); /* * If we got here then we are connected. Send SABM on DLCI 0 to * open multiplexor channel. */ if (error == 0) { s->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTED; /* * Adjust MTU on outgoing connection. Reserve 5 bytes: RFCOMM * frame header, one extra byte for length and one extra byte * for credits. */ s->mtu = min(l2pcb->imtu, l2pcb->omtu) - sizeof(struct rfcomm_frame_hdr) - 1 - 1; error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_SABM,0); if (error == 0) error = ng_btsocket_rfcomm_task_wakeup(); } return (error); } /* ng_btsocket_rfcomm_session_connect */ /* * Receive data on RFCOMM session * XXX FIXME locking for "l2so"? */ static int ng_btsocket_rfcomm_session_receive(ng_btsocket_rfcomm_session_p s) { struct mbuf *m = NULL; struct uio uio; int more, flags, error; mtx_assert(&s->session_mtx, MA_OWNED); /* Can we read from the L2CAP socket? */ if (!soreadable(s->l2so)) return (0); /* First check for error on L2CAP socket */ if ((error = s->l2so->so_error) != 0) { s->l2so->so_error = 0; NG_BTSOCKET_RFCOMM_ERR( "%s: Could not receive data from L2CAP socket, error=%d, state=%d, flags=%#x\n", __func__, error, s->state, s->flags); return (error); } /* * Read all packets from the L2CAP socket. * XXX FIXME/VERIFY is that correct? For now use m->m_nextpkt as an * indication that there are more packets in the socket's buffer. * Also what should we use in uio.uio_resid? * Maybe s->mtu + sizeof(struct rfcomm_frame_hdr) + 1 + 1? */ for (more = 1; more; ) { /* Try to get next packet from socket */ bzero(&uio, sizeof(uio)); /* uio.uio_td = NULL; */ uio.uio_resid = 1000000000; flags = MSG_DONTWAIT; m = NULL; error = soreceive(s->l2so, NULL, &uio, &m, (struct mbuf **) NULL, &flags); if (error != 0) { if (error == EWOULDBLOCK) return (0); /* XXX can happen? */ NG_BTSOCKET_RFCOMM_ERR( "%s: Could not receive data from L2CAP socket, error=%d\n", __func__, error); return (error); } more = (m->m_nextpkt != NULL); m->m_nextpkt = NULL; ng_btsocket_rfcomm_receive_frame(s, m); } return (0); } /* ng_btsocket_rfcomm_session_receive */ /* * Send data on RFCOMM session * XXX FIXME locking for "l2so"?
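* Drains frames queued on s->outq by ng_btsocket_rfcomm_send_command() and ng_btsocket_rfcomm_send_uih(), passing them to the L2CAP socket one at a time for as long as it remains writeable.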
*/ static int ng_btsocket_rfcomm_session_send(ng_btsocket_rfcomm_session_p s) { struct mbuf *m = NULL; int error; mtx_assert(&s->session_mtx, MA_OWNED); /* Send as much as we can from the session queue */ while (sowriteable(s->l2so)) { /* Check if socket still OK */ if ((error = s->l2so->so_error) != 0) { s->l2so->so_error = 0; NG_BTSOCKET_RFCOMM_ERR( "%s: Detected error=%d on L2CAP socket, state=%d, flags=%#x\n", __func__, error, s->state, s->flags); return (error); } NG_BT_MBUFQ_DEQUEUE(&s->outq, m); if (m == NULL) return (0); /* we are done */ /* Call send function on the L2CAP socket */ error = s->l2so->so_proto->pr_send(s->l2so, 0, m, NULL, NULL, curthread /* XXX */); if (error != 0) { NG_BTSOCKET_RFCOMM_ERR( "%s: Could not send data to L2CAP socket, error=%d\n", __func__, error); return (error); } } return (0); } /* ng_btsocket_rfcomm_session_send */ /* * Close and disconnect all DLCs for the given session. Caller must hold * s->session_mtx. Will wake up the session. */ static void ng_btsocket_rfcomm_session_clean(ng_btsocket_rfcomm_session_p s) { ng_btsocket_rfcomm_pcb_p pcb = NULL, pcb_next = NULL; int error; mtx_assert(&s->session_mtx, MA_OWNED); /* * Note: cannot use LIST_FOREACH because ng_btsocket_rfcomm_pcb_kill * will unlink DLC from the session */ for (pcb = LIST_FIRST(&s->dlcs); pcb != NULL; ) { mtx_lock(&pcb->pcb_mtx); pcb_next = LIST_NEXT(pcb, session_next); NG_BTSOCKET_RFCOMM_INFO( "%s: Disconnecting dlci=%d, state=%d, flags=%#x\n", __func__, pcb->dlci, pcb->state, pcb->flags); if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED) error = ECONNRESET; else error = ECONNREFUSED; ng_btsocket_rfcomm_pcb_kill(pcb, error); mtx_unlock(&pcb->pcb_mtx); pcb = pcb_next; } } /* ng_btsocket_rfcomm_session_clean */ /* * Process all DLCs on the session. Caller MUST hold s->session_mtx. */ static void ng_btsocket_rfcomm_session_process_pcb(ng_btsocket_rfcomm_session_p s) { ng_btsocket_rfcomm_pcb_p pcb = NULL, pcb_next = NULL; int error; mtx_assert(&s->session_mtx, MA_OWNED); /* * Note: cannot use LIST_FOREACH because ng_btsocket_rfcomm_pcb_kill * will unlink DLC from the session */ for (pcb = LIST_FIRST(&s->dlcs); pcb != NULL; ) { mtx_lock(&pcb->pcb_mtx); pcb_next = LIST_NEXT(pcb, session_next); switch (pcb->state) { /* * If DLC in W4_CONNECT state then we should check for both * timeout and detach. */ case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT: if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_DETACHED) ng_btsocket_rfcomm_pcb_kill(pcb, 0); else if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT) ng_btsocket_rfcomm_pcb_kill(pcb, ETIMEDOUT); break; /* * If DLC in CONFIGURING or CONNECTING state then we only * should check for timeout. If detach() was called then * DLC will be moved into DISCONNECTING state. */ case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING: case NG_BTSOCKET_RFCOMM_DLC_CONNECTING: if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT) ng_btsocket_rfcomm_pcb_kill(pcb, ETIMEDOUT); break; /* * If DLC in CONNECTED state then we need to send data (if any) * from the socket's send queue. Note that we will send data * from either all sockets or none. This may overload session's * outgoing queue (but we do not check for that). * * XXX FIXME need scheduler for RFCOMM sockets */ case NG_BTSOCKET_RFCOMM_DLC_CONNECTED: error = ng_btsocket_rfcomm_pcb_send(pcb, ALOT); if (error != 0) ng_btsocket_rfcomm_pcb_kill(pcb, error); break; /* * If DLC in DISCONNECTING state then we must send DISC frame. * Note that if DLC has timeout set then we do not need to * resend DISC frame.
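* (The timeout armed after the first DISC bounds how long we wait for the peer's UA/DM reply; if it fires, the DLC is killed with ETIMEDOUT below.)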
* * XXX FIXME need to drain all data from the socket's queue * if LINGER option was set */ case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING: if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)) { error = ng_btsocket_rfcomm_send_command( pcb->session, RFCOMM_FRAME_DISC, pcb->dlci); if (error == 0) ng_btsocket_rfcomm_timeout(pcb); else ng_btsocket_rfcomm_pcb_kill(pcb, error); } else if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT) ng_btsocket_rfcomm_pcb_kill(pcb, ETIMEDOUT); break; /* case NG_BTSOCKET_RFCOMM_DLC_CLOSED: */ default: panic("%s: Invalid DLC state=%d, flags=%#x\n", __func__, pcb->state, pcb->flags); break; } mtx_unlock(&pcb->pcb_mtx); pcb = pcb_next; } } /* ng_btsocket_rfcomm_session_process_pcb */ /* * Find RFCOMM session between "src" and "dst". * Caller MUST hold ng_btsocket_rfcomm_sessions_mtx. */ static ng_btsocket_rfcomm_session_p ng_btsocket_rfcomm_session_by_addr(bdaddr_p src, bdaddr_p dst) { ng_btsocket_rfcomm_session_p s = NULL; ng_btsocket_l2cap_pcb_p l2pcb = NULL; int any_src; mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED); any_src = (bcmp(src, NG_HCI_BDADDR_ANY, sizeof(*src)) == 0); LIST_FOREACH(s, &ng_btsocket_rfcomm_sessions, next) { l2pcb = so2l2cap_pcb(s->l2so); if ((any_src || bcmp(&l2pcb->src, src, sizeof(*src)) == 0) && bcmp(&l2pcb->dst, dst, sizeof(*dst)) == 0) break; } return (s); } /* ng_btsocket_rfcomm_session_by_addr */ /***************************************************************************** ***************************************************************************** ** RFCOMM ***************************************************************************** *****************************************************************************/ /* * Process incoming RFCOMM frame. Caller must hold s->session_mtx. * XXX FIXME check frame length */ static int ng_btsocket_rfcomm_receive_frame(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_frame_hdr *hdr = NULL; struct mbuf *m = NULL; u_int16_t length; u_int8_t dlci, type; int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); /* Pullup as much as we can into first mbuf (for direct access) */ length = min(m0->m_pkthdr.len, MHLEN); if (m0->m_len < length) { if ((m0 = m_pullup(m0, length)) == NULL) { NG_BTSOCKET_RFCOMM_ALERT( "%s: m_pullup(%d) failed\n", __func__, length); return (ENOBUFS); } } hdr = mtod(m0, struct rfcomm_frame_hdr *); dlci = RFCOMM_DLCI(hdr->address); type = RFCOMM_TYPE(hdr->control); /* Test EA bit in length. If not set then we have 2 bytes of length */ if (!RFCOMM_EA(hdr->length)) { bcopy(&hdr->length, &length, sizeof(length)); length = le16toh(length) >> 1; m_adj(m0, sizeof(*hdr) + 1); } else { length = hdr->length >> 1; m_adj(m0, sizeof(*hdr)); } NG_BTSOCKET_RFCOMM_INFO( "%s: Got frame type=%#x, dlci=%d, length=%d, cr=%d, pf=%d, len=%d\n", __func__, type, dlci, length, RFCOMM_CR(hdr->address), RFCOMM_PF(hdr->control), m0->m_pkthdr.len); /* * Get FCS (the last byte in the frame) * XXX this will not work if mbuf chain ends with empty mbuf. * XXX let's hope it never happens :) */ for (m = m0; m->m_next != NULL; m = m->m_next) ; if (m->m_len <= 0) panic("%s: Empty mbuf at the end of the chain, len=%d\n", __func__, m->m_len); /* * Check FCS. We only need to calculate FCS on first 2 or 3 bytes * and already m_pullup'ed mbuf chain, so it should be safe. */ if (ng_btsocket_rfcomm_check_fcs((u_int8_t *) hdr, type, m->m_data[m->m_len - 1])) { NG_BTSOCKET_RFCOMM_ERR( "%s: Invalid RFCOMM packet. 
Bad checksum\n", __func__); NG_FREE_M(m0); return (EINVAL); } m_adj(m0, -1); /* Trim FCS byte */ /* * Process RFCOMM frame. * * From TS 07.10 spec * * "... In the case where a SABM or DISC command with the P bit set * to 0 is received then the received frame shall be discarded..." * * "... If a unsolicited DM response is received then the frame shall * be processed irrespective of the P/F setting... " * * "... The station may transmit response frames with the F bit set * to 0 at any opportunity on an asynchronous basis. However, in the * case where a UA response is received with the F bit set to 0 then * the received frame shall be discarded..." * * From Bluetooth spec * * "... When credit based flow control is being used, the meaning of * the P/F bit in the control field of the RFCOMM header is redefined * for UIH frames..." */ switch (type) { case RFCOMM_FRAME_SABM: if (RFCOMM_PF(hdr->control)) error = ng_btsocket_rfcomm_receive_sabm(s, dlci); break; case RFCOMM_FRAME_DISC: if (RFCOMM_PF(hdr->control)) error = ng_btsocket_rfcomm_receive_disc(s, dlci); break; case RFCOMM_FRAME_UA: if (RFCOMM_PF(hdr->control)) error = ng_btsocket_rfcomm_receive_ua(s, dlci); break; case RFCOMM_FRAME_DM: error = ng_btsocket_rfcomm_receive_dm(s, dlci); break; case RFCOMM_FRAME_UIH: if (dlci == 0) error = ng_btsocket_rfcomm_receive_mcc(s, m0); else error = ng_btsocket_rfcomm_receive_uih(s, dlci, RFCOMM_PF(hdr->control), m0); return (error); /* NOT REACHED */ default: NG_BTSOCKET_RFCOMM_ERR( "%s: Invalid RFCOMM packet. Unknown type=%#x\n", __func__, type); error = EINVAL; break; } NG_FREE_M(m0); return (error); } /* ng_btsocket_rfcomm_receive_frame */ /* * Process RFCOMM SABM frame */ static int ng_btsocket_rfcomm_receive_sabm(ng_btsocket_rfcomm_session_p s, int dlci) { ng_btsocket_rfcomm_pcb_p pcb = NULL; int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got SABM, session state=%d, flags=%#x, mtu=%d, dlci=%d\n", __func__, s->state, s->flags, s->mtu, dlci); /* DLCI == 0 means open multiplexor channel */ if (dlci == 0) { switch (s->state) { case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED: case NG_BTSOCKET_RFCOMM_SESSION_OPEN: error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_UA, dlci); if (error == 0) { s->state = NG_BTSOCKET_RFCOMM_SESSION_OPEN; ng_btsocket_rfcomm_connect_cfm(s); } else { s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; ng_btsocket_rfcomm_session_clean(s); } break; default: NG_BTSOCKET_RFCOMM_WARN( "%s: Got SABM for session in invalid state state=%d, flags=%#x\n", __func__, s->state, s->flags); error = EINVAL; break; } return (error); } /* Make sure multiplexor channel is open */ if (s->state != NG_BTSOCKET_RFCOMM_SESSION_OPEN) { NG_BTSOCKET_RFCOMM_ERR( "%s: Got SABM for dlci=%d with multiplexor channel closed, state=%d, " \ "flags=%#x\n", __func__, dlci, s->state, s->flags); return (EINVAL); } /* * Check if we have this DLCI. This might happen when remote * peer uses PN command before actual open (SABM) happens. 
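* In that case ng_btsocket_rfcomm_receive_pn() has already created the DLC and left it in the CONNECTING state, so the SABM below simply completes the handshake with a UA response followed by MSC.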
*/ pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci); if (pcb != NULL) { mtx_lock(&pcb->pcb_mtx); if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTING) { NG_BTSOCKET_RFCOMM_ERR( "%s: Got SABM for dlci=%d in invalid state=%d, flags=%#x\n", __func__, dlci, pcb->state, pcb->flags); mtx_unlock(&pcb->pcb_mtx); return (ENOENT); } ng_btsocket_rfcomm_untimeout(pcb); error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_UA,dlci); if (error == 0) error = ng_btsocket_rfcomm_send_msc(pcb); if (error == 0) { pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTED; soisconnected(pcb->so); } else ng_btsocket_rfcomm_pcb_kill(pcb, error); mtx_unlock(&pcb->pcb_mtx); return (error); } /* * We do not have requested DLCI, so it must be an incoming connection * with default parameters. Try to accept it. */ pcb = ng_btsocket_rfcomm_connect_ind(s, RFCOMM_SRVCHANNEL(dlci)); if (pcb != NULL) { mtx_lock(&pcb->pcb_mtx); pcb->dlci = dlci; error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_UA,dlci); if (error == 0) error = ng_btsocket_rfcomm_send_msc(pcb); if (error == 0) { pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTED; soisconnected(pcb->so); } else ng_btsocket_rfcomm_pcb_kill(pcb, error); mtx_unlock(&pcb->pcb_mtx); } else /* Nobody is listen()ing on the requested DLCI */ error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_DM,dlci); return (error); } /* ng_btsocket_rfcomm_receive_sabm */ /* * Process RFCOMM DISC frame */ static int ng_btsocket_rfcomm_receive_disc(ng_btsocket_rfcomm_session_p s, int dlci) { ng_btsocket_rfcomm_pcb_p pcb = NULL; int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got DISC, session state=%d, flags=%#x, mtu=%d, dlci=%d\n", __func__, s->state, s->flags, s->mtu, dlci); /* DLCI == 0 means close multiplexor channel */ if (dlci == 0) { /* XXX FIXME assume that remote side will close the socket */ error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_UA, 0); if (error == 0) { if (s->state == NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING) s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; /* XXX */ else s->state = NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING; } else s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; /* XXX */ ng_btsocket_rfcomm_session_clean(s); } else { pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci); if (pcb != NULL) { int err; mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_RFCOMM_INFO( "%s: Got DISC for dlci=%d, state=%d, flags=%#x\n", __func__, dlci, pcb->state, pcb->flags); error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_UA, dlci); if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED) err = 0; else err = ECONNREFUSED; ng_btsocket_rfcomm_pcb_kill(pcb, err); mtx_unlock(&pcb->pcb_mtx); } else { NG_BTSOCKET_RFCOMM_WARN( "%s: Got DISC for non-existing dlci=%d\n", __func__, dlci); error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_DM, dlci); } } return (error); } /* ng_btsocket_rfcomm_receive_disc */ /* * Process RFCOMM UA frame */ static int ng_btsocket_rfcomm_receive_ua(ng_btsocket_rfcomm_session_p s, int dlci) { ng_btsocket_rfcomm_pcb_p pcb = NULL; int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got UA, session state=%d, flags=%#x, mtu=%d, dlci=%d\n", __func__, s->state, s->flags, s->mtu, dlci); /* dlci == 0 means multiplexor channel */ if (dlci == 0) { switch (s->state) { case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED: s->state = NG_BTSOCKET_RFCOMM_SESSION_OPEN; ng_btsocket_rfcomm_connect_cfm(s); break; case NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING: s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; ng_btsocket_rfcomm_session_clean(s); break; 
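/* A UA on DLCI 0 in any other session state is unexpected; log and reject it */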
default: NG_BTSOCKET_RFCOMM_WARN( "%s: Got UA for session in invalid state=%d(%d), flags=%#x, mtu=%d\n", __func__, s->state, INITIATOR(s), s->flags, s->mtu); error = ENOENT; break; } return (error); } /* Check if we have this DLCI */ pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci); if (pcb != NULL) { mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_RFCOMM_INFO( "%s: Got UA for dlci=%d, state=%d, flags=%#x\n", __func__, dlci, pcb->state, pcb->flags); switch (pcb->state) { case NG_BTSOCKET_RFCOMM_DLC_CONNECTING: ng_btsocket_rfcomm_untimeout(pcb); error = ng_btsocket_rfcomm_send_msc(pcb); if (error == 0) { pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTED; soisconnected(pcb->so); } break; case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING: ng_btsocket_rfcomm_pcb_kill(pcb, 0); break; default: NG_BTSOCKET_RFCOMM_WARN( "%s: Got UA for dlci=%d in invalid state=%d, flags=%#x\n", __func__, dlci, pcb->state, pcb->flags); error = ENOENT; break; } mtx_unlock(&pcb->pcb_mtx); } else { NG_BTSOCKET_RFCOMM_WARN( "%s: Got UA for non-existing dlci=%d\n", __func__, dlci); error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_DM,dlci); } return (error); } /* ng_btsocket_rfcomm_receive_ua */ /* * Process RFCOMM DM frame */ static int ng_btsocket_rfcomm_receive_dm(ng_btsocket_rfcomm_session_p s, int dlci) { ng_btsocket_rfcomm_pcb_p pcb = NULL; int error; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got DM, session state=%d, flags=%#x, mtu=%d, dlci=%d\n", __func__, s->state, s->flags, s->mtu, dlci); /* DLCI == 0 means multiplexor channel */ if (dlci == 0) { /* Disconnect all dlc's on the session */ s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; ng_btsocket_rfcomm_session_clean(s); } else { pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci); if (pcb != NULL) { mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_RFCOMM_INFO( "%s: Got DM for dlci=%d, state=%d, flags=%#x\n", __func__, dlci, pcb->state, pcb->flags); if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED) error = ECONNRESET; else error = ECONNREFUSED; ng_btsocket_rfcomm_pcb_kill(pcb, error); mtx_unlock(&pcb->pcb_mtx); } else NG_BTSOCKET_RFCOMM_WARN( "%s: Got DM for non-existing dlci=%d\n", __func__, dlci); } return (0); } /* ng_btsocket_rfcomm_receive_dm */ /* * Process RFCOMM UIH frame (data) */ static int ng_btsocket_rfcomm_receive_uih(ng_btsocket_rfcomm_session_p s, int dlci, int pf, struct mbuf *m0) { ng_btsocket_rfcomm_pcb_p pcb = NULL; int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got UIH, session state=%d, flags=%#x, mtu=%d, dlci=%d, pf=%d, len=%d\n", __func__, s->state, s->flags, s->mtu, dlci, pf, m0->m_pkthdr.len); /* XXX should we do it here? 
Check for session flow control */ if (s->flags & NG_BTSOCKET_RFCOMM_SESSION_LFC) { NG_BTSOCKET_RFCOMM_WARN( "%s: Got UIH with session flow control asserted, state=%d, flags=%#x\n", __func__, s->state, s->flags); goto drop; } /* Check if we have this dlci */ pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci); if (pcb == NULL) { NG_BTSOCKET_RFCOMM_WARN( "%s: Got UIH for non-existing dlci=%d\n", __func__, dlci); error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_DM,dlci); goto drop; } mtx_lock(&pcb->pcb_mtx); /* Check dlci state */ if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTED) { NG_BTSOCKET_RFCOMM_WARN( "%s: Got UIH for dlci=%d in invalid state=%d, flags=%#x\n", __func__, dlci, pcb->state, pcb->flags); error = EINVAL; goto drop1; } /* Check dlci flow control */ if (((pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) && pcb->rx_cred <= 0) || (pcb->lmodem & RFCOMM_MODEM_FC)) { NG_BTSOCKET_RFCOMM_ERR( "%s: Got UIH for dlci=%d with asserted flow control, state=%d, " \ "flags=%#x, rx_cred=%d, lmodem=%#x\n", __func__, dlci, pcb->state, pcb->flags, pcb->rx_cred, pcb->lmodem); goto drop1; } /* Did we get any credits? */ if ((pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) && pf) { NG_BTSOCKET_RFCOMM_INFO( "%s: Got %d more credits for dlci=%d, state=%d, flags=%#x, " \ "rx_cred=%d, tx_cred=%d\n", __func__, *mtod(m0, u_int8_t *), dlci, pcb->state, pcb->flags, pcb->rx_cred, pcb->tx_cred); pcb->tx_cred += *mtod(m0, u_int8_t *); m_adj(m0, 1); /* Send more from the DLC. XXX check for errors? */ ng_btsocket_rfcomm_pcb_send(pcb, ALOT); } /* OK, the rest of the mbuf is the data */ if (m0->m_pkthdr.len > 0) { /* If we are using credit flow control decrease rx_cred here */ if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) { /* Give remote peer more credits (if needed) */ if (-- pcb->rx_cred <= RFCOMM_MAX_CREDITS / 2) ng_btsocket_rfcomm_send_credits(pcb); else NG_BTSOCKET_RFCOMM_INFO( "%s: Remote side still has credits, dlci=%d, state=%d, flags=%#x, " \ "rx_cred=%d, tx_cred=%d\n", __func__, dlci, pcb->state, pcb->flags, pcb->rx_cred, pcb->tx_cred); } /* Check packet against mtu on dlci */ if (m0->m_pkthdr.len > pcb->mtu) { NG_BTSOCKET_RFCOMM_ERR( "%s: Got oversized UIH for dlci=%d, state=%d, flags=%#x, mtu=%d, len=%d\n", __func__, dlci, pcb->state, pcb->flags, pcb->mtu, m0->m_pkthdr.len); error = EMSGSIZE; } else if (m0->m_pkthdr.len > sbspace(&pcb->so->so_rcv)) { /* * This is really bad. Receive queue on socket does * not have enough space for the packet. We do not * have any other choice but to drop the packet. */ NG_BTSOCKET_RFCOMM_ERR( "%s: Not enough space in socket receive queue. Dropping UIH for dlci=%d, " \ "state=%d, flags=%#x, len=%d, space=%ld\n", __func__, dlci, pcb->state, pcb->flags, m0->m_pkthdr.len, sbspace(&pcb->so->so_rcv)); error = ENOBUFS; } else { /* Append packet to the socket receive queue */ sbappend(&pcb->so->so_rcv, m0, 0); m0 = NULL; sorwakeup(pcb->so); } } drop1: mtx_unlock(&pcb->pcb_mtx); drop: NG_FREE_M(m0); /* checks for != NULL */ return (error); } /* ng_btsocket_rfcomm_receive_uih */ /* * Process RFCOMM MCC command (Multiplexor) * * From TS 07.10 spec * * "5.4.3.1 Information Data * * ...The frames (UIH) sent by the initiating station have the C/R bit set * to 1 and those sent by the responding station have the C/R bit set to 0..." * * "5.4.6.2 Operating procedures * * Messages always exist in pairs; a command message and a corresponding * response message. If the C/R bit is set to 1 the message is a command, * if it is set to 0 the message is a response... * * ...
* * NOTE: Notice that when UIH frames are used to convey information on DLCI 0 * there are at least two different fields that contain a C/R bit, and the * bits are set of different form. The C/R bit in the Type field shall be set * as it is stated above, while the C/R bit in the Address field (see subclause * 5.2.1.2) shall be set as it is described in subclause 5.4.3.1." */ static int ng_btsocket_rfcomm_receive_mcc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_mcc_hdr *hdr = NULL; u_int8_t cr, type, length; mtx_assert(&s->session_mtx, MA_OWNED); /* * We can access data directly in the first mbuf, because we have * m_pullup()'ed mbuf chain in ng_btsocket_rfcomm_receive_frame(). * All MCC commands should fit into single mbuf (except probably TEST). */ hdr = mtod(m0, struct rfcomm_mcc_hdr *); cr = RFCOMM_CR(hdr->type); type = RFCOMM_MCC_TYPE(hdr->type); length = RFCOMM_MCC_LENGTH(hdr->length); /* Check MCC frame length */ if (sizeof(*hdr) + length != m0->m_pkthdr.len) { NG_BTSOCKET_RFCOMM_ERR( "%s: Invalid MCC frame length=%d, len=%d\n", __func__, length, m0->m_pkthdr.len); NG_FREE_M(m0); return (EMSGSIZE); } switch (type) { case RFCOMM_MCC_TEST: return (ng_btsocket_rfcomm_receive_test(s, m0)); /* NOT REACHED */ case RFCOMM_MCC_FCON: case RFCOMM_MCC_FCOFF: return (ng_btsocket_rfcomm_receive_fc(s, m0)); /* NOT REACHED */ case RFCOMM_MCC_MSC: return (ng_btsocket_rfcomm_receive_msc(s, m0)); /* NOT REACHED */ case RFCOMM_MCC_RPN: return (ng_btsocket_rfcomm_receive_rpn(s, m0)); /* NOT REACHED */ case RFCOMM_MCC_RLS: return (ng_btsocket_rfcomm_receive_rls(s, m0)); /* NOT REACHED */ case RFCOMM_MCC_PN: return (ng_btsocket_rfcomm_receive_pn(s, m0)); /* NOT REACHED */ case RFCOMM_MCC_NSC: NG_BTSOCKET_RFCOMM_ERR( "%s: Got MCC NSC, type=%#x, cr=%d, length=%d, session state=%d, flags=%#x, " \ "mtu=%d, len=%d\n", __func__, RFCOMM_MCC_TYPE(*((u_int8_t *)(hdr + 1))), cr, length, s->state, s->flags, s->mtu, m0->m_pkthdr.len); NG_FREE_M(m0); break; default: NG_BTSOCKET_RFCOMM_ERR( "%s: Got unknown MCC, type=%#x, cr=%d, length=%d, session state=%d, " \ "flags=%#x, mtu=%d, len=%d\n", __func__, type, cr, length, s->state, s->flags, s->mtu, m0->m_pkthdr.len); /* Reuse mbuf to send NSC */ hdr = mtod(m0, struct rfcomm_mcc_hdr *); m0->m_pkthdr.len = m0->m_len = sizeof(*hdr); /* Create MCC NSC header */ hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_NSC); hdr->length = RFCOMM_MKLEN8(1); /* Put back MCC command type we did not like */ m0->m_data[m0->m_len] = RFCOMM_MKMCC_TYPE(cr, type); m0->m_pkthdr.len ++; m0->m_len ++; /* Send UIH frame */ return (ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0)); /* NOT REACHED */ } return (0); } /* ng_btsocket_rfcomm_receive_mcc */ /* * Receive RFCOMM TEST MCC command */ static int ng_btsocket_rfcomm_receive_test(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_mcc_hdr *hdr = mtod(m0, struct rfcomm_mcc_hdr *); int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got MCC TEST, cr=%d, length=%d, session state=%d, flags=%#x, mtu=%d, " \ "len=%d\n", __func__, RFCOMM_CR(hdr->type), RFCOMM_MCC_LENGTH(hdr->length), s->state, s->flags, s->mtu, m0->m_pkthdr.len); if (RFCOMM_CR(hdr->type)) { hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_TEST); error = ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0); } else NG_FREE_M(m0); /* XXX ignore response */ return (error); } /* ng_btsocket_rfcomm_receive_test */ /* * Receive RFCOMM FCON/FCOFF MCC command */ static int 
ng_btsocket_rfcomm_receive_fc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_mcc_hdr *hdr = mtod(m0, struct rfcomm_mcc_hdr *); u_int8_t type = RFCOMM_MCC_TYPE(hdr->type); int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); /* * Turn ON/OFF aggregate flow on the entire session. When remote peer * asserted flow control no transmission shall occur except on dlci 0 * (control channel). */ NG_BTSOCKET_RFCOMM_INFO( "%s: Got MCC FC%s, cr=%d, length=%d, session state=%d, flags=%#x, mtu=%d, " \ "len=%d\n", __func__, (type == RFCOMM_MCC_FCON)? "ON" : "OFF", RFCOMM_CR(hdr->type), RFCOMM_MCC_LENGTH(hdr->length), s->state, s->flags, s->mtu, m0->m_pkthdr.len); if (RFCOMM_CR(hdr->type)) { if (type == RFCOMM_MCC_FCON) s->flags &= ~NG_BTSOCKET_RFCOMM_SESSION_RFC; else s->flags |= NG_BTSOCKET_RFCOMM_SESSION_RFC; hdr->type = RFCOMM_MKMCC_TYPE(0, type); error = ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0); } else NG_FREE_M(m0); /* XXX ignore response */ return (error); } /* ng_btsocket_rfcomm_receive_fc */ /* * Receive RFCOMM MSC MCC command */ static int ng_btsocket_rfcomm_receive_msc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_mcc_hdr *hdr = mtod(m0, struct rfcomm_mcc_hdr*); struct rfcomm_mcc_msc *msc = (struct rfcomm_mcc_msc *)(hdr+1); ng_btsocket_rfcomm_pcb_t *pcb = NULL; int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got MCC MSC, dlci=%d, cr=%d, length=%d, session state=%d, flags=%#x, " \ "mtu=%d, len=%d\n", __func__, RFCOMM_DLCI(msc->address), RFCOMM_CR(hdr->type), RFCOMM_MCC_LENGTH(hdr->length), s->state, s->flags, s->mtu, m0->m_pkthdr.len); if (RFCOMM_CR(hdr->type)) { pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, RFCOMM_DLCI(msc->address)); if (pcb == NULL) { NG_BTSOCKET_RFCOMM_WARN( "%s: Got MSC command for non-existing dlci=%d\n", __func__, RFCOMM_DLCI(msc->address)); NG_FREE_M(m0); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTING && pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTED) { NG_BTSOCKET_RFCOMM_WARN( "%s: Got MSC on dlci=%d in invalid state=%d\n", __func__, RFCOMM_DLCI(msc->address), pcb->state); mtx_unlock(&pcb->pcb_mtx); NG_FREE_M(m0); return (EINVAL); } pcb->rmodem = msc->modem; /* Update remote port signals */ hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_MSC); error = ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0); #if 0 /* YYY */ /* Send more data from DLC. XXX check for errors? */ if (!(pcb->rmodem & RFCOMM_MODEM_FC) && !(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)) ng_btsocket_rfcomm_pcb_send(pcb, ALOT); #endif /* YYY */ mtx_unlock(&pcb->pcb_mtx); } else NG_FREE_M(m0); /* XXX ignore response */ return (error); } /* ng_btsocket_rfcomm_receive_msc */ /* * Receive RFCOMM RPN MCC command * XXX FIXME do we need htole16/le16toh for RPN param_mask? 
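* (The handler below does byte-swap it: param_mask is converted with le16toh() before its bits are tested and with htole16() when the response is built.)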
*/ static int ng_btsocket_rfcomm_receive_rpn(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_mcc_hdr *hdr = mtod(m0, struct rfcomm_mcc_hdr *); struct rfcomm_mcc_rpn *rpn = (struct rfcomm_mcc_rpn *)(hdr + 1); int error = 0; u_int16_t param_mask; u_int8_t bit_rate, data_bits, stop_bits, parity, flow_control, xon_char, xoff_char; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got MCC RPN, dlci=%d, cr=%d, length=%d, session state=%d, flags=%#x, " \ "mtu=%d, len=%d\n", __func__, RFCOMM_DLCI(rpn->dlci), RFCOMM_CR(hdr->type), RFCOMM_MCC_LENGTH(hdr->length), s->state, s->flags, s->mtu, m0->m_pkthdr.len); if (RFCOMM_CR(hdr->type)) { param_mask = RFCOMM_RPN_PM_ALL; if (RFCOMM_MCC_LENGTH(hdr->length) == 1) { /* Request - return default setting */ bit_rate = RFCOMM_RPN_BR_115200; data_bits = RFCOMM_RPN_DATA_8; stop_bits = RFCOMM_RPN_STOP_1; parity = RFCOMM_RPN_PARITY_NONE; flow_control = RFCOMM_RPN_FLOW_NONE; xon_char = RFCOMM_RPN_XON_CHAR; xoff_char = RFCOMM_RPN_XOFF_CHAR; } else { /* * Ignore/accept bit_rate, 8 bits, 1 stop bit, no * parity, no flow control lines, default XON/XOFF * chars. */ bit_rate = rpn->bit_rate; rpn->param_mask = le16toh(rpn->param_mask); /* XXX */ data_bits = RFCOMM_RPN_DATA_BITS(rpn->line_settings); if (rpn->param_mask & RFCOMM_RPN_PM_DATA && data_bits != RFCOMM_RPN_DATA_8) { data_bits = RFCOMM_RPN_DATA_8; param_mask ^= RFCOMM_RPN_PM_DATA; } stop_bits = RFCOMM_RPN_STOP_BITS(rpn->line_settings); if (rpn->param_mask & RFCOMM_RPN_PM_STOP && stop_bits != RFCOMM_RPN_STOP_1) { stop_bits = RFCOMM_RPN_STOP_1; param_mask ^= RFCOMM_RPN_PM_STOP; } parity = RFCOMM_RPN_PARITY(rpn->line_settings); if (rpn->param_mask & RFCOMM_RPN_PM_PARITY && parity != RFCOMM_RPN_PARITY_NONE) { parity = RFCOMM_RPN_PARITY_NONE; param_mask ^= RFCOMM_RPN_PM_PARITY; } flow_control = rpn->flow_control; if (rpn->param_mask & RFCOMM_RPN_PM_FLOW && flow_control != RFCOMM_RPN_FLOW_NONE) { flow_control = RFCOMM_RPN_FLOW_NONE; param_mask ^= RFCOMM_RPN_PM_FLOW; } xon_char = rpn->xon_char; if (rpn->param_mask & RFCOMM_RPN_PM_XON && xon_char != RFCOMM_RPN_XON_CHAR) { xon_char = RFCOMM_RPN_XON_CHAR; param_mask ^= RFCOMM_RPN_PM_XON; } xoff_char = rpn->xoff_char; if (rpn->param_mask & RFCOMM_RPN_PM_XOFF && xoff_char != RFCOMM_RPN_XOFF_CHAR) { xoff_char = RFCOMM_RPN_XOFF_CHAR; param_mask ^= RFCOMM_RPN_PM_XOFF; } } rpn->bit_rate = bit_rate; rpn->line_settings = RFCOMM_MKRPN_LINE_SETTINGS(data_bits, stop_bits, parity); rpn->flow_control = flow_control; rpn->xon_char = xon_char; rpn->xoff_char = xoff_char; rpn->param_mask = htole16(param_mask); /* XXX */ m0->m_pkthdr.len = m0->m_len = sizeof(*hdr) + sizeof(*rpn); hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_RPN); error = ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0); } else NG_FREE_M(m0); /* XXX ignore response */ return (error); } /* ng_btsocket_rfcomm_receive_rpn */ /* * Receive RFCOMM RLS MCC command */ static int ng_btsocket_rfcomm_receive_rls(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_mcc_hdr *hdr = mtod(m0, struct rfcomm_mcc_hdr *); struct rfcomm_mcc_rls *rls = (struct rfcomm_mcc_rls *)(hdr + 1); int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); /* * XXX FIXME Do we have to do anything else here? Remote peer tries to * tell us something about DLCI. Just report what we have received and * return back received values as required by TS 07.10 spec. 
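* The low bit of the RLS status octet indicates an error condition; the remaining bits carry the line status value that is logged below.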
*/ NG_BTSOCKET_RFCOMM_INFO( "%s: Got MCC RLS, dlci=%d, status=%#x, cr=%d, length=%d, session state=%d, " \ "flags=%#x, mtu=%d, len=%d\n", __func__, RFCOMM_DLCI(rls->address), rls->status, RFCOMM_CR(hdr->type), RFCOMM_MCC_LENGTH(hdr->length), s->state, s->flags, s->mtu, m0->m_pkthdr.len); if (RFCOMM_CR(hdr->type)) { if (rls->status & 0x1) NG_BTSOCKET_RFCOMM_ERR( "%s: Got RLS dlci=%d, error=%#x\n", __func__, RFCOMM_DLCI(rls->address), rls->status >> 1); hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_RLS); error = ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0); } else NG_FREE_M(m0); /* XXX ignore responses */ return (error); } /* ng_btsocket_rfcomm_receive_rls */ /* * Receive RFCOMM PN MCC command */ static int ng_btsocket_rfcomm_receive_pn(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_mcc_hdr *hdr = mtod(m0, struct rfcomm_mcc_hdr*); struct rfcomm_mcc_pn *pn = (struct rfcomm_mcc_pn *)(hdr+1); ng_btsocket_rfcomm_pcb_t *pcb = NULL; int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got MCC PN, dlci=%d, cr=%d, length=%d, flow_control=%#x, priority=%d, " \ "ack_timer=%d, mtu=%d, max_retrans=%d, credits=%d, session state=%d, " \ "flags=%#x, session mtu=%d, len=%d\n", __func__, pn->dlci, RFCOMM_CR(hdr->type), RFCOMM_MCC_LENGTH(hdr->length), pn->flow_control, pn->priority, pn->ack_timer, le16toh(pn->mtu), pn->max_retrans, pn->credits, s->state, s->flags, s->mtu, m0->m_pkthdr.len); if (pn->dlci == 0) { NG_BTSOCKET_RFCOMM_ERR("%s: Zero dlci in MCC PN\n", __func__); NG_FREE_M(m0); return (EINVAL); } /* Check if we have this dlci */ pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, pn->dlci); if (pcb != NULL) { mtx_lock(&pcb->pcb_mtx); if (RFCOMM_CR(hdr->type)) { /* PN Request */ ng_btsocket_rfcomm_set_pn(pcb, 1, pn->flow_control, pn->credits, pn->mtu); if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) { pn->flow_control = 0xe0; pn->credits = RFCOMM_DEFAULT_CREDITS; } else { pn->flow_control = 0; pn->credits = 0; } hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_PN); error = ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0); } else { /* PN Response - proceed with SABM. 
Timeout still set */ if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONFIGURING) { ng_btsocket_rfcomm_set_pn(pcb, 0, pn->flow_control, pn->credits, pn->mtu); pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTING; error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_SABM, pn->dlci); } else NG_BTSOCKET_RFCOMM_WARN( "%s: Got PN response for dlci=%d in invalid state=%d\n", __func__, pn->dlci, pcb->state); NG_FREE_M(m0); } mtx_unlock(&pcb->pcb_mtx); } else if (RFCOMM_CR(hdr->type)) { /* PN request to non-existing dlci - incoming connection */ pcb = ng_btsocket_rfcomm_connect_ind(s, RFCOMM_SRVCHANNEL(pn->dlci)); if (pcb != NULL) { mtx_lock(&pcb->pcb_mtx); pcb->dlci = pn->dlci; ng_btsocket_rfcomm_set_pn(pcb, 1, pn->flow_control, pn->credits, pn->mtu); if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) { pn->flow_control = 0xe0; pn->credits = RFCOMM_DEFAULT_CREDITS; } else { pn->flow_control = 0; pn->credits = 0; } hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_PN); error = ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0); if (error == 0) { ng_btsocket_rfcomm_timeout(pcb); pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTING; soisconnecting(pcb->so); } else ng_btsocket_rfcomm_pcb_kill(pcb, error); mtx_unlock(&pcb->pcb_mtx); } else { /* Nobody is listen()ing on this channel */ error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_DM, pn->dlci); NG_FREE_M(m0); } } else NG_FREE_M(m0); /* XXX ignore response to non-existing dlci */ return (error); } /* ng_btsocket_rfcomm_receive_pn */ /* * Set PN parameters for dlci. Caller must hold pcb->pcb_mtx. * * From Bluetooth spec. * * "... The CL1 - CL4 field is completely redefined. (In TS07.10 this defines * the convergence layer to use, which is not applicable to RFCOMM. In RFCOMM, * in Bluetooth versions up to 1.0B, this field was forced to 0). * * In the PN request sent prior to a DLC establishment, this field must contain * the value 15 (0xF), indicating support of credit based flow control in the * sender. See Table 5.3 below. If the PN response contains any other value * than 14 (0xE) in this field, it is inferred that the peer RFCOMM entity is * not supporting the credit based flow control feature. (This is only possible * if the peer RFCOMM implementation is only conforming to Bluetooth version * 1.0B.) If a PN request is sent on an already open DLC, then this field must * contain the value zero; it is not possible to set initial credits more * than once per DLC activation. A responding implementation must set this * field in the PN response to 14 (0xE), if (and only if) the value in the PN * request was 15..." */ static void ng_btsocket_rfcomm_set_pn(ng_btsocket_rfcomm_pcb_p pcb, u_int8_t cr, u_int8_t flow_control, u_int8_t credits, u_int16_t mtu) { mtx_assert(&pcb->pcb_mtx, MA_OWNED); pcb->mtu = le16toh(mtu); if (cr) { if (flow_control == 0xf0) { pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_CFC; pcb->tx_cred = credits; } else { pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_CFC; pcb->tx_cred = 0; } } else { if (flow_control == 0xe0) { pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_CFC; pcb->tx_cred = credits; } else { pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_CFC; pcb->tx_cred = 0; } } NG_BTSOCKET_RFCOMM_INFO( "%s: cr=%d, dlci=%d, state=%d, flags=%#x, mtu=%d, rx_cred=%d, tx_cred=%d\n", __func__, cr, pcb->dlci, pcb->state, pcb->flags, pcb->mtu, pcb->rx_cred, pcb->tx_cred); } /* ng_btsocket_rfcomm_set_pn */ /* * Send RFCOMM SABM/DISC/UA/DM frames. 
Caller must hold s->session_mtx */ static int ng_btsocket_rfcomm_send_command(ng_btsocket_rfcomm_session_p s, u_int8_t type, u_int8_t dlci) { struct rfcomm_cmd_hdr *hdr = NULL; struct mbuf *m = NULL; int cr; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Sending command type %#x, session state=%d, flags=%#x, mtu=%d, dlci=%d\n", __func__, type, s->state, s->flags, s->mtu, dlci); switch (type) { case RFCOMM_FRAME_SABM: case RFCOMM_FRAME_DISC: cr = INITIATOR(s); break; case RFCOMM_FRAME_UA: case RFCOMM_FRAME_DM: cr = !INITIATOR(s); break; default: panic("%s: Invalid frame type=%#x\n", __func__, type); return (EINVAL); /* NOT REACHED */ } MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_pkthdr.len = m->m_len = sizeof(*hdr); hdr = mtod(m, struct rfcomm_cmd_hdr *); hdr->address = RFCOMM_MKADDRESS(cr, dlci); hdr->control = RFCOMM_MKCONTROL(type, 1); hdr->length = RFCOMM_MKLEN8(0); hdr->fcs = ng_btsocket_rfcomm_fcs3((u_int8_t *) hdr); NG_BT_MBUFQ_ENQUEUE(&s->outq, m); return (0); } /* ng_btsocket_rfcomm_send_command */ /* * Send RFCOMM UIH frame. Caller must hold s->session_mtx */ static int ng_btsocket_rfcomm_send_uih(ng_btsocket_rfcomm_session_p s, u_int8_t address, u_int8_t pf, u_int8_t credits, struct mbuf *data) { struct rfcomm_frame_hdr *hdr = NULL; struct mbuf *m = NULL, *mcrc = NULL; u_int16_t length; mtx_assert(&s->session_mtx, MA_OWNED); MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { NG_FREE_M(data); return (ENOBUFS); } m->m_pkthdr.len = m->m_len = sizeof(*hdr); MGET(mcrc, M_NOWAIT, MT_DATA); if (mcrc == NULL) { NG_FREE_M(data); return (ENOBUFS); } mcrc->m_len = 1; /* Fill UIH frame header */ hdr = mtod(m, struct rfcomm_frame_hdr *); hdr->address = address; hdr->control = RFCOMM_MKCONTROL(RFCOMM_FRAME_UIH, pf); /* Calculate FCS */ mcrc->m_data[0] = ng_btsocket_rfcomm_fcs2((u_int8_t *) hdr); /* Put length back */ length = (data != NULL)? data->m_pkthdr.len : 0; if (length > 127) { u_int16_t l = htole16(RFCOMM_MKLEN16(length)); bcopy(&l, &hdr->length, sizeof(l)); m->m_pkthdr.len ++; m->m_len ++; } else hdr->length = RFCOMM_MKLEN8(length); if (pf) { m->m_data[m->m_len] = credits; m->m_pkthdr.len ++; m->m_len ++; } /* Add payload */ if (data != NULL) { m_cat(m, data); m->m_pkthdr.len += length; } /* Put FCS back */ m_cat(m, mcrc); m->m_pkthdr.len ++; NG_BTSOCKET_RFCOMM_INFO( "%s: Sending UIH state=%d, flags=%#x, address=%d, length=%d, pf=%d, " \ "credits=%d, len=%d\n", __func__, s->state, s->flags, address, length, pf, credits, m->m_pkthdr.len); NG_BT_MBUFQ_ENQUEUE(&s->outq, m); return (0); } /* ng_btsocket_rfcomm_send_uih */ /* * Send MSC request. 
Caller must hold pcb->pcb_mtx and pcb->session->session_mtx */ static int ng_btsocket_rfcomm_send_msc(ng_btsocket_rfcomm_pcb_p pcb) { struct mbuf *m = NULL; struct rfcomm_mcc_hdr *hdr = NULL; struct rfcomm_mcc_msc *msc = NULL; mtx_assert(&pcb->session->session_mtx, MA_OWNED); mtx_assert(&pcb->pcb_mtx, MA_OWNED); MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_pkthdr.len = m->m_len = sizeof(*hdr) + sizeof(*msc); hdr = mtod(m, struct rfcomm_mcc_hdr *); msc = (struct rfcomm_mcc_msc *)(hdr + 1); hdr->type = RFCOMM_MKMCC_TYPE(1, RFCOMM_MCC_MSC); hdr->length = RFCOMM_MKLEN8(sizeof(*msc)); msc->address = RFCOMM_MKADDRESS(1, pcb->dlci); msc->modem = pcb->lmodem; NG_BTSOCKET_RFCOMM_INFO( "%s: Sending MSC dlci=%d, state=%d, flags=%#x, address=%d, modem=%#x\n", __func__, pcb->dlci, pcb->state, pcb->flags, msc->address, msc->modem); return (ng_btsocket_rfcomm_send_uih(pcb->session, RFCOMM_MKADDRESS(INITIATOR(pcb->session), 0), 0, 0, m)); } /* ng_btsocket_rfcomm_send_msc */ /* * Send PN request. Caller must hold pcb->pcb_mtx and pcb->session->session_mtx */ static int ng_btsocket_rfcomm_send_pn(ng_btsocket_rfcomm_pcb_p pcb) { struct mbuf *m = NULL; struct rfcomm_mcc_hdr *hdr = NULL; struct rfcomm_mcc_pn *pn = NULL; mtx_assert(&pcb->session->session_mtx, MA_OWNED); mtx_assert(&pcb->pcb_mtx, MA_OWNED); MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_pkthdr.len = m->m_len = sizeof(*hdr) + sizeof(*pn); hdr = mtod(m, struct rfcomm_mcc_hdr *); pn = (struct rfcomm_mcc_pn *)(hdr + 1); hdr->type = RFCOMM_MKMCC_TYPE(1, RFCOMM_MCC_PN); hdr->length = RFCOMM_MKLEN8(sizeof(*pn)); pn->dlci = pcb->dlci; /* * Set default DLCI priority as described in GSM 07.10 * (ETSI TS 101 369) clause 5.6 page 42 */ pn->priority = (pcb->dlci < 56)? (((pcb->dlci >> 3) << 3) + 7) : 61; pn->ack_timer = 0; pn->mtu = htole16(pcb->mtu); pn->max_retrans = 0; if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) { pn->flow_control = 0xf0; pn->credits = pcb->rx_cred; } else { pn->flow_control = 0; pn->credits = 0; } NG_BTSOCKET_RFCOMM_INFO( "%s: Sending PN dlci=%d, state=%d, flags=%#x, mtu=%d, flow_control=%#x, " \ "credits=%d\n", __func__, pcb->dlci, pcb->state, pcb->flags, pcb->mtu, pn->flow_control, pn->credits); return (ng_btsocket_rfcomm_send_uih(pcb->session, RFCOMM_MKADDRESS(INITIATOR(pcb->session), 0), 0, 0, m)); } /* ng_btsocket_rfcomm_send_pn */ /* * Calculate and send credits based on available space in receive buffer */ static int ng_btsocket_rfcomm_send_credits(ng_btsocket_rfcomm_pcb_p pcb) { int error = 0; u_int8_t credits; mtx_assert(&pcb->pcb_mtx, MA_OWNED); mtx_assert(&pcb->session->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Sending more credits, dlci=%d, state=%d, flags=%#x, mtu=%d, " \ "space=%ld, tx_cred=%d, rx_cred=%d\n", __func__, pcb->dlci, pcb->state, pcb->flags, pcb->mtu, sbspace(&pcb->so->so_rcv), pcb->tx_cred, pcb->rx_cred); credits = sbspace(&pcb->so->so_rcv) / pcb->mtu; if (credits > 0) { if (pcb->rx_cred + credits > RFCOMM_MAX_CREDITS) credits = RFCOMM_MAX_CREDITS - pcb->rx_cred; error = ng_btsocket_rfcomm_send_uih( pcb->session, RFCOMM_MKADDRESS(INITIATOR(pcb->session), pcb->dlci), 1, credits, NULL); if (error == 0) { pcb->rx_cred += credits; NG_BTSOCKET_RFCOMM_INFO( "%s: Gave remote side %d more credits, dlci=%d, state=%d, flags=%#x, " \ "rx_cred=%d, tx_cred=%d\n", __func__, credits, pcb->dlci, pcb->state, pcb->flags, pcb->rx_cred, pcb->tx_cred); } else NG_BTSOCKET_RFCOMM_ERR( "%s: Could not send credits, error=%d, dlci=%d, state=%d, flags=%#x, " \ "mtu=%d, space=%ld, 
tx_cred=%d, rx_cred=%d\n", __func__, error, pcb->dlci, pcb->state, pcb->flags, pcb->mtu, sbspace(&pcb->so->so_rcv), pcb->tx_cred, pcb->rx_cred); } return (error); } /* ng_btsocket_rfcomm_send_credits */ /***************************************************************************** ***************************************************************************** ** RFCOMM DLCs ***************************************************************************** *****************************************************************************/ /* * Send data from socket send buffer * Caller must hold pcb->pcb_mtx and pcb->session->session_mtx */ static int ng_btsocket_rfcomm_pcb_send(ng_btsocket_rfcomm_pcb_p pcb, int limit) { struct mbuf *m = NULL; int sent, length, error; mtx_assert(&pcb->session->session_mtx, MA_OWNED); mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) limit = min(limit, pcb->tx_cred); else if (!(pcb->rmodem & RFCOMM_MODEM_FC)) limit = min(limit, RFCOMM_MAX_CREDITS); /* XXX ??? */ else limit = 0; if (limit == 0) { NG_BTSOCKET_RFCOMM_INFO( "%s: Could not send - remote flow control asserted, dlci=%d, flags=%#x, " \ "rmodem=%#x, tx_cred=%d\n", __func__, pcb->dlci, pcb->flags, pcb->rmodem, pcb->tx_cred); return (0); } for (error = 0, sent = 0; sent < limit; sent ++) { length = min(pcb->mtu, sbavail(&pcb->so->so_snd)); if (length == 0) break; /* Get the chunk from the socket's send buffer */ m = ng_btsocket_rfcomm_prepare_packet(&pcb->so->so_snd, length); if (m == NULL) { error = ENOBUFS; break; } sbdrop(&pcb->so->so_snd, length); error = ng_btsocket_rfcomm_send_uih(pcb->session, RFCOMM_MKADDRESS(INITIATOR(pcb->session), pcb->dlci), 0, 0, m); if (error != 0) break; } if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) pcb->tx_cred -= sent; if (error == 0 && sent > 0) { pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_SENDING; sowwakeup(pcb->so); } return (error); } /* ng_btsocket_rfcomm_pcb_send */ /* * Unlink and disconnect DLC. If ng_btsocket_rfcomm_pcb_kill() returns * a non-zero value then the socket has no reference and has to be detached. * Caller must hold pcb->pcb_mtx and pcb->session->session_mtx */ static void ng_btsocket_rfcomm_pcb_kill(ng_btsocket_rfcomm_pcb_p pcb, int error) { ng_btsocket_rfcomm_session_p s = pcb->session; NG_BTSOCKET_RFCOMM_INFO( "%s: Killing DLC, so=%p, dlci=%d, state=%d, flags=%#x, error=%d\n", __func__, pcb->so, pcb->dlci, pcb->state, pcb->flags, error); if (pcb->session == NULL) panic("%s: DLC without session, pcb=%p, state=%d, flags=%#x\n", __func__, pcb, pcb->state, pcb->flags); mtx_assert(&pcb->session->session_mtx, MA_OWNED); mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO) ng_btsocket_rfcomm_untimeout(pcb); /* Detach DLC from the session. It does not matter which state the DLC is in */ LIST_REMOVE(pcb, session_next); pcb->session = NULL; /* Change DLC state and wake up all sleepers */ pcb->state = NG_BTSOCKET_RFCOMM_DLC_CLOSED; pcb->so->so_error = error; soisdisconnected(pcb->so); wakeup(&pcb->state); /* Check if we have any DLCs left on the session */ if (LIST_EMPTY(&s->dlcs) && INITIATOR(s)) { NG_BTSOCKET_RFCOMM_INFO( "%s: Disconnecting session, state=%d, flags=%#x, mtu=%d\n", __func__, s->state, s->flags, s->mtu); switch (s->state) { case NG_BTSOCKET_RFCOMM_SESSION_CLOSED: case NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING: /* * Do not have to do anything here.
/* * Nothing to do here. We can get here when the L2CAP connection was * terminated or we received DISC on the multiplexor channel */ break; case NG_BTSOCKET_RFCOMM_SESSION_OPEN: /* Send DISC on multiplexor channel */ error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_DISC, 0); if (error == 0) { s->state = NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING; break; } /* FALL THROUGH */ case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING: case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED: s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; break; /* case NG_BTSOCKET_RFCOMM_SESSION_LISTENING: */ default: panic("%s: Invalid session state=%d, flags=%#x\n", __func__, s->state, s->flags); break; } ng_btsocket_rfcomm_task_wakeup(); } } /* ng_btsocket_rfcomm_pcb_kill */

/* * Look for the given dlci on the given RFCOMM session. Caller must hold s->session_mtx */ static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_pcb_by_dlci(ng_btsocket_rfcomm_session_p s, int dlci) { ng_btsocket_rfcomm_pcb_p pcb = NULL; mtx_assert(&s->session_mtx, MA_OWNED); LIST_FOREACH(pcb, &s->dlcs, session_next) if (pcb->dlci == dlci) break; return (pcb); } /* ng_btsocket_rfcomm_pcb_by_dlci */

/* * Look for a socket that listens on the given src address and channel */ static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_pcb_listener(bdaddr_p src, int channel) { ng_btsocket_rfcomm_pcb_p pcb = NULL, pcb1 = NULL; mtx_lock(&ng_btsocket_rfcomm_sockets_mtx); LIST_FOREACH(pcb, &ng_btsocket_rfcomm_sockets, next) { if (pcb->channel != channel || !SOLISTENING(pcb->so)) continue; if (bcmp(&pcb->src, src, sizeof(*src)) == 0) break; if (bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0) pcb1 = pcb; } mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx); return ((pcb != NULL)? pcb : pcb1); } /* ng_btsocket_rfcomm_pcb_listener */

/***************************************************************************** ***************************************************************************** ** Misc. functions ***************************************************************************** *****************************************************************************/

/* * Set timeout. Caller MUST hold pcb_mtx */ static void ng_btsocket_rfcomm_timeout(ng_btsocket_rfcomm_pcb_p pcb) { mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)) { pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_TIMO; pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT; callout_reset(&pcb->timo, ng_btsocket_rfcomm_timo * hz, ng_btsocket_rfcomm_process_timeout, pcb); } else panic("%s: Duplicated socket timeout?!\n", __func__); } /* ng_btsocket_rfcomm_timeout */
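/*
 * Editorial sketch, not part of this file: the timeout/untimeout pair here
 * is the usual flag-guarded callout(9) pattern.  The TIMO flag tracks
 * whether the callout is armed, so arming twice or disarming an unarmed
 * callout panics early instead of silently corrupting state.  All names
 * below are hypothetical; only the callout(9) and mutex(9) calls are the
 * stock kernel KPI.
 */
struct toy_pcb {
	struct mtx	lock;	/* callout_init_mtx(&timo, &lock, 0) */
	struct callout	timo;
	u_int		flags;
#define	TOY_TIMO	0x0001	/* callout is armed */
};

static void
toy_expired(void *arg)
{
	struct toy_pcb *pcb = arg;

	/* With callout_init_mtx() the handler runs with pcb->lock held */
	pcb->flags &= ~TOY_TIMO;	/* callout fired, it is disarmed now */
	/* ... drive the state machine, wake up a task, etc ... */
}

static void
toy_timeout(struct toy_pcb *pcb)
{
	mtx_assert(&pcb->lock, MA_OWNED);
	KASSERT(!(pcb->flags & TOY_TIMO), ("duplicated timeout"));
	pcb->flags |= TOY_TIMO;
	callout_reset(&pcb->timo, 10 * hz, toy_expired, pcb); /* arbitrary 10 s */
}

static void
toy_untimeout(struct toy_pcb *pcb)
{
	mtx_assert(&pcb->lock, MA_OWNED);
	KASSERT(pcb->flags & TOY_TIMO, ("no timeout to cancel"));
	callout_stop(&pcb->timo);
	pcb->flags &= ~TOY_TIMO;
}

/*
 * Unset pcb timeout.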
Caller MUST hold pcb_mtx */ static void ng_btsocket_rfcomm_untimeout(ng_btsocket_rfcomm_pcb_p pcb) { mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO) { callout_stop(&pcb->timo); pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_TIMO; pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT; } else panic("%s: No socket timeout?!\n", __func__); } /* ng_btsocket_rfcomm_timeout */ /* * Process pcb timeout */ static void ng_btsocket_rfcomm_process_timeout(void *xpcb) { ng_btsocket_rfcomm_pcb_p pcb = (ng_btsocket_rfcomm_pcb_p) xpcb; mtx_assert(&pcb->pcb_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Timeout, so=%p, dlci=%d, state=%d, flags=%#x\n", __func__, pcb->so, pcb->dlci, pcb->state, pcb->flags); pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_TIMO; pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT; switch (pcb->state) { case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING: case NG_BTSOCKET_RFCOMM_DLC_CONNECTING: pcb->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING; break; case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT: case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING: break; default: panic( "%s: DLC timeout in invalid state, dlci=%d, state=%d, flags=%#x\n", __func__, pcb->dlci, pcb->state, pcb->flags); break; } ng_btsocket_rfcomm_task_wakeup(); } /* ng_btsocket_rfcomm_process_timeout */ /* * Get up to length bytes from the socket buffer */ static struct mbuf * ng_btsocket_rfcomm_prepare_packet(struct sockbuf *sb, int length) { struct mbuf *top = NULL, *m = NULL, *n = NULL, *nextpkt = NULL; int mlen, noff, len; MGETHDR(top, M_NOWAIT, MT_DATA); if (top == NULL) return (NULL); top->m_pkthdr.len = length; top->m_len = 0; mlen = MHLEN; m = top; n = sb->sb_mb; nextpkt = n->m_nextpkt; noff = 0; while (length > 0 && n != NULL) { len = min(mlen - m->m_len, n->m_len - noff); if (len > length) len = length; bcopy(mtod(n, caddr_t)+noff, mtod(m, caddr_t)+m->m_len, len); m->m_len += len; noff += len; length -= len; if (length > 0 && m->m_len == mlen) { MGET(m->m_next, M_NOWAIT, MT_DATA); if (m->m_next == NULL) { NG_FREE_M(top); return (NULL); } m = m->m_next; m->m_len = 0; mlen = MLEN; } if (noff == n->m_len) { noff = 0; n = n->m_next; if (n == NULL) n = nextpkt; nextpkt = (n != NULL)? n->m_nextpkt : NULL; } } if (length < 0) panic("%s: length=%d\n", __func__, length); if (length > 0 && n == NULL) panic("%s: bogus length=%d, n=%p\n", __func__, length, n); return (top); } /* ng_btsocket_rfcomm_prepare_packet */ diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c b/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c index 5e198956a829..d9700f91c132 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c @@ -1,1978 +1,1988 @@ /* * ng_btsocket_sco.c */ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001-2002 Maksim Yevmenkin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: ng_btsocket_sco.c,v 1.2 2005/10/31 18:08:51 max Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* MALLOC define */ #ifdef NG_SEPARATE_MALLOC static MALLOC_DEFINE(M_NETGRAPH_BTSOCKET_SCO, "netgraph_btsocks_sco", "Netgraph Bluetooth SCO sockets"); #else #define M_NETGRAPH_BTSOCKET_SCO M_NETGRAPH #endif /* NG_SEPARATE_MALLOC */ /* Netgraph node methods */ static ng_constructor_t ng_btsocket_sco_node_constructor; static ng_rcvmsg_t ng_btsocket_sco_node_rcvmsg; static ng_shutdown_t ng_btsocket_sco_node_shutdown; static ng_newhook_t ng_btsocket_sco_node_newhook; static ng_connect_t ng_btsocket_sco_node_connect; static ng_rcvdata_t ng_btsocket_sco_node_rcvdata; static ng_disconnect_t ng_btsocket_sco_node_disconnect; static void ng_btsocket_sco_input (void *, int); static void ng_btsocket_sco_rtclean (void *, int); /* Netgraph type descriptor */ static struct ng_type typestruct = { .version = NG_ABI_VERSION, .name = NG_BTSOCKET_SCO_NODE_TYPE, .constructor = ng_btsocket_sco_node_constructor, .rcvmsg = ng_btsocket_sco_node_rcvmsg, .shutdown = ng_btsocket_sco_node_shutdown, .newhook = ng_btsocket_sco_node_newhook, .connect = ng_btsocket_sco_node_connect, .rcvdata = ng_btsocket_sco_node_rcvdata, .disconnect = ng_btsocket_sco_node_disconnect, }; /* Globals */ static u_int32_t ng_btsocket_sco_debug_level; static node_p ng_btsocket_sco_node; static struct ng_bt_itemq ng_btsocket_sco_queue; static struct mtx ng_btsocket_sco_queue_mtx; static struct task ng_btsocket_sco_queue_task; static struct mtx ng_btsocket_sco_sockets_mtx; static LIST_HEAD(, ng_btsocket_sco_pcb) ng_btsocket_sco_sockets; static LIST_HEAD(, ng_btsocket_sco_rtentry) ng_btsocket_sco_rt; static struct mtx ng_btsocket_sco_rt_mtx; static struct task ng_btsocket_sco_rt_task; static struct timeval ng_btsocket_sco_lasttime; static int ng_btsocket_sco_curpps; /* Sysctl tree */ SYSCTL_DECL(_net_bluetooth_sco_sockets); static SYSCTL_NODE(_net_bluetooth_sco_sockets, OID_AUTO, seq, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Bluetooth SEQPACKET SCO sockets family"); SYSCTL_UINT(_net_bluetooth_sco_sockets_seq, OID_AUTO, debug_level, CTLFLAG_RW, &ng_btsocket_sco_debug_level, NG_BTSOCKET_WARN_LEVEL, "Bluetooth SEQPACKET SCO sockets debug level"); SYSCTL_UINT(_net_bluetooth_sco_sockets_seq, OID_AUTO, queue_len, CTLFLAG_RD, &ng_btsocket_sco_queue.len, 0, "Bluetooth SEQPACKET SCO sockets input queue length"); SYSCTL_UINT(_net_bluetooth_sco_sockets_seq, OID_AUTO, queue_maxlen, CTLFLAG_RD, &ng_btsocket_sco_queue.maxlen, 0, "Bluetooth SEQPACKET SCO sockets input queue max. 
length"); SYSCTL_UINT(_net_bluetooth_sco_sockets_seq, OID_AUTO, queue_drops, CTLFLAG_RD, &ng_btsocket_sco_queue.drops, 0, "Bluetooth SEQPACKET SCO sockets input queue drops"); /* Debug */ #define NG_BTSOCKET_SCO_INFO \ if (ng_btsocket_sco_debug_level >= NG_BTSOCKET_INFO_LEVEL && \ ppsratecheck(&ng_btsocket_sco_lasttime, &ng_btsocket_sco_curpps, 1)) \ printf #define NG_BTSOCKET_SCO_WARN \ if (ng_btsocket_sco_debug_level >= NG_BTSOCKET_WARN_LEVEL && \ ppsratecheck(&ng_btsocket_sco_lasttime, &ng_btsocket_sco_curpps, 1)) \ printf #define NG_BTSOCKET_SCO_ERR \ if (ng_btsocket_sco_debug_level >= NG_BTSOCKET_ERR_LEVEL && \ ppsratecheck(&ng_btsocket_sco_lasttime, &ng_btsocket_sco_curpps, 1)) \ printf #define NG_BTSOCKET_SCO_ALERT \ if (ng_btsocket_sco_debug_level >= NG_BTSOCKET_ALERT_LEVEL && \ ppsratecheck(&ng_btsocket_sco_lasttime, &ng_btsocket_sco_curpps, 1)) \ printf /* * Netgraph message processing routines */ static int ng_btsocket_sco_process_lp_con_cfm (struct ng_mesg *, ng_btsocket_sco_rtentry_p); static int ng_btsocket_sco_process_lp_con_ind (struct ng_mesg *, ng_btsocket_sco_rtentry_p); static int ng_btsocket_sco_process_lp_discon_ind (struct ng_mesg *, ng_btsocket_sco_rtentry_p); /* * Send LP messages to the lower layer */ static int ng_btsocket_sco_send_lp_con_req (ng_btsocket_sco_pcb_p); static int ng_btsocket_sco_send_lp_con_rsp (ng_btsocket_sco_rtentry_p, bdaddr_p, int); static int ng_btsocket_sco_send_lp_discon_req (ng_btsocket_sco_pcb_p); static int ng_btsocket_sco_send2 (ng_btsocket_sco_pcb_p); /* * Timeout processing routines */ static void ng_btsocket_sco_timeout (ng_btsocket_sco_pcb_p); static void ng_btsocket_sco_untimeout (ng_btsocket_sco_pcb_p); static void ng_btsocket_sco_process_timeout (void *); /* * Other stuff */ static ng_btsocket_sco_pcb_p ng_btsocket_sco_pcb_by_addr(bdaddr_p); static ng_btsocket_sco_pcb_p ng_btsocket_sco_pcb_by_handle(bdaddr_p, int); static ng_btsocket_sco_pcb_p ng_btsocket_sco_pcb_by_addrs(bdaddr_p, bdaddr_p); #define ng_btsocket_sco_wakeup_input_task() \ taskqueue_enqueue(taskqueue_swi, &ng_btsocket_sco_queue_task) #define ng_btsocket_sco_wakeup_route_task() \ taskqueue_enqueue(taskqueue_swi, &ng_btsocket_sco_rt_task) /***************************************************************************** ***************************************************************************** ** Netgraph node interface ***************************************************************************** *****************************************************************************/ /* * Netgraph node constructor. Do not allow to create node of this type. */ static int ng_btsocket_sco_node_constructor(node_p node) { return (EINVAL); } /* ng_btsocket_sco_node_constructor */ /* * Do local shutdown processing. Let old node go and create new fresh one. */ static int ng_btsocket_sco_node_shutdown(node_p node) { int error = 0; NG_NODE_UNREF(node); /* Create new node */ error = ng_make_node_common(&typestruct, &ng_btsocket_sco_node); if (error != 0) { NG_BTSOCKET_SCO_ALERT( "%s: Could not create Netgraph node, error=%d\n", __func__, error); ng_btsocket_sco_node = NULL; return (error); } error = ng_name_node(ng_btsocket_sco_node, NG_BTSOCKET_SCO_NODE_TYPE); if (error != 0) { NG_BTSOCKET_SCO_ALERT( "%s: Could not name Netgraph node, error=%d\n", __func__, error); NG_NODE_UNREF(ng_btsocket_sco_node); ng_btsocket_sco_node = NULL; return (error); } return (0); } /* ng_btsocket_sco_node_shutdown */ /* * We allow any hook to be connected to the node. 
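*/

/*
 * Editorial sketch, not part of this file: the rcvmsg()/rcvdata() methods
 * below do no protocol work inline.  They queue the item under
 * ng_btsocket_sco_queue_mtx (dropping it with ENOBUFS when the queue is
 * full) and kick a SWI taskqueue; ng_btsocket_sco_input() drains the queue
 * later.  The same produce/defer/drain shape in miniature, with
 * hypothetical names; taskqueue(9) and queue(3) are the stock KPIs:
 */
struct toy_item {
	STAILQ_ENTRY(toy_item)	next;
};
static STAILQ_HEAD(, toy_item) toy_q = STAILQ_HEAD_INITIALIZER(toy_q);
static struct mtx toy_q_mtx;
static struct task toy_q_task;	/* TASK_INIT(&toy_q_task, 0, toy_drain, NULL) */

static void toy_drain(void *, int);

static int
toy_enqueue(struct toy_item *it)
{
	mtx_lock(&toy_q_mtx);
	STAILQ_INSERT_TAIL(&toy_q, it, next);
	mtx_unlock(&toy_q_mtx);

	/* Defer the real work: taskqueue_swi runs toy_drain() in SWI context */
	return (taskqueue_enqueue(taskqueue_swi, &toy_q_task));
}

static void
toy_drain(void *context, int pending)
{
	struct toy_item *it;

	for (;;) {
		mtx_lock(&toy_q_mtx);
		it = STAILQ_FIRST(&toy_q);
		if (it != NULL)
			STAILQ_REMOVE_HEAD(&toy_q, next);
		mtx_unlock(&toy_q_mtx);
		if (it == NULL)
			break;
		/* ... process one item without holding the queue mutex ... */
	}
}

/* Any hook is accepted, regardless of its name: */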
static int ng_btsocket_sco_node_newhook(node_p node, hook_p hook, char const *name) { return (0); } /* ng_btsocket_sco_node_newhook */

/* * Just say "YEP, that's OK by me!" */ static int ng_btsocket_sco_node_connect(hook_p hook) { NG_HOOK_SET_PRIVATE(hook, NULL); NG_HOOK_REF(hook); /* Keep extra reference to the hook */ #if 0 NG_HOOK_FORCE_QUEUE(NG_HOOK_PEER(hook)); NG_HOOK_FORCE_QUEUE(hook); #endif return (0); } /* ng_btsocket_sco_node_connect */

/* * Hook disconnection. Schedule route cleanup task */ static int ng_btsocket_sco_node_disconnect(hook_p hook) { /* * If the hook has private information, then we must have this hook in * the routing table and must schedule a cleanup of the routing table. * Otherwise the hook was connected, but we never got the "hook_info" * message, so we never added this hook to the routing table and it is * safe to just delete it. */ if (NG_HOOK_PRIVATE(hook) != NULL) return (ng_btsocket_sco_wakeup_route_task()); NG_HOOK_UNREF(hook); /* Remove extra reference */ return (0); } /* ng_btsocket_sco_node_disconnect */

/* * Process incoming messages */ static int ng_btsocket_sco_node_rcvmsg(node_p node, item_p item, hook_p hook) { struct ng_mesg *msg = NGI_MSG(item); /* item still has message */ int error = 0; if (msg != NULL && msg->header.typecookie == NGM_HCI_COOKIE) { mtx_lock(&ng_btsocket_sco_queue_mtx); if (NG_BT_ITEMQ_FULL(&ng_btsocket_sco_queue)) { NG_BTSOCKET_SCO_ERR( "%s: Input queue is full (msg)\n", __func__); NG_BT_ITEMQ_DROP(&ng_btsocket_sco_queue); NG_FREE_ITEM(item); error = ENOBUFS; } else { if (hook != NULL) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_sco_queue, item); error = ng_btsocket_sco_wakeup_input_task(); } mtx_unlock(&ng_btsocket_sco_queue_mtx); } else { NG_FREE_ITEM(item); error = EINVAL; } return (error); } /* ng_btsocket_sco_node_rcvmsg */

/* * Receive data on a hook */ static int ng_btsocket_sco_node_rcvdata(hook_p hook, item_p item) { int error = 0; mtx_lock(&ng_btsocket_sco_queue_mtx); if (NG_BT_ITEMQ_FULL(&ng_btsocket_sco_queue)) { NG_BTSOCKET_SCO_ERR( "%s: Input queue is full (data)\n", __func__); NG_BT_ITEMQ_DROP(&ng_btsocket_sco_queue); NG_FREE_ITEM(item); error = ENOBUFS; } else { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_sco_queue, item); error = ng_btsocket_sco_wakeup_input_task(); } mtx_unlock(&ng_btsocket_sco_queue_mtx); return (error); } /* ng_btsocket_sco_node_rcvdata */

/* * Process LP_ConnectCfm event from the lower layer protocol */ static int ng_btsocket_sco_process_lp_con_cfm(struct ng_mesg *msg, ng_btsocket_sco_rtentry_p rt) { ng_hci_lp_con_cfm_ep *ep = NULL; ng_btsocket_sco_pcb_t *pcb = NULL; int error = 0; if (msg->header.arglen != sizeof(*ep)) return (EMSGSIZE); ep = (ng_hci_lp_con_cfm_ep *)(msg->data); mtx_lock(&ng_btsocket_sco_sockets_mtx); /* Look for the socket with the token */ pcb = ng_btsocket_sco_pcb_by_addrs(&rt->src, &ep->bdaddr); if (pcb == NULL) { mtx_unlock(&ng_btsocket_sco_sockets_mtx); return (ENOENT); } /* pcb is locked */ NG_BTSOCKET_SCO_INFO( "%s: Got LP_ConnectCfm response, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, status=%d, handle=%d, state=%d\n", __func__, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], ep->status, ep->con_handle, pcb->state); if (pcb->state != NG_BTSOCKET_SCO_CONNECTING) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_sco_sockets_mtx); return (ENOENT); } ng_btsocket_sco_untimeout(pcb); if (ep->status == 0) { /* * Connection is open. Update connection handle and * socket state */ pcb->con_handle = ep->con_handle; pcb->state = NG_BTSOCKET_SCO_OPEN; soisconnected(pcb->so); } else { /* * We failed to open the connection, so disconnect the socket */ pcb->so->so_error = ECONNREFUSED; /* XXX convert status ??? */ pcb->state = NG_BTSOCKET_SCO_CLOSED; soisdisconnected(pcb->so); } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_sco_sockets_mtx); return (error); } /* ng_btsocket_sco_process_lp_con_cfm */

/* * Process the LP_ConnectInd indicator. Find the socket that listens on the * address, using an exact or closest match. */ static int ng_btsocket_sco_process_lp_con_ind(struct ng_mesg *msg, ng_btsocket_sco_rtentry_p rt) { ng_hci_lp_con_ind_ep *ep = NULL; ng_btsocket_sco_pcb_t *pcb = NULL, *pcb1 = NULL; int error = 0; u_int16_t status = 0; if (msg->header.arglen != sizeof(*ep)) return (EMSGSIZE); ep = (ng_hci_lp_con_ind_ep *)(msg->data); NG_BTSOCKET_SCO_INFO( "%s: Got LP_ConnectInd indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], ep->bdaddr.b[5], ep->bdaddr.b[4], ep->bdaddr.b[3], ep->bdaddr.b[2], ep->bdaddr.b[1], ep->bdaddr.b[0]); mtx_lock(&ng_btsocket_sco_sockets_mtx); pcb = ng_btsocket_sco_pcb_by_addr(&rt->src); if (pcb != NULL) { struct socket *so1; /* pcb is locked */ CURVNET_SET(pcb->so->so_vnet); so1 = sonewconn(pcb->so, 0); CURVNET_RESTORE(); if (so1 == NULL) { status = 0x0d; /* Rejected due to limited resources */ goto respond; } /* * If we got here, then we have created a new socket, so complete * the connection. If we were listening on a specific address, then * copy the source address from the listening socket; otherwise copy * the source address from the hook's routing information. */ pcb1 = so2sco_pcb(so1); KASSERT((pcb1 != NULL), ("%s: pcb1 == NULL\n", __func__)); mtx_lock(&pcb1->pcb_mtx); if (bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(pcb->src)) != 0) bcopy(&pcb->src, &pcb1->src, sizeof(pcb1->src)); else bcopy(&rt->src, &pcb1->src, sizeof(pcb1->src)); pcb1->flags &= ~NG_BTSOCKET_SCO_CLIENT; bcopy(&ep->bdaddr, &pcb1->dst, sizeof(pcb1->dst)); pcb1->rt = rt; } else /* Nobody listens on requested BDADDR */ status = 0x1f; /* Unspecified Error */ respond: error = ng_btsocket_sco_send_lp_con_rsp(rt, &ep->bdaddr, status); if (pcb1 != NULL) { if (error != 0) { pcb1->so->so_error = error; pcb1->state = NG_BTSOCKET_SCO_CLOSED; soisdisconnected(pcb1->so); } else { pcb1->state = NG_BTSOCKET_SCO_CONNECTING; soisconnecting(pcb1->so); ng_btsocket_sco_timeout(pcb1); } mtx_unlock(&pcb1->pcb_mtx); } if (pcb != NULL) mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_sco_sockets_mtx); return (error); } /* ng_btsocket_sco_process_lp_con_ind */

/* * Process LP_DisconnectInd indicator */ static int ng_btsocket_sco_process_lp_discon_ind(struct ng_mesg *msg, ng_btsocket_sco_rtentry_p rt) { ng_hci_lp_discon_ind_ep *ep = NULL; ng_btsocket_sco_pcb_t *pcb = NULL; /* Check message */ if (msg->header.arglen != sizeof(*ep)) return (EMSGSIZE); ep = (ng_hci_lp_discon_ind_ep *)(msg->data); mtx_lock(&ng_btsocket_sco_sockets_mtx); /* Look for the socket with the given connection handle */ pcb = ng_btsocket_sco_pcb_by_handle(&rt->src, ep->con_handle); if (pcb == NULL) { mtx_unlock(&ng_btsocket_sco_sockets_mtx); return (0); } /* * Disconnect the socket. If there was any pending request, we cannot * do anything about it here anyway.
*/ /* pcb is locked */ NG_BTSOCKET_SCO_INFO( "%s: Got LP_DisconnectInd indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, handle=%d, state=%d\n", __func__, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->con_handle, pcb->state); if (pcb->flags & NG_BTSOCKET_SCO_TIMO) ng_btsocket_sco_untimeout(pcb); pcb->state = NG_BTSOCKET_SCO_CLOSED; soisdisconnected(pcb->so); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_sco_sockets_mtx); return (0); } /* ng_btsocket_sco_process_lp_discon_ind */ /* * Send LP_ConnectReq request */ static int ng_btsocket_sco_send_lp_con_req(ng_btsocket_sco_pcb_p pcb) { struct ng_mesg *msg = NULL; ng_hci_lp_con_req_ep *ep = NULL; int error = 0; mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->rt == NULL || pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) return (ENETDOWN); NG_MKMESSAGE(msg, NGM_HCI_COOKIE, NGM_HCI_LP_CON_REQ, sizeof(*ep), M_NOWAIT); if (msg == NULL) return (ENOMEM); ep = (ng_hci_lp_con_req_ep *)(msg->data); ep->link_type = NG_HCI_LINK_SCO; bcopy(&pcb->dst, &ep->bdaddr, sizeof(ep->bdaddr)); NG_SEND_MSG_HOOK(error, ng_btsocket_sco_node, msg, pcb->rt->hook, 0); return (error); } /* ng_btsocket_sco_send_lp_con_req */ /* * Send LP_ConnectRsp response */ static int ng_btsocket_sco_send_lp_con_rsp(ng_btsocket_sco_rtentry_p rt, bdaddr_p dst, int status) { struct ng_mesg *msg = NULL; ng_hci_lp_con_rsp_ep *ep = NULL; int error = 0; if (rt == NULL || rt->hook == NULL || NG_HOOK_NOT_VALID(rt->hook)) return (ENETDOWN); NG_MKMESSAGE(msg, NGM_HCI_COOKIE, NGM_HCI_LP_CON_RSP, sizeof(*ep), M_NOWAIT); if (msg == NULL) return (ENOMEM); ep = (ng_hci_lp_con_rsp_ep *)(msg->data); ep->status = status; ep->link_type = NG_HCI_LINK_SCO; bcopy(dst, &ep->bdaddr, sizeof(ep->bdaddr)); NG_SEND_MSG_HOOK(error, ng_btsocket_sco_node, msg, rt->hook, 0); return (error); } /* ng_btsocket_sco_send_lp_con_rsp */ /* * Send LP_DisconReq request */ static int ng_btsocket_sco_send_lp_discon_req(ng_btsocket_sco_pcb_p pcb) { struct ng_mesg *msg = NULL; ng_hci_lp_discon_req_ep *ep = NULL; int error = 0; mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->rt == NULL || pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) return (ENETDOWN); NG_MKMESSAGE(msg, NGM_HCI_COOKIE, NGM_HCI_LP_DISCON_REQ, sizeof(*ep), M_NOWAIT); if (msg == NULL) return (ENOMEM); ep = (ng_hci_lp_discon_req_ep *)(msg->data); ep->con_handle = pcb->con_handle; ep->reason = 0x13; /* User Ended Connection */ NG_SEND_MSG_HOOK(error, ng_btsocket_sco_node, msg, pcb->rt->hook, 0); return (error); } /* ng_btsocket_sco_send_lp_discon_req */ /***************************************************************************** ***************************************************************************** ** Socket interface ***************************************************************************** *****************************************************************************/ /* * SCO sockets data input routine */ static void ng_btsocket_sco_data_input(struct mbuf *m, hook_p hook) { ng_hci_scodata_pkt_t *hdr = NULL; ng_btsocket_sco_pcb_t *pcb = NULL; ng_btsocket_sco_rtentry_t *rt = NULL; u_int16_t con_handle; if (hook == NULL) { NG_BTSOCKET_SCO_ALERT( "%s: Invalid source hook for SCO data packet\n", __func__); goto drop; } rt = (ng_btsocket_sco_rtentry_t *) NG_HOOK_PRIVATE(hook); if (rt == NULL) { NG_BTSOCKET_SCO_ALERT( "%s: Could not find out source bdaddr for SCO data 
packet\n", __func__); goto drop; } /* Make sure we can access header */ if (m->m_pkthdr.len < sizeof(*hdr)) { NG_BTSOCKET_SCO_ERR( "%s: SCO data packet too small, len=%d\n", __func__, m->m_pkthdr.len); goto drop; } if (m->m_len < sizeof(*hdr)) { m = m_pullup(m, sizeof(*hdr)); if (m == NULL) goto drop; } /* Strip SCO packet header and verify packet length */ hdr = mtod(m, ng_hci_scodata_pkt_t *); m_adj(m, sizeof(*hdr)); if (hdr->length != m->m_pkthdr.len) { NG_BTSOCKET_SCO_ERR( "%s: Bad SCO data packet length, len=%d, length=%d\n", __func__, m->m_pkthdr.len, hdr->length); goto drop; } /* * Now process packet */ con_handle = NG_HCI_CON_HANDLE(le16toh(hdr->con_handle)); NG_BTSOCKET_SCO_INFO( "%s: Received SCO data packet: src bdaddr=%x:%x:%x:%x:%x:%x, handle=%d, " \ "length=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], con_handle, hdr->length); mtx_lock(&ng_btsocket_sco_sockets_mtx); /* Find socket */ pcb = ng_btsocket_sco_pcb_by_handle(&rt->src, con_handle); if (pcb == NULL) { mtx_unlock(&ng_btsocket_sco_sockets_mtx); goto drop; } /* pcb is locked */ if (pcb->state != NG_BTSOCKET_SCO_OPEN) { NG_BTSOCKET_SCO_ERR( "%s: No connected socket found, src bdaddr=%x:%x:%x:%x:%x:%x, state=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], pcb->state); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_sco_sockets_mtx); goto drop; } /* Check if we have enough space in socket receive queue */ if (m->m_pkthdr.len > sbspace(&pcb->so->so_rcv)) { NG_BTSOCKET_SCO_ERR( "%s: Not enough space in socket receive queue. Dropping SCO data packet, " \ "src bdaddr=%x:%x:%x:%x:%x:%x, len=%d, space=%ld\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], m->m_pkthdr.len, sbspace(&pcb->so->so_rcv)); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_sco_sockets_mtx); goto drop; } /* Append packet to the socket receive queue and wakeup */ sbappendrecord(&pcb->so->so_rcv, m); m = NULL; sorwakeup(pcb->so); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_sco_sockets_mtx); drop: NG_FREE_M(m); /* checks for m != NULL */ } /* ng_btsocket_sco_data_input */ /* * SCO sockets default message input routine */ static void ng_btsocket_sco_default_msg_input(struct ng_mesg *msg, hook_p hook) { ng_btsocket_sco_rtentry_t *rt = NULL; if (hook == NULL || NG_HOOK_NOT_VALID(hook)) return; rt = (ng_btsocket_sco_rtentry_t *) NG_HOOK_PRIVATE(hook); switch (msg->header.cmd) { case NGM_HCI_NODE_UP: { ng_hci_node_up_ep *ep = NULL; if (msg->header.arglen != sizeof(*ep)) break; ep = (ng_hci_node_up_ep *)(msg->data); if (bcmp(&ep->bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0) break; if (rt == NULL) { rt = malloc(sizeof(*rt), M_NETGRAPH_BTSOCKET_SCO, M_NOWAIT|M_ZERO); if (rt == NULL) break; NG_HOOK_SET_PRIVATE(hook, rt); mtx_lock(&ng_btsocket_sco_rt_mtx); LIST_INSERT_HEAD(&ng_btsocket_sco_rt, rt, next); } else mtx_lock(&ng_btsocket_sco_rt_mtx); bcopy(&ep->bdaddr, &rt->src, sizeof(rt->src)); rt->pkt_size = (ep->pkt_size == 0)? 
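/* NODE_UP reported no SCO packet size: fall back to 60 bytes */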
60 : ep->pkt_size; rt->num_pkts = ep->num_pkts; rt->hook = hook; mtx_unlock(&ng_btsocket_sco_rt_mtx); NG_BTSOCKET_SCO_INFO( "%s: Updating hook \"%s\", src bdaddr=%x:%x:%x:%x:%x:%x, pkt_size=%d, " \ "num_pkts=%d\n", __func__, NG_HOOK_NAME(hook), rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], rt->pkt_size, rt->num_pkts); } break; case NGM_HCI_SYNC_CON_QUEUE: { ng_hci_sync_con_queue_ep *ep = NULL; ng_btsocket_sco_pcb_t *pcb = NULL; if (rt == NULL || msg->header.arglen != sizeof(*ep)) break; ep = (ng_hci_sync_con_queue_ep *)(msg->data); rt->pending -= ep->completed; if (rt->pending < 0) { NG_BTSOCKET_SCO_WARN( "%s: Pending packet counter is out of sync! bdaddr=%x:%x:%x:%x:%x:%x, " \ "handle=%d, pending=%d, completed=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], ep->con_handle, rt->pending, ep->completed); rt->pending = 0; } mtx_lock(&ng_btsocket_sco_sockets_mtx); /* Find socket */ pcb = ng_btsocket_sco_pcb_by_handle(&rt->src, ep->con_handle); if (pcb == NULL) { mtx_unlock(&ng_btsocket_sco_sockets_mtx); break; } /* pcb is locked */ /* Check state */ if (pcb->state == NG_BTSOCKET_SCO_OPEN) { /* Remove timeout */ ng_btsocket_sco_untimeout(pcb); /* Drop completed packets from the send queue */ for (; ep->completed > 0; ep->completed --) sbdroprecord(&pcb->so->so_snd); /* Send more if we have any */ if (sbavail(&pcb->so->so_snd) > 0) if (ng_btsocket_sco_send2(pcb) == 0) ng_btsocket_sco_timeout(pcb); /* Wake up writers */ sowwakeup(pcb->so); } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_sco_sockets_mtx); } break; default: NG_BTSOCKET_SCO_WARN( "%s: Unknown message, cmd=%d\n", __func__, msg->header.cmd); break; } NG_FREE_MSG(msg); /* Checks for msg != NULL */ } /* ng_btsocket_sco_default_msg_input */ /* * SCO sockets LP message input routine */ static void ng_btsocket_sco_lp_msg_input(struct ng_mesg *msg, hook_p hook) { ng_btsocket_sco_rtentry_p rt = NULL; if (hook == NULL) { NG_BTSOCKET_SCO_ALERT( "%s: Invalid source hook for LP message\n", __func__); goto drop; } rt = (ng_btsocket_sco_rtentry_p) NG_HOOK_PRIVATE(hook); if (rt == NULL) { NG_BTSOCKET_SCO_ALERT( "%s: Could not find out source bdaddr for LP message\n", __func__); goto drop; } switch (msg->header.cmd) { case NGM_HCI_LP_CON_CFM: /* Connection Confirmation Event */ ng_btsocket_sco_process_lp_con_cfm(msg, rt); break; case NGM_HCI_LP_CON_IND: /* Connection Indication Event */ ng_btsocket_sco_process_lp_con_ind(msg, rt); break; case NGM_HCI_LP_DISCON_IND: /* Disconnection Indication Event */ ng_btsocket_sco_process_lp_discon_ind(msg, rt); break; /* XXX FIXME add other LP messages */ default: NG_BTSOCKET_SCO_WARN( "%s: Unknown LP message, cmd=%d\n", __func__, msg->header.cmd); break; } drop: NG_FREE_MSG(msg); } /* ng_btsocket_sco_lp_msg_input */ /* * SCO sockets input routine */ static void ng_btsocket_sco_input(void *context, int pending) { item_p item = NULL; hook_p hook = NULL; for (;;) { mtx_lock(&ng_btsocket_sco_queue_mtx); NG_BT_ITEMQ_DEQUEUE(&ng_btsocket_sco_queue, item); mtx_unlock(&ng_btsocket_sco_queue_mtx); if (item == NULL) break; NGI_GET_HOOK(item, hook); if (hook != NULL && NG_HOOK_NOT_VALID(hook)) goto drop; switch(item->el_flags & NGQF_TYPE) { case NGQF_DATA: { struct mbuf *m = NULL; NGI_GET_M(item, m); ng_btsocket_sco_data_input(m, hook); } break; case NGQF_MESG: { struct ng_mesg *msg = NULL; NGI_GET_MSG(item, msg); switch (msg->header.cmd) { case NGM_HCI_LP_CON_CFM: case NGM_HCI_LP_CON_IND: case NGM_HCI_LP_DISCON_IND: /* XXX 
FIXME add other LP messages */ ng_btsocket_sco_lp_msg_input(msg, hook); break; default: ng_btsocket_sco_default_msg_input(msg, hook); break; } } break; default: KASSERT(0, ("%s: invalid item type=%ld\n", __func__, (item->el_flags & NGQF_TYPE))); break; } drop: if (hook != NULL) NG_HOOK_UNREF(hook); NG_FREE_ITEM(item); } } /* ng_btsocket_sco_input */

/* * Route cleanup task. Gets scheduled when a hook is disconnected. Here we * find all sockets that use the invalid hook and disconnect them. */ static void ng_btsocket_sco_rtclean(void *context, int pending) { ng_btsocket_sco_pcb_p pcb = NULL, pcb_next = NULL; ng_btsocket_sco_rtentry_p rt = NULL; /* * First disconnect all sockets that use the invalid hook */ mtx_lock(&ng_btsocket_sco_sockets_mtx); for(pcb = LIST_FIRST(&ng_btsocket_sco_sockets); pcb != NULL; ) { mtx_lock(&pcb->pcb_mtx); pcb_next = LIST_NEXT(pcb, next); if (pcb->rt != NULL && pcb->rt->hook != NULL && NG_HOOK_NOT_VALID(pcb->rt->hook)) { if (pcb->flags & NG_BTSOCKET_SCO_TIMO) ng_btsocket_sco_untimeout(pcb); pcb->rt = NULL; pcb->so->so_error = ENETDOWN; pcb->state = NG_BTSOCKET_SCO_CLOSED; soisdisconnected(pcb->so); } mtx_unlock(&pcb->pcb_mtx); pcb = pcb_next; } mtx_unlock(&ng_btsocket_sco_sockets_mtx); /* * Now clean up the routing table */ mtx_lock(&ng_btsocket_sco_rt_mtx); for (rt = LIST_FIRST(&ng_btsocket_sco_rt); rt != NULL; ) { ng_btsocket_sco_rtentry_p rt_next = LIST_NEXT(rt, next); if (rt->hook != NULL && NG_HOOK_NOT_VALID(rt->hook)) { LIST_REMOVE(rt, next); NG_HOOK_SET_PRIVATE(rt->hook, NULL); NG_HOOK_UNREF(rt->hook); /* Remove extra reference */ bzero(rt, sizeof(*rt)); free(rt, M_NETGRAPH_BTSOCKET_SCO); } rt = rt_next; } mtx_unlock(&ng_btsocket_sco_rt_mtx); } /* ng_btsocket_sco_rtclean */

/* * Initialize everything */ static void ng_btsocket_sco_init(void *arg __unused) { int error = 0; ng_btsocket_sco_node = NULL; ng_btsocket_sco_debug_level = NG_BTSOCKET_WARN_LEVEL; /* Register Netgraph node type */ error = ng_newtype(&typestruct); if (error != 0) { NG_BTSOCKET_SCO_ALERT( "%s: Could not register Netgraph node type, error=%d\n", __func__, error); return; } /* Create Netgraph node */ error = ng_make_node_common(&typestruct, &ng_btsocket_sco_node); if (error != 0) { NG_BTSOCKET_SCO_ALERT( "%s: Could not create Netgraph node, error=%d\n", __func__, error); ng_btsocket_sco_node = NULL; return; } error = ng_name_node(ng_btsocket_sco_node, NG_BTSOCKET_SCO_NODE_TYPE); if (error != 0) { NG_BTSOCKET_SCO_ALERT( "%s: Could not name Netgraph node, error=%d\n", __func__, error); NG_NODE_UNREF(ng_btsocket_sco_node); ng_btsocket_sco_node = NULL; return; } /* Create input queue */ NG_BT_ITEMQ_INIT(&ng_btsocket_sco_queue, 300); mtx_init(&ng_btsocket_sco_queue_mtx, "btsocks_sco_queue_mtx", NULL, MTX_DEF); TASK_INIT(&ng_btsocket_sco_queue_task, 0, ng_btsocket_sco_input, NULL); /* Create list of sockets */ LIST_INIT(&ng_btsocket_sco_sockets); mtx_init(&ng_btsocket_sco_sockets_mtx, "btsocks_sco_sockets_mtx", NULL, MTX_DEF); /* Routing table */ LIST_INIT(&ng_btsocket_sco_rt); mtx_init(&ng_btsocket_sco_rt_mtx, "btsocks_sco_rt_mtx", NULL, MTX_DEF); TASK_INIT(&ng_btsocket_sco_rt_task, 0, ng_btsocket_sco_rtclean, NULL); } /* ng_btsocket_sco_init */ SYSINIT(ng_btsocket_sco_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ng_btsocket_sco_init, NULL);

/* * Abort connection on socket */ void ng_btsocket_sco_abort(struct socket *so) { so->so_error = ECONNABORTED; (void) ng_btsocket_sco_disconnect(so); } /* ng_btsocket_sco_abort */ void ng_btsocket_sco_close(struct socket *so) { (void)
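/* the return value is deliberately discarded: close cannot report an error */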
ng_btsocket_sco_disconnect(so); } /* ng_btsocket_sco_close */ -/* - * Accept connection on socket. Nothing to do here, socket must be connected - * and ready, so just return peer address and be done with it. - */ - -int -ng_btsocket_sco_accept(struct socket *so, struct sockaddr **nam) -{ - if (ng_btsocket_sco_node == NULL) - return (EINVAL); - - return (ng_btsocket_sco_peeraddr(so, nam)); -} /* ng_btsocket_sco_accept */ - /* * Create and attach new socket */ int ng_btsocket_sco_attach(struct socket *so, int proto, struct thread *td) { ng_btsocket_sco_pcb_p pcb = so2sco_pcb(so); int error; /* Check socket and protocol */ if (ng_btsocket_sco_node == NULL) return (EPROTONOSUPPORT); if (so->so_type != SOCK_SEQPACKET) return (ESOCKTNOSUPPORT); #if 0 /* XXX sonewconn() calls "pru_attach" with proto == 0 */ if (proto != 0) if (proto != BLUETOOTH_PROTO_SCO) return (EPROTONOSUPPORT); #endif /* XXX */ if (pcb != NULL) return (EISCONN); /* Reserve send and receive space if it is not reserved yet */ if ((so->so_snd.sb_hiwat == 0) || (so->so_rcv.sb_hiwat == 0)) { error = soreserve(so, NG_BTSOCKET_SCO_SENDSPACE, NG_BTSOCKET_SCO_RECVSPACE); if (error != 0) return (error); } /* Allocate the PCB */ pcb = malloc(sizeof(*pcb), M_NETGRAPH_BTSOCKET_SCO, M_NOWAIT | M_ZERO); if (pcb == NULL) return (ENOMEM); /* Link the PCB and the socket */ so->so_pcb = (caddr_t) pcb; pcb->so = so; pcb->state = NG_BTSOCKET_SCO_CLOSED; callout_init(&pcb->timo, 1); /* * Mark PCB mutex as DUPOK to prevent "duplicated lock of * the same type" message. When accepting new SCO connection * ng_btsocket_sco_process_lp_con_ind() holds both PCB mutexes * for "old" (accepting) PCB and "new" (created) PCB. */ mtx_init(&pcb->pcb_mtx, "btsocks_sco_pcb_mtx", NULL, MTX_DEF|MTX_DUPOK); /* * Add the PCB to the list * * XXX FIXME VERY IMPORTANT! * * This is totally FUBAR. We could get here in two cases: * * 1) When user calls socket() * 2) When we need to accept new incoming connection and call * sonewconn() * * In the first case we must acquire ng_btsocket_sco_sockets_mtx. * In the second case we hold ng_btsocket_sco_sockets_mtx already. * So we now need to distinguish between these cases. From reading * /sys/kern/uipc_socket2.c we can find out that sonewconn() calls * pru_attach with proto == 0 and td == NULL. For now use this fact * to figure out if we were called from socket() or from sonewconn(). */ if (td != NULL) mtx_lock(&ng_btsocket_sco_sockets_mtx); else mtx_assert(&ng_btsocket_sco_sockets_mtx, MA_OWNED); LIST_INSERT_HEAD(&ng_btsocket_sco_sockets, pcb, next); if (td != NULL) mtx_unlock(&ng_btsocket_sco_sockets_mtx); return (0); } /* ng_btsocket_sco_attach */ /* * Bind socket */ int ng_btsocket_sco_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { ng_btsocket_sco_pcb_t *pcb = NULL; struct sockaddr_sco *sa = (struct sockaddr_sco *) nam; if (ng_btsocket_sco_node == NULL) return (EINVAL); /* Verify address */ if (sa == NULL) return (EINVAL); if (sa->sco_family != AF_BLUETOOTH) return (EAFNOSUPPORT); if (sa->sco_len != sizeof(*sa)) return (EINVAL); mtx_lock(&ng_btsocket_sco_sockets_mtx); /* * Check if other socket has this address already (look for exact * match in bdaddr) and assign socket address if it's available. 
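* (Note: binding to NG_HCI_BDADDR_ANY never conflicts here; only an exact * duplicate of a specific bdaddr is refused with EADDRINUSE.)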
*/ if (bcmp(&sa->sco_bdaddr, NG_HCI_BDADDR_ANY, sizeof(sa->sco_bdaddr)) != 0) { LIST_FOREACH(pcb, &ng_btsocket_sco_sockets, next) { mtx_lock(&pcb->pcb_mtx); if (bcmp(&pcb->src, &sa->sco_bdaddr, sizeof(bdaddr_t)) == 0) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_sco_sockets_mtx); return (EADDRINUSE); } mtx_unlock(&pcb->pcb_mtx); } } pcb = so2sco_pcb(so); if (pcb == NULL) { mtx_unlock(&ng_btsocket_sco_sockets_mtx); return (EINVAL); } mtx_lock(&pcb->pcb_mtx); bcopy(&sa->sco_bdaddr, &pcb->src, sizeof(pcb->src)); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_sco_sockets_mtx); return (0); } /* ng_btsocket_sco_bind */ /* * Connect socket */ int ng_btsocket_sco_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { ng_btsocket_sco_pcb_t *pcb = so2sco_pcb(so); struct sockaddr_sco *sa = (struct sockaddr_sco *) nam; ng_btsocket_sco_rtentry_t *rt = NULL; int have_src, error = 0; /* Check socket */ if (pcb == NULL) return (EINVAL); if (ng_btsocket_sco_node == NULL) return (EINVAL); /* Verify address */ if (sa == NULL) return (EINVAL); if (sa->sco_family != AF_BLUETOOTH) return (EAFNOSUPPORT); if (sa->sco_len != sizeof(*sa)) return (EINVAL); if (bcmp(&sa->sco_bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0) return (EDESTADDRREQ); /* * Routing. Socket should be bound to some source address. The source * address can be ANY. Destination address must be set and it must not * be ANY. If source address is ANY then find first rtentry that has * src != dst. */ mtx_lock(&ng_btsocket_sco_rt_mtx); mtx_lock(&pcb->pcb_mtx); if (pcb->state == NG_BTSOCKET_SCO_CONNECTING) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_sco_rt_mtx); return (EINPROGRESS); } if (bcmp(&sa->sco_bdaddr, &pcb->src, sizeof(pcb->src)) == 0) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_sco_rt_mtx); return (EINVAL); } /* Send destination address and PSM */ bcopy(&sa->sco_bdaddr, &pcb->dst, sizeof(pcb->dst)); pcb->rt = NULL; have_src = bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(pcb->src)); LIST_FOREACH(rt, &ng_btsocket_sco_rt, next) { if (rt->hook == NULL || NG_HOOK_NOT_VALID(rt->hook)) continue; /* Match src and dst */ if (have_src) { if (bcmp(&pcb->src, &rt->src, sizeof(rt->src)) == 0) break; } else { if (bcmp(&pcb->dst, &rt->src, sizeof(rt->src)) != 0) break; } } if (rt != NULL) { pcb->rt = rt; if (!have_src) bcopy(&rt->src, &pcb->src, sizeof(pcb->src)); } else error = EHOSTUNREACH; /* * Send LP_Connect request */ if (error == 0) { error = ng_btsocket_sco_send_lp_con_req(pcb); if (error == 0) { pcb->flags |= NG_BTSOCKET_SCO_CLIENT; pcb->state = NG_BTSOCKET_SCO_CONNECTING; soisconnecting(pcb->so); ng_btsocket_sco_timeout(pcb); } } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_sco_rt_mtx); return (error); } /* ng_btsocket_sco_connect */ /* * Process ioctl's calls on socket */ int ng_btsocket_sco_control(struct socket *so, u_long cmd, void *data, struct ifnet *ifp, struct thread *td) { return (EINVAL); } /* ng_btsocket_sco_control */ /* * Process getsockopt/setsockopt system calls */ int ng_btsocket_sco_ctloutput(struct socket *so, struct sockopt *sopt) { ng_btsocket_sco_pcb_p pcb = so2sco_pcb(so); int error, tmp; if (ng_btsocket_sco_node == NULL) return (EINVAL); if (pcb == NULL) return (EINVAL); if (sopt->sopt_level != SOL_SCO) return (0); mtx_lock(&pcb->pcb_mtx); switch (sopt->sopt_dir) { case SOPT_GET: if (pcb->state != NG_BTSOCKET_SCO_OPEN) { error = ENOTCONN; break; } switch (sopt->sopt_name) { case SO_SCO_MTU: tmp = pcb->rt->pkt_size; error = sooptcopyout(sopt, &tmp, sizeof(tmp)); break; case 
SO_SCO_CONNINFO: tmp = pcb->con_handle; error = sooptcopyout(sopt, &tmp, sizeof(tmp)); break; default: error = EINVAL; break; } break; case SOPT_SET: error = ENOPROTOOPT; break; default: error = EINVAL; break; } mtx_unlock(&pcb->pcb_mtx); return (error); } /* ng_btsocket_sco_ctloutput */ /* * Detach and destroy socket */ void ng_btsocket_sco_detach(struct socket *so) { ng_btsocket_sco_pcb_p pcb = so2sco_pcb(so); KASSERT(pcb != NULL, ("ng_btsocket_sco_detach: pcb == NULL")); if (ng_btsocket_sco_node == NULL) return; mtx_lock(&ng_btsocket_sco_sockets_mtx); mtx_lock(&pcb->pcb_mtx); if (pcb->flags & NG_BTSOCKET_SCO_TIMO) ng_btsocket_sco_untimeout(pcb); if (pcb->state == NG_BTSOCKET_SCO_OPEN) ng_btsocket_sco_send_lp_discon_req(pcb); pcb->state = NG_BTSOCKET_SCO_CLOSED; LIST_REMOVE(pcb, next); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_sco_sockets_mtx); mtx_destroy(&pcb->pcb_mtx); bzero(pcb, sizeof(*pcb)); free(pcb, M_NETGRAPH_BTSOCKET_SCO); soisdisconnected(so); so->so_pcb = NULL; } /* ng_btsocket_sco_detach */ /* * Disconnect socket */ int ng_btsocket_sco_disconnect(struct socket *so) { ng_btsocket_sco_pcb_p pcb = so2sco_pcb(so); if (pcb == NULL) return (EINVAL); if (ng_btsocket_sco_node == NULL) return (EINVAL); mtx_lock(&pcb->pcb_mtx); if (pcb->state == NG_BTSOCKET_SCO_DISCONNECTING) { mtx_unlock(&pcb->pcb_mtx); return (EINPROGRESS); } if (pcb->flags & NG_BTSOCKET_SCO_TIMO) ng_btsocket_sco_untimeout(pcb); if (pcb->state == NG_BTSOCKET_SCO_OPEN) { ng_btsocket_sco_send_lp_discon_req(pcb); pcb->state = NG_BTSOCKET_SCO_DISCONNECTING; soisdisconnecting(so); ng_btsocket_sco_timeout(pcb); } else { pcb->state = NG_BTSOCKET_SCO_CLOSED; soisdisconnected(so); } mtx_unlock(&pcb->pcb_mtx); return (0); } /* ng_btsocket_sco_disconnect */ /* * Listen on socket */ int ng_btsocket_sco_listen(struct socket *so, int backlog, struct thread *td) { ng_btsocket_sco_pcb_p pcb = so2sco_pcb(so); int error; if (pcb == NULL) return (EINVAL); if (ng_btsocket_sco_node == NULL) return (EINVAL); SOCK_LOCK(so); mtx_lock(&pcb->pcb_mtx); error = solisten_proto_check(so); if (error != 0) goto out; #if 0 if (bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0) { error = EDESTADDRREQ; goto out; } #endif solisten_proto(so, backlog); out: mtx_unlock(&pcb->pcb_mtx); SOCK_UNLOCK(so); return (error); } /* ng_btsocket_listen */ -/* - * Get peer address - */ - -int -ng_btsocket_sco_peeraddr(struct socket *so, struct sockaddr **nam) +static int +ng_btsocket_sco_peeraddr1(struct socket *so, struct sockaddr_sco *sa) { ng_btsocket_sco_pcb_p pcb = so2sco_pcb(so); - struct sockaddr_sco sa; if (pcb == NULL) return (EINVAL); if (ng_btsocket_sco_node == NULL) return (EINVAL); + *sa = (struct sockaddr_sco ){ + .sco_len = sizeof(struct sockaddr_sco), + .sco_family = AF_BLUETOOTH, + }; mtx_lock(&pcb->pcb_mtx); - bcopy(&pcb->dst, &sa.sco_bdaddr, sizeof(sa.sco_bdaddr)); + bcopy(&pcb->dst, &sa->sco_bdaddr, sizeof(sa->sco_bdaddr)); mtx_unlock(&pcb->pcb_mtx); - sa.sco_len = sizeof(sa); - sa.sco_family = AF_BLUETOOTH; + return (0); +} + +/* + * Get peer address + */ +int +ng_btsocket_sco_peeraddr(struct socket *so, struct sockaddr **nam) +{ + struct sockaddr_sco sa; + int error; + error = ng_btsocket_sco_peeraddr1(so, &sa); + if (error != 0) + return (error); *nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT); return ((*nam == NULL)? ENOMEM : 0); -} /* ng_btsocket_sco_peeraddr */ +} + +/* + * Accept connection on socket. Nothing to do here, socket must be connected + * and ready, so just return peer address and be done with it. 
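+ */

/*
 * Editorial sketch, not part of the patch: the new
 * ng_btsocket_sco_peeraddr1() above writes into caller-provided storage,
 * so accept() and peeraddr() can share one helper and only the peeraddr()
 * path still needs the M_NOWAIT sodupsockaddr() allocation.  A
 * hypothetical (unused) caller would look like this:
 */
static void
example_log_peer(struct socket *so)
{
	struct sockaddr_sco sa;

	/* The helper fills in sco_len/sco_family and copies the peer bdaddr */
	if (ng_btsocket_sco_peeraddr1(so, &sa) == 0)
		printf("SCO peer %02x:%02x:%02x:%02x:%02x:%02x\n",
		    sa.sco_bdaddr.b[5], sa.sco_bdaddr.b[4],
		    sa.sco_bdaddr.b[3], sa.sco_bdaddr.b[2],
		    sa.sco_bdaddr.b[1], sa.sco_bdaddr.b[0]);
}

+/*
+ * Accept connection on socket: return the peer address via the shared helper above.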
+ */ +int +ng_btsocket_sco_accept(struct socket *so, struct sockaddr *sa) +{ + if (ng_btsocket_sco_node == NULL) + return (EINVAL); + + return (ng_btsocket_sco_peeraddr1(so, (struct sockaddr_sco *)sa)); +} /* * Send data to socket */ int ng_btsocket_sco_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { ng_btsocket_sco_pcb_t *pcb = so2sco_pcb(so); int error = 0; if (ng_btsocket_sco_node == NULL) { error = ENETDOWN; goto drop; } /* Check socket and input */ if (pcb == NULL || m == NULL || control != NULL) { error = EINVAL; goto drop; } mtx_lock(&pcb->pcb_mtx); /* Make sure socket is connected */ if (pcb->state != NG_BTSOCKET_SCO_OPEN) { mtx_unlock(&pcb->pcb_mtx); error = ENOTCONN; goto drop; } /* Check route */ if (pcb->rt == NULL || pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) { mtx_unlock(&pcb->pcb_mtx); error = ENETDOWN; goto drop; } /* Check packet size */ if (m->m_pkthdr.len > pcb->rt->pkt_size) { NG_BTSOCKET_SCO_ERR( "%s: Packet too big, len=%d, pkt_size=%d\n", __func__, m->m_pkthdr.len, pcb->rt->pkt_size); mtx_unlock(&pcb->pcb_mtx); error = EMSGSIZE; goto drop; } /* * First put packet on socket send queue. Then check if we have * pending timeout. If we do not have timeout then we must send * packet and schedule timeout. Otherwise do nothing and wait for * NGM_HCI_SYNC_CON_QUEUE message. */ sbappendrecord(&pcb->so->so_snd, m); m = NULL; if (!(pcb->flags & NG_BTSOCKET_SCO_TIMO)) { error = ng_btsocket_sco_send2(pcb); if (error == 0) ng_btsocket_sco_timeout(pcb); else sbdroprecord(&pcb->so->so_snd); /* XXX */ } mtx_unlock(&pcb->pcb_mtx); drop: NG_FREE_M(m); /* checks for != NULL */ NG_FREE_M(control); return (error); } /* ng_btsocket_sco_send */ /* * Send first packet in the socket queue to the SCO layer */ static int ng_btsocket_sco_send2(ng_btsocket_sco_pcb_p pcb) { struct mbuf *m = NULL; ng_hci_scodata_pkt_t *hdr = NULL; int error = 0; mtx_assert(&pcb->pcb_mtx, MA_OWNED); while (pcb->rt->pending < pcb->rt->num_pkts && sbavail(&pcb->so->so_snd) > 0) { /* Get a copy of the first packet on send queue */ m = m_dup(pcb->so->so_snd.sb_mb, M_NOWAIT); if (m == NULL) { error = ENOBUFS; break; } /* Create SCO packet header */ M_PREPEND(m, sizeof(*hdr), M_NOWAIT); if (m != NULL) if (m->m_len < sizeof(*hdr)) m = m_pullup(m, sizeof(*hdr)); if (m == NULL) { error = ENOBUFS; break; } /* Fill in the header */ hdr = mtod(m, ng_hci_scodata_pkt_t *); hdr->type = NG_HCI_SCO_DATA_PKT; hdr->con_handle = htole16(NG_HCI_MK_CON_HANDLE(pcb->con_handle, 0, 0)); hdr->length = m->m_pkthdr.len - sizeof(*hdr); /* Send packet */ NG_SEND_DATA_ONLY(error, pcb->rt->hook, m); if (error != 0) break; pcb->rt->pending ++; } return ((pcb->rt->pending > 0)? 0 : error); } /* ng_btsocket_sco_send2 */ /* * Get socket address */ int ng_btsocket_sco_sockaddr(struct socket *so, struct sockaddr **nam) { ng_btsocket_sco_pcb_p pcb = so2sco_pcb(so); struct sockaddr_sco sa; if (pcb == NULL) return (EINVAL); if (ng_btsocket_sco_node == NULL) return (EINVAL); mtx_lock(&pcb->pcb_mtx); bcopy(&pcb->src, &sa.sco_bdaddr, sizeof(sa.sco_bdaddr)); mtx_unlock(&pcb->pcb_mtx); sa.sco_len = sizeof(sa); sa.sco_family = AF_BLUETOOTH; *nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT); return ((*nam == NULL)? ENOMEM : 0); } /* ng_btsocket_sco_sockaddr */ /***************************************************************************** ***************************************************************************** ** Misc. 
functions ***************************************************************************** *****************************************************************************/

/* * Look for the socket that listens on the given bdaddr. * Returns exact or close match (if any). * Caller must hold ng_btsocket_sco_sockets_mtx. * Returns with locked pcb. */ static ng_btsocket_sco_pcb_p ng_btsocket_sco_pcb_by_addr(bdaddr_p bdaddr) { ng_btsocket_sco_pcb_p p = NULL, p1 = NULL; mtx_assert(&ng_btsocket_sco_sockets_mtx, MA_OWNED); LIST_FOREACH(p, &ng_btsocket_sco_sockets, next) { mtx_lock(&p->pcb_mtx); if (p->so == NULL || !SOLISTENING(p->so)) { mtx_unlock(&p->pcb_mtx); continue; } if (bcmp(&p->src, bdaddr, sizeof(p->src)) == 0) return (p); /* return with locked pcb */ if (bcmp(&p->src, NG_HCI_BDADDR_ANY, sizeof(p->src)) == 0) p1 = p; mtx_unlock(&p->pcb_mtx); } if (p1 != NULL) mtx_lock(&p1->pcb_mtx); return (p1); } /* ng_btsocket_sco_pcb_by_addr */

/* * Look for the socket that is assigned to the given source address and * handle. Caller must hold ng_btsocket_sco_sockets_mtx. * Returns with locked pcb. */ static ng_btsocket_sco_pcb_p ng_btsocket_sco_pcb_by_handle(bdaddr_p src, int con_handle) { ng_btsocket_sco_pcb_p p = NULL; mtx_assert(&ng_btsocket_sco_sockets_mtx, MA_OWNED); LIST_FOREACH(p, &ng_btsocket_sco_sockets, next) { mtx_lock(&p->pcb_mtx); if (p->con_handle == con_handle && bcmp(src, &p->src, sizeof(p->src)) == 0) return (p); /* return with locked pcb */ mtx_unlock(&p->pcb_mtx); } return (NULL); } /* ng_btsocket_sco_pcb_by_handle */

/* * Look for the socket in the CONNECTING state with the given source and * destination addresses. Caller must hold ng_btsocket_sco_sockets_mtx. * Returns with locked pcb. */ static ng_btsocket_sco_pcb_p ng_btsocket_sco_pcb_by_addrs(bdaddr_p src, bdaddr_p dst) { ng_btsocket_sco_pcb_p p = NULL; mtx_assert(&ng_btsocket_sco_sockets_mtx, MA_OWNED); LIST_FOREACH(p, &ng_btsocket_sco_sockets, next) { mtx_lock(&p->pcb_mtx); if (p->state == NG_BTSOCKET_SCO_CONNECTING && bcmp(src, &p->src, sizeof(p->src)) == 0 && bcmp(dst, &p->dst, sizeof(p->dst)) == 0) return (p); /* return with locked pcb */ mtx_unlock(&p->pcb_mtx); } return (NULL); } /* ng_btsocket_sco_pcb_by_addrs */

/* * Set timeout on socket */ static void ng_btsocket_sco_timeout(ng_btsocket_sco_pcb_p pcb) { mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (!(pcb->flags & NG_BTSOCKET_SCO_TIMO)) { pcb->flags |= NG_BTSOCKET_SCO_TIMO; callout_reset(&pcb->timo, bluetooth_sco_rtx_timeout(), ng_btsocket_sco_process_timeout, pcb); } else KASSERT(0, ("%s: Duplicated socket timeout?!\n", __func__)); } /* ng_btsocket_sco_timeout */

/* * Unset timeout on socket */ static void ng_btsocket_sco_untimeout(ng_btsocket_sco_pcb_p pcb) { mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->flags & NG_BTSOCKET_SCO_TIMO) { callout_stop(&pcb->timo); pcb->flags &= ~NG_BTSOCKET_SCO_TIMO; } else KASSERT(0, ("%s: No socket timeout?!\n", __func__)); } /* ng_btsocket_sco_untimeout */

/* * Process timeout on socket */ static void ng_btsocket_sco_process_timeout(void *xpcb) { ng_btsocket_sco_pcb_p pcb = (ng_btsocket_sco_pcb_p) xpcb; mtx_lock(&pcb->pcb_mtx); pcb->flags &= ~NG_BTSOCKET_SCO_TIMO; pcb->so->so_error = ETIMEDOUT; switch (pcb->state) { case NG_BTSOCKET_SCO_CONNECTING: /* Connect timeout - close the socket */ pcb->state = NG_BTSOCKET_SCO_CLOSED; soisdisconnected(pcb->so); break; case NG_BTSOCKET_SCO_OPEN: /* Send timeout - did not get NGM_HCI_SYNC_CON_QUEUE */ sbdroprecord(&pcb->so->so_snd); sowwakeup(pcb->so); /* XXX FIXME what to do with pcb->rt->pending???
*/ break; case NG_BTSOCKET_SCO_DISCONNECTING: /* Disconnect timeout - disconnect the socket anyway */ pcb->state = NG_BTSOCKET_SCO_CLOSED; soisdisconnected(pcb->so); break; default: NG_BTSOCKET_SCO_ERR( "%s: Invalid socket state=%d\n", __func__, pcb->state); break; } mtx_unlock(&pcb->pcb_mtx); } /* ng_btsocket_sco_process_timeout */ diff --git a/sys/netgraph/ng_ksocket.c b/sys/netgraph/ng_ksocket.c index 777f3261356d..23a53528833f 100644 --- a/sys/netgraph/ng_ksocket.c +++ b/sys/netgraph/ng_ksocket.c @@ -1,1304 +1,1300 @@ /* * ng_ksocket.c */ /*- * Copyright (c) 1996-1999 Whistle Communications, Inc. * All rights reserved. * * Subject to the following obligations and disclaimer of warranty, use and * redistribution of this software, in source or object code forms, with or * without modifications are expressly permitted by Whistle Communications; * provided, however, that: * 1. Any and all reproductions of the source or object code must include the * copyright notice above and the following disclaimer of warranties; and * 2. No rights are granted, in any manner or form, to use Whistle * Communications, Inc. trademarks, including the mark "WHISTLE * COMMUNICATIONS" on advertising, endorsements, or otherwise except as * such appears in the above copyright notice or in the software. * * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE, * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE. * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * * Author: Archie Cobbs * $Whistle: ng_ksocket.c,v 1.1 1999/11/16 20:04:40 archie Exp $ */ /* * Kernel socket node type. This node type is basically a kernel-mode * version of a socket... kindof like the reverse of the socket node type. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NG_SEPARATE_MALLOC static MALLOC_DEFINE(M_NETGRAPH_KSOCKET, "netgraph_ksock", "netgraph ksock node"); #else #define M_NETGRAPH_KSOCKET M_NETGRAPH #endif #define OFFSETOF(s, e) ((char *)&((s *)0)->e - (char *)((s *)0)) #define SADATA_OFFSET (OFFSETOF(struct sockaddr, sa_data)) /* Node private data */ struct ng_ksocket_private { node_p node; hook_p hook; struct socket *so; int fn_sent; /* FN call on incoming event was sent */ LIST_HEAD(, ng_ksocket_private) embryos; LIST_ENTRY(ng_ksocket_private) siblings; u_int32_t flags; u_int32_t response_token; ng_ID_t response_addr; }; typedef struct ng_ksocket_private *priv_p; /* Flags for priv_p */ #define KSF_CONNECTING 0x00000001 /* Waiting for connection complete */ #define KSF_ACCEPTING 0x00000002 /* Waiting for accept complete */ #define KSF_EOFSEEN 0x00000004 /* Have sent 0-length EOF mbuf */ #define KSF_CLONED 0x00000008 /* Cloned from an accepting socket */ #define KSF_EMBRYONIC 0x00000010 /* Cloned node with no hooks yet */ /* Netgraph node methods */ static ng_constructor_t ng_ksocket_constructor; static ng_rcvmsg_t ng_ksocket_rcvmsg; static ng_shutdown_t ng_ksocket_shutdown; static ng_newhook_t ng_ksocket_newhook; static ng_rcvdata_t ng_ksocket_rcvdata; static ng_connect_t ng_ksocket_connect; static ng_disconnect_t ng_ksocket_disconnect; /* Alias structure */ struct ng_ksocket_alias { const char *name; const int value; const int family; }; /* Protocol family aliases */ static const struct ng_ksocket_alias ng_ksocket_families[] = { { "local", PF_LOCAL }, { "inet", PF_INET }, { "inet6", PF_INET6 }, { "atm", PF_ATM }, { "divert", PF_DIVERT }, { NULL, -1 }, }; /* Socket type aliases */ static const struct ng_ksocket_alias ng_ksocket_types[] = { { "stream", SOCK_STREAM }, { "dgram", SOCK_DGRAM }, { "raw", SOCK_RAW }, { "rdm", SOCK_RDM }, { "seqpacket", SOCK_SEQPACKET }, { NULL, -1 }, }; /* Protocol aliases */ static const struct ng_ksocket_alias ng_ksocket_protos[] = { { "ip", IPPROTO_IP, PF_INET }, { "raw", IPPROTO_RAW, PF_INET }, { "icmp", IPPROTO_ICMP, PF_INET }, { "igmp", IPPROTO_IGMP, PF_INET }, { "tcp", IPPROTO_TCP, PF_INET }, { "udp", IPPROTO_UDP, PF_INET }, { "gre", IPPROTO_GRE, PF_INET }, { "esp", IPPROTO_ESP, PF_INET }, { "ah", IPPROTO_AH, PF_INET }, { "swipe", IPPROTO_SWIPE, PF_INET }, { "encap", IPPROTO_ENCAP, PF_INET }, { "pim", IPPROTO_PIM, PF_INET }, { NULL, -1 }, }; /* Helper functions */ static int ng_ksocket_accept(priv_p); static int ng_ksocket_listen_upcall(struct socket *so, void *arg, int waitflag); static void ng_ksocket_listen_upcall2(node_p node, hook_p hook, void *arg1, int arg2); static int ng_ksocket_incoming(struct socket *so, void *arg, int waitflag); static int ng_ksocket_parse(const struct ng_ksocket_alias *aliases, const char *s, int family); static void ng_ksocket_incoming2(node_p node, hook_p hook, void *arg1, int arg2); /************************************************************************ STRUCT SOCKADDR PARSE TYPE ************************************************************************/ /* Get the length of the data portion of a generic struct sockaddr */ static int ng_parse_generic_sockdata_getLength(const struct ng_parse_type *type, const u_char *start, const u_char *buf) { const struct sockaddr *sa; sa = (const struct sockaddr *)(buf - SADATA_OFFSET); return (sa->sa_len < SADATA_OFFSET) ? 
0 : sa->sa_len - SADATA_OFFSET; } /* Type for the variable length data portion of a generic struct sockaddr */ static const struct ng_parse_type ng_ksocket_generic_sockdata_type = { &ng_parse_bytearray_type, &ng_parse_generic_sockdata_getLength }; /* Type for a generic struct sockaddr */ static const struct ng_parse_struct_field ng_parse_generic_sockaddr_type_fields[] = { { "len", &ng_parse_uint8_type }, { "family", &ng_parse_uint8_type }, { "data", &ng_ksocket_generic_sockdata_type }, { NULL } }; static const struct ng_parse_type ng_ksocket_generic_sockaddr_type = { &ng_parse_struct_type, &ng_parse_generic_sockaddr_type_fields }; /* Convert a struct sockaddr from ASCII to binary. If its a protocol family that we specially handle, do that, otherwise defer to the generic parse type ng_ksocket_generic_sockaddr_type. */ static int ng_ksocket_sockaddr_parse(const struct ng_parse_type *type, const char *s, int *off, const u_char *const start, u_char *const buf, int *buflen) { struct sockaddr *const sa = (struct sockaddr *)buf; enum ng_parse_token tok; char fambuf[32]; int family, len; char *t; /* If next token is a left curly brace, use generic parse type */ if ((tok = ng_parse_get_token(s, off, &len)) == T_LBRACE) { return (*ng_ksocket_generic_sockaddr_type.supertype->parse) (&ng_ksocket_generic_sockaddr_type, s, off, start, buf, buflen); } /* Get socket address family followed by a slash */ while (isspace(s[*off])) (*off)++; if ((t = strchr(s + *off, '/')) == NULL) return (EINVAL); if ((len = t - (s + *off)) > sizeof(fambuf) - 1) return (EINVAL); strncpy(fambuf, s + *off, len); fambuf[len] = '\0'; *off += len + 1; if ((family = ng_ksocket_parse(ng_ksocket_families, fambuf, 0)) == -1) return (EINVAL); /* Set family */ if (*buflen < SADATA_OFFSET) return (ERANGE); sa->sa_family = family; /* Set family-specific data and length */ switch (sa->sa_family) { case PF_LOCAL: /* Get pathname */ { const int pathoff = OFFSETOF(struct sockaddr_un, sun_path); struct sockaddr_un *const sun = (struct sockaddr_un *)sa; int toklen, pathlen; char *path; if ((path = ng_get_string_token(s, off, &toklen, NULL)) == NULL) return (EINVAL); pathlen = strlen(path); if (pathlen > SOCK_MAXADDRLEN) { free(path, M_NETGRAPH_KSOCKET); return (E2BIG); } if (*buflen < pathoff + pathlen) { free(path, M_NETGRAPH_KSOCKET); return (ERANGE); } *off += toklen; bcopy(path, sun->sun_path, pathlen); sun->sun_len = pathoff + pathlen; free(path, M_NETGRAPH_KSOCKET); break; } case PF_INET: /* Get an IP address with optional port */ { struct sockaddr_in *const sin = (struct sockaddr_in *)sa; int i; /* Parse this: [:port] */ for (i = 0; i < 4; i++) { u_long val; char *eptr; val = strtoul(s + *off, &eptr, 10); if (val > 0xff || eptr == s + *off) return (EINVAL); *off += (eptr - (s + *off)); ((u_char *)&sin->sin_addr)[i] = (u_char)val; if (i < 3) { if (s[*off] != '.') return (EINVAL); (*off)++; } else if (s[*off] == ':') { (*off)++; val = strtoul(s + *off, &eptr, 10); if (val > 0xffff || eptr == s + *off) return (EINVAL); *off += (eptr - (s + *off)); sin->sin_port = htons(val); } else sin->sin_port = 0; } bzero(&sin->sin_zero, sizeof(sin->sin_zero)); sin->sin_len = sizeof(*sin); break; } #if 0 case PF_INET6: /* XXX implement this someday */ #endif default: return (EINVAL); } /* Done */ *buflen = sa->sa_len; return (0); } /* Convert a struct sockaddr from binary to ASCII */ static int ng_ksocket_sockaddr_unparse(const struct ng_parse_type *type, const u_char *data, int *off, char *cbuf, int cbuflen) { const struct sockaddr *sa = (const struct 
sockaddr *)(data + *off); int slen = 0; /* Output socket address, either in special or generic format */ switch (sa->sa_family) { case PF_LOCAL: { const int pathoff = OFFSETOF(struct sockaddr_un, sun_path); const struct sockaddr_un *sun = (const struct sockaddr_un *)sa; const int pathlen = sun->sun_len - pathoff; char pathbuf[SOCK_MAXADDRLEN + 1]; char *pathtoken; bcopy(sun->sun_path, pathbuf, pathlen); if ((pathtoken = ng_encode_string(pathbuf, pathlen)) == NULL) return (ENOMEM); slen += snprintf(cbuf, cbuflen, "local/%s", pathtoken); free(pathtoken, M_NETGRAPH_KSOCKET); if (slen >= cbuflen) return (ERANGE); *off += sun->sun_len; return (0); } case PF_INET: { const struct sockaddr_in *sin = (const struct sockaddr_in *)sa; slen += snprintf(cbuf, cbuflen, "inet/%d.%d.%d.%d", ((const u_char *)&sin->sin_addr)[0], ((const u_char *)&sin->sin_addr)[1], ((const u_char *)&sin->sin_addr)[2], ((const u_char *)&sin->sin_addr)[3]); if (sin->sin_port != 0) { slen += snprintf(cbuf + strlen(cbuf), cbuflen - strlen(cbuf), ":%d", (u_int)ntohs(sin->sin_port)); } if (slen >= cbuflen) return (ERANGE); *off += sizeof(*sin); return(0); } #if 0 case PF_INET6: /* XXX implement this someday */ #endif default: return (*ng_ksocket_generic_sockaddr_type.supertype->unparse) (&ng_ksocket_generic_sockaddr_type, data, off, cbuf, cbuflen); } } /* Parse type for struct sockaddr */ static const struct ng_parse_type ng_ksocket_sockaddr_type = { NULL, NULL, NULL, &ng_ksocket_sockaddr_parse, &ng_ksocket_sockaddr_unparse, NULL /* no such thing as a default struct sockaddr */ }; /************************************************************************ STRUCT NG_KSOCKET_SOCKOPT PARSE TYPE ************************************************************************/ /* Get length of the struct ng_ksocket_sockopt value field, which is the just the excess of the message argument portion over the length of the struct ng_ksocket_sockopt. */ static int ng_parse_sockoptval_getLength(const struct ng_parse_type *type, const u_char *start, const u_char *buf) { static const int offset = OFFSETOF(struct ng_ksocket_sockopt, value); const struct ng_ksocket_sockopt *sopt; const struct ng_mesg *msg; sopt = (const struct ng_ksocket_sockopt *)(buf - offset); msg = (const struct ng_mesg *)((const u_char *)sopt - sizeof(*msg)); return msg->header.arglen - sizeof(*sopt); } /* Parse type for the option value part of a struct ng_ksocket_sockopt XXX Eventually, we should handle the different socket options specially. XXX This would avoid byte order problems, eg an integer value of 1 is XXX going to be "[1]" for little endian or "[3=1]" for big endian. 
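 * (Editorial illustration of the remark above, assuming the usual
 * ng_parse array syntax: in the bytearray notation "[1]" sets byte 0
 * to 1 and leaves the rest zero, while "[3=1]" sets byte 3 to 1; both
 * spell the 32-bit integer 1, on little- and big-endian machines
 * respectively.)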
*/ static const struct ng_parse_type ng_ksocket_sockoptval_type = { &ng_parse_bytearray_type, &ng_parse_sockoptval_getLength }; /* Parse type for struct ng_ksocket_sockopt */ static const struct ng_parse_struct_field ng_ksocket_sockopt_type_fields[] = NG_KSOCKET_SOCKOPT_INFO(&ng_ksocket_sockoptval_type); static const struct ng_parse_type ng_ksocket_sockopt_type = { &ng_parse_struct_type, &ng_ksocket_sockopt_type_fields }; /* Parse type for struct ng_ksocket_accept */ static const struct ng_parse_struct_field ng_ksocket_accept_type_fields[] = NGM_KSOCKET_ACCEPT_INFO; static const struct ng_parse_type ng_ksocket_accept_type = { &ng_parse_struct_type, &ng_ksocket_accept_type_fields }; /* List of commands and how to convert arguments to/from ASCII */ static const struct ng_cmdlist ng_ksocket_cmds[] = { { NGM_KSOCKET_COOKIE, NGM_KSOCKET_BIND, "bind", &ng_ksocket_sockaddr_type, NULL }, { NGM_KSOCKET_COOKIE, NGM_KSOCKET_LISTEN, "listen", &ng_parse_int32_type, NULL }, { NGM_KSOCKET_COOKIE, NGM_KSOCKET_ACCEPT, "accept", NULL, &ng_ksocket_accept_type }, { NGM_KSOCKET_COOKIE, NGM_KSOCKET_CONNECT, "connect", &ng_ksocket_sockaddr_type, &ng_parse_int32_type }, { NGM_KSOCKET_COOKIE, NGM_KSOCKET_GETNAME, "getname", NULL, &ng_ksocket_sockaddr_type }, { NGM_KSOCKET_COOKIE, NGM_KSOCKET_GETPEERNAME, "getpeername", NULL, &ng_ksocket_sockaddr_type }, { NGM_KSOCKET_COOKIE, NGM_KSOCKET_SETOPT, "setopt", &ng_ksocket_sockopt_type, NULL }, { NGM_KSOCKET_COOKIE, NGM_KSOCKET_GETOPT, "getopt", &ng_ksocket_sockopt_type, &ng_ksocket_sockopt_type }, { 0 } }; /* Node type descriptor */ static struct ng_type ng_ksocket_typestruct = { .version = NG_ABI_VERSION, .name = NG_KSOCKET_NODE_TYPE, .constructor = ng_ksocket_constructor, .rcvmsg = ng_ksocket_rcvmsg, .shutdown = ng_ksocket_shutdown, .newhook = ng_ksocket_newhook, .connect = ng_ksocket_connect, .rcvdata = ng_ksocket_rcvdata, .disconnect = ng_ksocket_disconnect, .cmdlist = ng_ksocket_cmds, }; NETGRAPH_INIT(ksocket, &ng_ksocket_typestruct); #define ERROUT(x) do { error = (x); goto done; } while (0) /************************************************************************ NETGRAPH NODE STUFF ************************************************************************/ /* * Node type constructor * The NODE part is assumed to be all set up. * There is already a reference to the node for us. */ static int ng_ksocket_constructor(node_p node) { priv_p priv; /* Allocate private structure */ priv = malloc(sizeof(*priv), M_NETGRAPH_KSOCKET, M_NOWAIT | M_ZERO); if (priv == NULL) return (ENOMEM); LIST_INIT(&priv->embryos); /* cross link them */ priv->node = node; NG_NODE_SET_PRIVATE(node, priv); /* Done */ return (0); } /* * Give our OK for a hook to be added. The hook name is of the * form "//" where the three components may * be decimal numbers or else aliases from the above lists. * * Connecting a hook amounts to opening the socket. Disconnecting * the hook closes the socket and destroys the node as well. 
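 * For example (an illustrative sketch, not code from this file): a hook
 * named "inet/dgram/udp" resolves through the alias tables above and
 * makes the newhook method below effectively perform
 *
 *	error = socreate(PF_INET, &priv->so, SOCK_DGRAM, IPPROTO_UDP,
 *	    td->td_ucred, td);
 *
 * Purely numeric components such as "2/2/17" are accepted as well,
 * since ng_ksocket_parse() falls back to parsing plain numbers.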
*/ static int ng_ksocket_newhook(node_p node, hook_p hook, const char *name0) { struct thread *td = curthread; /* XXX broken */ const priv_p priv = NG_NODE_PRIVATE(node); char *s1, *s2, name[NG_HOOKSIZ]; int family, type, protocol, error; /* Check if we're already connected */ if (priv->hook != NULL) return (EISCONN); if (priv->flags & KSF_CLONED) { if (priv->flags & KSF_EMBRYONIC) { /* Remove ourselves from our parent's embryo list */ LIST_REMOVE(priv, siblings); priv->flags &= ~KSF_EMBRYONIC; } } else { /* Extract family, type, and protocol from hook name */ snprintf(name, sizeof(name), "%s", name0); s1 = name; if ((s2 = strchr(s1, '/')) == NULL) return (EINVAL); *s2++ = '\0'; family = ng_ksocket_parse(ng_ksocket_families, s1, 0); if (family == -1) return (EINVAL); s1 = s2; if ((s2 = strchr(s1, '/')) == NULL) return (EINVAL); *s2++ = '\0'; type = ng_ksocket_parse(ng_ksocket_types, s1, 0); if (type == -1) return (EINVAL); s1 = s2; protocol = ng_ksocket_parse(ng_ksocket_protos, s1, family); if (protocol == -1) return (EINVAL); /* Create the socket */ error = socreate(family, &priv->so, type, protocol, td->td_ucred, td); if (error != 0) return (error); /* XXX call soreserve() ? */ } /* OK */ priv->hook = hook; /* * In case of misconfigured routing a packet may reenter * ksocket node recursively. Decouple stack to avoid possible * panics about sleeping with locks held. */ NG_HOOK_FORCE_QUEUE(hook); return(0); } static int ng_ksocket_connect(hook_p hook) { node_p node = NG_HOOK_NODE(hook); const priv_p priv = NG_NODE_PRIVATE(node); struct socket *const so = priv->so; /* Add our hook for incoming data and other events */ SOCKBUF_LOCK(&priv->so->so_rcv); soupcall_set(priv->so, SO_RCV, ng_ksocket_incoming, node); SOCKBUF_UNLOCK(&priv->so->so_rcv); SOCKBUF_LOCK(&priv->so->so_snd); soupcall_set(priv->so, SO_SND, ng_ksocket_incoming, node); SOCKBUF_UNLOCK(&priv->so->so_snd); SOCK_LOCK(priv->so); priv->so->so_state |= SS_NBIO; SOCK_UNLOCK(priv->so); /* * --Original comment-- * On a cloned socket we may have already received one or more * upcalls which we couldn't handle without a hook. Handle * those now. * We cannot call the upcall function directly * from here, because until this function has returned our * hook isn't connected. * * ---meta comment for -current --- * XXX This is dubious. * Upcalls between the time that the hook was * first created and now (on another processor) will * be earlier on the queue than the request to finalise the hook. * By the time the hook is finalised, * the queued upcalls will have happened and the code * will have discarded them for lack of a hook * (socket not open). * * This is a bad byproduct of the complicated way in which hooks * are now created (3 daisy-chained async events). * * Since we are a netgraph operation, * we know that we hold a lock on this node. This forces the * request we make below to be queued rather than executed * immediately, which will cause the upcall function to be called a bit * later. * However, as we will run any waiting queued operations immediately * after doing this one, if we have not finalised the other end * of the hook, those queued operations will fail.
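 * An illustrative timeline of the race described above (editorial, not
 * code from this file):
 *
 *	CPU0: upcall on cloned socket -> incoming2() queued (no hook yet)
 *	CPU1: hook creation queued, hook finalised afterwards
 *
 * incoming2() bails out while priv->hook is still NULL, so those early
 * events are lost; the explicit ng_send_fn() below re-polls the socket
 * once the hook exists, which is why cloned sockets get this nudge.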
*/ if (priv->flags & KSF_CLONED) { ng_send_fn(node, NULL, &ng_ksocket_incoming2, so, M_NOWAIT); } return (0); } /* * Receive a control message */ static int ng_ksocket_rcvmsg(node_p node, item_p item, hook_p lasthook) { struct thread *td = curthread; /* XXX broken */ const priv_p priv = NG_NODE_PRIVATE(node); struct socket *const so = priv->so; struct ng_mesg *resp = NULL; int error = 0; struct ng_mesg *msg; NGI_GET_MSG(item, msg); switch (msg->header.typecookie) { case NGM_KSOCKET_COOKIE: switch (msg->header.cmd) { case NGM_KSOCKET_BIND: { struct sockaddr *const sa = (struct sockaddr *)msg->data; /* Sanity check */ if (msg->header.arglen < SADATA_OFFSET || msg->header.arglen < sa->sa_len) ERROUT(EINVAL); if (so == NULL) ERROUT(ENXIO); /* Bind */ error = sobind(so, sa, td); break; } case NGM_KSOCKET_LISTEN: { /* Sanity check */ if (msg->header.arglen != sizeof(int32_t)) ERROUT(EINVAL); if (so == NULL) ERROUT(ENXIO); /* Listen */ so->so_state |= SS_NBIO; error = solisten(so, *((int32_t *)msg->data), td); if (error == 0) { SOLISTEN_LOCK(so); solisten_upcall_set(so, ng_ksocket_listen_upcall, priv); SOLISTEN_UNLOCK(so); } break; } case NGM_KSOCKET_ACCEPT: { /* Sanity check */ if (msg->header.arglen != 0) ERROUT(EINVAL); if (so == NULL) ERROUT(ENXIO); /* Make sure the socket is capable of accepting */ if (!(so->so_options & SO_ACCEPTCONN)) ERROUT(EINVAL); if (priv->flags & KSF_ACCEPTING) ERROUT(EALREADY); /* * If a connection is already complete, take it. * Otherwise let the upcall function deal with * the connection when it comes in. Don't return * EWOULDBLOCK, per ng_ksocket(4) documentation. */ error = ng_ksocket_accept(priv); if (error == EWOULDBLOCK) error = 0; if (error != 0) ERROUT(error); priv->response_token = msg->header.token; priv->response_addr = NGI_RETADDR(item); break; } case NGM_KSOCKET_CONNECT: { struct sockaddr *const sa = (struct sockaddr *)msg->data; /* Sanity check */ if (msg->header.arglen < SADATA_OFFSET || msg->header.arglen < sa->sa_len) ERROUT(EINVAL); if (so == NULL) ERROUT(ENXIO); /* Do connect */ if ((so->so_state & SS_ISCONNECTING) != 0) ERROUT(EALREADY); if ((error = soconnect(so, sa, td)) != 0) { so->so_state &= ~SS_ISCONNECTING; ERROUT(error); } if ((so->so_state & SS_ISCONNECTING) != 0) { /* We will notify the sender when we connect */ priv->response_token = msg->header.token; priv->response_addr = NGI_RETADDR(item); priv->flags |= KSF_CONNECTING; ERROUT(EINPROGRESS); } break; } case NGM_KSOCKET_GETNAME: case NGM_KSOCKET_GETPEERNAME: { int (*func)(struct socket *so, struct sockaddr **nam); struct sockaddr *sa = NULL; int len; /* Sanity check */ if (msg->header.arglen != 0) ERROUT(EINVAL); if (so == NULL) ERROUT(ENXIO); /* Get function */ if (msg->header.cmd == NGM_KSOCKET_GETPEERNAME) { if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) ERROUT(ENOTCONN); func = so->so_proto->pr_peeraddr; } else func = so->so_proto->pr_sockaddr; /* Get local or peer address */ if ((error = (*func)(so, &sa)) != 0) goto bail; len = (sa == NULL) ? 
0 : sa->sa_len; /* Send it back in a response */ NG_MKRESPONSE(resp, msg, len, M_NOWAIT); if (resp == NULL) { error = ENOMEM; goto bail; } bcopy(sa, resp->data, len); bail: /* Cleanup */ if (sa != NULL) free(sa, M_SONAME); break; } case NGM_KSOCKET_GETOPT: { struct ng_ksocket_sockopt *ksopt = (struct ng_ksocket_sockopt *)msg->data; struct sockopt sopt; /* Sanity check */ if (msg->header.arglen != sizeof(*ksopt)) ERROUT(EINVAL); if (so == NULL) ERROUT(ENXIO); /* Get response with room for option value */ NG_MKRESPONSE(resp, msg, sizeof(*ksopt) + NG_KSOCKET_MAX_OPTLEN, M_NOWAIT); if (resp == NULL) ERROUT(ENOMEM); /* Get socket option, and put value in the response */ sopt.sopt_dir = SOPT_GET; sopt.sopt_level = ksopt->level; sopt.sopt_name = ksopt->name; sopt.sopt_td = NULL; sopt.sopt_valsize = NG_KSOCKET_MAX_OPTLEN; ksopt = (struct ng_ksocket_sockopt *)resp->data; sopt.sopt_val = ksopt->value; if ((error = sogetopt(so, &sopt)) != 0) { NG_FREE_MSG(resp); break; } /* Set actual value length */ resp->header.arglen = sizeof(*ksopt) + sopt.sopt_valsize; break; } case NGM_KSOCKET_SETOPT: { struct ng_ksocket_sockopt *const ksopt = (struct ng_ksocket_sockopt *)msg->data; const int valsize = msg->header.arglen - sizeof(*ksopt); struct sockopt sopt; /* Sanity check */ if (valsize < 0) ERROUT(EINVAL); if (so == NULL) ERROUT(ENXIO); /* Set socket option */ sopt.sopt_dir = SOPT_SET; sopt.sopt_level = ksopt->level; sopt.sopt_name = ksopt->name; sopt.sopt_val = ksopt->value; sopt.sopt_valsize = valsize; sopt.sopt_td = NULL; error = sosetopt(so, &sopt); break; } default: error = EINVAL; break; } break; default: error = EINVAL; break; } done: NG_RESPOND_MSG(error, node, item, resp); NG_FREE_MSG(msg); return (error); } /* * Receive incoming data on our hook. Send it out the socket. */ static int ng_ksocket_rcvdata(hook_p hook, item_p item) { struct thread *td = curthread; /* XXX broken */ const node_p node = NG_HOOK_NODE(hook); const priv_p priv = NG_NODE_PRIVATE(node); struct socket *const so = priv->so; struct sockaddr *sa = NULL; int error; struct mbuf *m; #ifdef ALIGNED_POINTER struct mbuf *n; #endif /* ALIGNED_POINTER */ struct sa_tag *stag; /* Extract data */ NGI_GET_M(item, m); NG_FREE_ITEM(item); #ifdef ALIGNED_POINTER if (!ALIGNED_POINTER(mtod(m, caddr_t), uint32_t)) { n = m_defrag(m, M_NOWAIT); if (n == NULL) { m_freem(m); return (ENOBUFS); } m = n; } #endif /* ALIGNED_POINTER */ /* * Look if socket address is stored in packet tags. * If sockaddr is ours, or provided by a third party (zero id), * then we accept it. */ if (((stag = (struct sa_tag *)m_tag_locate(m, NGM_KSOCKET_COOKIE, NG_KSOCKET_TAG_SOCKADDR, NULL)) != NULL) && (stag->id == NG_NODE_ID(node) || stag->id == 0)) sa = &stag->sa; /* Reset specific mbuf flags to prevent addressing problems. 
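 * Inherited M_BCAST/M_MCAST flags on a forwarded mbuf could otherwise
 * confuse the output path for a unicast destination.
 * (How an upstream node might attach the sockaddr tag checked above; a
 * minimal hedged sketch reusing the sa_tag layout from this file, where
 * sa is the desired destination sockaddr:
 *
 *	struct sa_tag *t = (struct sa_tag *)m_tag_alloc(NGM_KSOCKET_COOKIE,
 *	    NG_KSOCKET_TAG_SOCKADDR, sizeof(ng_ID_t) + sa->sa_len, M_NOWAIT);
 *	if (t != NULL) {
 *		t->id = 0;	// zero id == third-party sender, accepted above
 *		bcopy(sa, &t->sa, sa->sa_len);
 *		m_tag_prepend(m, &t->tag);
 *	}
 * )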
*/ m->m_flags &= ~(M_BCAST|M_MCAST); /* Send packet */ error = sosend(so, sa, 0, m, 0, 0, td); return (error); } /* * Destroy node */ static int ng_ksocket_shutdown(node_p node) { const priv_p priv = NG_NODE_PRIVATE(node); struct socket *so = priv->so; priv_p embryo; /* Close our socket (if any) */ if (priv->so != NULL) { if (SOLISTENING(so)) { SOLISTEN_LOCK(so); solisten_upcall_set(so, NULL, NULL); SOLISTEN_UNLOCK(so); } else { SOCK_RECVBUF_LOCK(so); soupcall_clear(so, SO_RCV); SOCK_RECVBUF_UNLOCK(so); SOCK_SENDBUF_LOCK(so); soupcall_clear(so, SO_SND); SOCK_SENDBUF_UNLOCK(so); } soclose(so); priv->so = NULL; } /* If we are an embryo, take ourselves out of the parent's list */ if (priv->flags & KSF_EMBRYONIC) { LIST_REMOVE(priv, siblings); priv->flags &= ~KSF_EMBRYONIC; } /* Remove any embryonic children we have */ while (!LIST_EMPTY(&priv->embryos)) { embryo = LIST_FIRST(&priv->embryos); ng_rmnode_self(embryo->node); } /* Take down netgraph node */ bzero(priv, sizeof(*priv)); free(priv, M_NETGRAPH_KSOCKET); NG_NODE_SET_PRIVATE(node, NULL); NG_NODE_UNREF(node); /* let the node escape */ return (0); } /* * Hook disconnection */ static int ng_ksocket_disconnect(hook_p hook) { KASSERT(NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) == 0, ("%s: numhooks=%d?", __func__, NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)))); if (NG_NODE_IS_VALID(NG_HOOK_NODE(hook))) ng_rmnode_self(NG_HOOK_NODE(hook)); return (0); } /************************************************************************ HELPER STUFF ************************************************************************/ /* * You should not "just call" a netgraph node function from an external * asynchronous event. This is because in doing so you are ignoring the * locking on the netgraph nodes. Instead call your function via ng_send_fn(). * This will call the function you chose, but will first do all the * locking rigmarole. Your function MAY only be called at some distant future * time (several milliseconds away) so don't give it any arguments * that may be revoked soon (e.g. on your stack). * * To decouple the stack, we use the queueing version of ng_send_fn(). */ static int ng_ksocket_incoming(struct socket *so, void *arg, int waitflag) { const node_p node = arg; const priv_p priv = NG_NODE_PRIVATE(node); int wait = ((waitflag & M_WAITOK) ? NG_WAITOK : 0) | NG_QUEUE; /* * Even if the node is not locked, as soon as we are called we assume * it exists and its private area is valid. With some care we can * access it. Mark the node to record that an incoming event was sent, * to avoid unneeded queue thrashing. */ if (atomic_cmpset_int(&priv->fn_sent, 0, 1) && ng_send_fn1(node, NULL, &ng_ksocket_incoming2, so, 0, wait)) { atomic_store_rel_int(&priv->fn_sent, 0); } return (SU_OK); } /* * When incoming data is appended to the socket, we get notified here. * This is also called whenever a significant event occurs for the socket. * Our original caller may have queued this event some time ago and * we cannot trust that it even still exists. The node however is being * held with a reference by the queueing code and guaranteed to be valid. */ static void ng_ksocket_incoming2(node_p node, hook_p hook, void *arg1, int arg2) { struct socket *so = arg1; const priv_p priv = NG_NODE_PRIVATE(node); struct ng_mesg *response; int error; KASSERT(so == priv->so, ("%s: wrong socket", __func__)); /* Allow next incoming event to be queued.
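 * In sketch form the two halves of the fn_sent handshake pair up as:
 *
 *	upcall:	if (atomic_cmpset_int(&priv->fn_sent, 0, 1)) queue incoming2
 *	here:	atomic_store_rel_int(&priv->fn_sent, 0);
 *
 * so at most one ng_ksocket_incoming2() dispatch is queued at a time.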
*/ atomic_store_rel_int(&priv->fn_sent, 0); /* Check whether a pending connect operation has completed */ if (priv->flags & KSF_CONNECTING) { if ((error = so->so_error) != 0) { so->so_error = 0; so->so_state &= ~SS_ISCONNECTING; } if (!(so->so_state & SS_ISCONNECTING)) { NG_MKMESSAGE(response, NGM_KSOCKET_COOKIE, NGM_KSOCKET_CONNECT, sizeof(int32_t), M_NOWAIT); if (response != NULL) { response->header.flags |= NGF_RESP; response->header.token = priv->response_token; *(int32_t *)response->data = error; /* * send an async "response" message * to the node that set us up * (if it still exists) */ NG_SEND_MSG_ID(error, node, response, priv->response_addr, 0); } priv->flags &= ~KSF_CONNECTING; } } /* * If we don't have a hook, we must handle data events later. When * the hook gets created and is connected, this upcall function * will be called again. */ if (priv->hook == NULL) return; /* Read and forward available mbufs. */ while (1) { struct uio uio; struct sockaddr *sa; struct mbuf *m; int flags; /* Try to get next packet from socket. */ uio.uio_td = NULL; uio.uio_resid = IP_MAXPACKET; flags = MSG_DONTWAIT; sa = NULL; if ((error = soreceive(so, (so->so_state & SS_ISCONNECTED) ? NULL : &sa, &uio, &m, NULL, &flags)) != 0) break; /* See if we got anything. */ if (flags & MSG_TRUNC) { m_freem(m); m = NULL; } if (m == NULL) { if (sa != NULL) free(sa, M_SONAME); break; } KASSERT(m->m_nextpkt == NULL, ("%s: nextpkt", __func__)); /* * Stream sockets do not have packet boundaries, so * we have to allocate a header mbuf and attach the * stream of data to it. */ if (so->so_type == SOCK_STREAM) { struct mbuf *mh; mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) { m_freem(m); if (sa != NULL) free(sa, M_SONAME); break; } mh->m_next = m; for (; m; m = m->m_next) mh->m_pkthdr.len += m->m_len; m = mh; } /* Put peer's socket address (if any) into a tag */ if (sa != NULL) { struct sa_tag *stag; stag = (struct sa_tag *)m_tag_alloc(NGM_KSOCKET_COOKIE, NG_KSOCKET_TAG_SOCKADDR, sizeof(ng_ID_t) + sa->sa_len, M_NOWAIT); if (stag == NULL) { free(sa, M_SONAME); goto sendit; } bcopy(sa, &stag->sa, sa->sa_len); free(sa, M_SONAME); stag->id = NG_NODE_ID(node); m_tag_prepend(m, &stag->tag); } sendit: /* Forward data with optional peer sockaddr as packet tag */ NG_SEND_DATA_ONLY(error, priv->hook, m); } /* * If the peer has closed the connection, forward a 0-length mbuf * to indicate end-of-file. 
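 * A downstream node can rely on the same convention; a hedged sketch of
 * the check its rcvdata method might perform:
 *
 *	if (m->m_pkthdr.len == 0) {
 *		m_freem(m);	// zero-length mbuf == EOF from ng_ksocket
 *		return (0);
 *	}
 *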
*/ if (so->so_rcv.sb_state & SBS_CANTRCVMORE && !(priv->flags & KSF_EOFSEEN)) { struct mbuf *m; m = m_gethdr(M_NOWAIT, MT_DATA); if (m != NULL) NG_SEND_DATA_ONLY(error, priv->hook, m); priv->flags |= KSF_EOFSEEN; } } static int ng_ksocket_accept(priv_p priv) { struct socket *const head = priv->so; struct socket *so; - struct sockaddr *sa = NULL; + struct sockaddr_storage ss = { .ss_len = sizeof(ss) }; struct ng_mesg *resp; struct ng_ksocket_accept *resp_data; node_p node; priv_p priv2; int len; int error; SOLISTEN_LOCK(head); error = solisten_dequeue(head, &so, SOCK_NONBLOCK); if (error == EWOULDBLOCK) { priv->flags |= KSF_ACCEPTING; return (error); } priv->flags &= ~KSF_ACCEPTING; if (error) return (error); - if ((error = soaccept(so, &sa)) != 0) + if ((error = soaccept(so, (struct sockaddr *)&ss)) != 0) return (error); len = OFFSETOF(struct ng_ksocket_accept, addr); - if (sa != NULL) - len += sa->sa_len; + len += ss.ss_len; NG_MKMESSAGE(resp, NGM_KSOCKET_COOKIE, NGM_KSOCKET_ACCEPT, len, M_NOWAIT); if (resp == NULL) { soclose(so); goto out; } resp->header.flags |= NGF_RESP; resp->header.token = priv->response_token; /* Clone a ksocket node to wrap the new socket */ error = ng_make_node_common(&ng_ksocket_typestruct, &node); if (error) { free(resp, M_NETGRAPH); soclose(so); goto out; } if (ng_ksocket_constructor(node) != 0) { NG_NODE_UNREF(node); free(resp, M_NETGRAPH); soclose(so); goto out; } priv2 = NG_NODE_PRIVATE(node); priv2->so = so; priv2->flags |= KSF_CLONED | KSF_EMBRYONIC; /* * Insert the cloned node into a list of embryonic children * on the parent node. When a hook is created on the cloned * node it will be removed from this list. When the parent * is destroyed it will destroy any embryonic children it has. */ LIST_INSERT_HEAD(&priv->embryos, priv2, siblings); SOCKBUF_LOCK(&so->so_rcv); soupcall_set(so, SO_RCV, ng_ksocket_incoming, node); SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_LOCK(&so->so_snd); soupcall_set(so, SO_SND, ng_ksocket_incoming, node); SOCKBUF_UNLOCK(&so->so_snd); /* Fill in the response data and send it or return it to the caller */ resp_data = (struct ng_ksocket_accept *)resp->data; resp_data->nodeid = NG_NODE_ID(node); - if (sa != NULL) - bcopy(sa, &resp_data->addr, sa->sa_len); + bcopy(&ss, &resp_data->addr, ss.ss_len); NG_SEND_MSG_ID(error, node, resp, priv->response_addr, 0); out: - if (sa != NULL) - free(sa, M_SONAME); return (0); } static int ng_ksocket_listen_upcall(struct socket *so, void *arg, int waitflag) { priv_p priv = arg; int wait = ((waitflag & M_WAITOK) ? NG_WAITOK : 0) | NG_QUEUE; ng_send_fn1(priv->node, NULL, &ng_ksocket_listen_upcall2, priv, 0, wait); return (SU_OK); } static void ng_ksocket_listen_upcall2(node_p node, hook_p hook, void *arg1, int arg2) { const priv_p priv = NG_NODE_PRIVATE(node); (void )ng_ksocket_accept(priv); } /* * Parse out either an integer value or an alias. 
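 * For example, with the alias tables above:
 *
 *	ng_ksocket_parse(ng_ksocket_families, "inet", 0);	// PF_INET
 *	ng_ksocket_parse(ng_ksocket_protos, "udp", PF_INET);	// IPPROTO_UDP
 *	ng_ksocket_parse(ng_ksocket_types, "17", 0);		// 17 (numeric)
 *
 * Anything that is neither a known alias nor a non-negative number
 * yields -1.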
*/ static int ng_ksocket_parse(const struct ng_ksocket_alias *aliases, const char *s, int family) { int k, val; char *eptr; /* Try aliases */ for (k = 0; aliases[k].name != NULL; k++) { if (strcmp(s, aliases[k].name) == 0 && aliases[k].family == family) return aliases[k].value; } /* Try parsing as a number */ val = (int)strtoul(s, &eptr, 10); if (val < 0 || *eptr != '\0') return (-1); return (val); } diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c index 29d63f989e79..8fb96db84f95 100644 --- a/sys/netinet/sctp_usrreq.c +++ b/sys/netinet/sctp_usrreq.c @@ -1,7571 +1,7557 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #include #include #include #include extern const struct sctp_cc_functions sctp_cc_functions[]; extern const struct sctp_ss_functions sctp_ss_functions[]; static void sctp_init(void *arg SCTP_UNUSED) { u_long sb_max_adj; /* Initialize and modify the sysctled variables */ sctp_init_sysctls(); if ((nmbclusters / 8) > SCTP_ASOC_MAX_CHUNKS_ON_QUEUE) SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue) = (nmbclusters / 8); /* * Allow a user to take no more than 1/2 the number of clusters or * the SB_MAX, whichever is smaller, for the send window. */ sb_max_adj = (u_long)((u_quad_t)(SB_MAX) * MCLBYTES / (MSIZE + MCLBYTES)); SCTP_BASE_SYSCTL(sctp_sendspace) = min(sb_max_adj, (((uint32_t)nmbclusters / 2) * MCLBYTES)); /* * Now for the recv window, should we take the same amount? or * should I do 1/2 the SB_MAX instead in the SB_MAX min above. For * now I will just copy. 
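 * (Worked example, assuming the common values MSIZE = 256 and
 * MCLBYTES = 2048: sb_max_adj = SB_MAX * 2048 / 2304, i.e. roughly 8/9
 * of SB_MAX, reflecting that a socket buffer is charged MSIZE + MCLBYTES
 * of accounting space for each MCLBYTES of payload.)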
*/ SCTP_BASE_SYSCTL(sctp_recvspace) = SCTP_BASE_SYSCTL(sctp_sendspace); SCTP_BASE_VAR(first_time) = 0; SCTP_BASE_VAR(sctp_pcb_initialized) = 0; sctp_pcb_init(); #if defined(SCTP_PACKET_LOGGING) SCTP_BASE_VAR(packet_log_writers) = 0; SCTP_BASE_VAR(packet_log_end) = 0; memset(&SCTP_BASE_VAR(packet_log_buffer), 0, SCTP_PACKET_LOG_SIZE); #endif SCTP_BASE_VAR(eh_tag) = EVENTHANDLER_REGISTER(rt_addrmsg, sctp_addr_change_event_handler, NULL, EVENTHANDLER_PRI_FIRST); } VNET_SYSINIT(sctp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, sctp_init, NULL); #ifdef VIMAGE static void sctp_finish(void *unused __unused) { EVENTHANDLER_DEREGISTER(rt_addrmsg, SCTP_BASE_VAR(eh_tag)); sctp_pcb_finish(); } VNET_SYSUNINIT(sctp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, sctp_finish, NULL); #endif void sctp_pathmtu_adjustment(struct sctp_tcb *stcb, uint32_t mtu, bool resend) { struct sctp_association *asoc; struct sctp_tmit_chunk *chk; uint32_t overhead; asoc = &stcb->asoc; KASSERT(mtu < asoc->smallest_mtu, ("Currently only reducing association MTU %u supported (MTU %u)", asoc->smallest_mtu, mtu)); asoc->smallest_mtu = mtu; if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { overhead = SCTP_MIN_OVERHEAD; } else { overhead = SCTP_MIN_V4_OVERHEAD; } if (asoc->idata_supported) { if (sctp_auth_is_required_chunk(SCTP_IDATA, asoc->peer_auth_chunks)) { overhead += sctp_get_auth_chunk_len(asoc->peer_hmac_id); } } else { if (sctp_auth_is_required_chunk(SCTP_DATA, asoc->peer_auth_chunks)) { overhead += sctp_get_auth_chunk_len(asoc->peer_hmac_id); } } KASSERT(overhead % 4 == 0, ("overhead (%u) not a multiple of 4", overhead)); TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) { if (((uint32_t)chk->send_size + overhead) > mtu) { chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; } } TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) { if (((uint32_t)chk->send_size + overhead) > mtu) { chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; if (resend && chk->sent < SCTP_DATAGRAM_RESEND) { /* * If requested, mark the chunk for * immediate resend, since we sent it being * too big. */ sctp_flight_size_decrease(chk); sctp_total_flight_decrease(stcb, chk); chk->sent = SCTP_DATAGRAM_RESEND; sctp_ucount_incr(asoc->sent_queue_retran_cnt); chk->rec.data.doing_fast_retransmit = 0; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) { sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PMTU, chk->whoTo->flight_size, chk->book_size, (uint32_t)(uintptr_t)chk->whoTo, chk->rec.data.tsn); } /* Clear any time, so NO RTT is being done. */ if (chk->do_rtt == 1) { chk->do_rtt = 0; chk->whoTo->rto_needed = 1; } } } } } #ifdef INET void sctp_notify(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net, uint8_t icmp_type, uint8_t icmp_code, uint16_t ip_len, uint32_t next_mtu) { int timer_stopped; if (icmp_type != ICMP_UNREACH) { /* We only care about unreachable */ SCTP_TCB_UNLOCK(stcb); return; } if ((icmp_code == ICMP_UNREACH_NET) || (icmp_code == ICMP_UNREACH_HOST) || (icmp_code == ICMP_UNREACH_NET_UNKNOWN) || (icmp_code == ICMP_UNREACH_HOST_UNKNOWN) || (icmp_code == ICMP_UNREACH_ISOLATED) || (icmp_code == ICMP_UNREACH_NET_PROHIB) || (icmp_code == ICMP_UNREACH_HOST_PROHIB) || (icmp_code == ICMP_UNREACH_FILTER_PROHIB)) { /* Mark the net unreachable. */ if (net->dest_state & SCTP_ADDR_REACHABLE) { /* OK, that destination is NOT reachable. 
*/ net->dest_state &= ~SCTP_ADDR_REACHABLE; net->dest_state &= ~SCTP_ADDR_PF; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, (void *)net, SCTP_SO_NOT_LOCKED); } SCTP_TCB_UNLOCK(stcb); } else if ((icmp_code == ICMP_UNREACH_PROTOCOL) || (icmp_code == ICMP_UNREACH_PORT)) { /* Treat it like an ABORT. */ sctp_abort_notification(stcb, true, false, 0, NULL, SCTP_SO_NOT_LOCKED); (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2); /* no need to unlock here, since the TCB is gone */ } else if (icmp_code == ICMP_UNREACH_NEEDFRAG) { if (net->dest_state & SCTP_ADDR_NO_PMTUD) { SCTP_TCB_UNLOCK(stcb); return; } /* Find the next (smaller) MTU */ if (next_mtu == 0) { /* * Old type router that does not tell us what the * next MTU is. Rats we will have to guess (in a * educated fashion of course). */ next_mtu = sctp_get_prev_mtu(ip_len); } /* Stop the PMTU timer. */ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) { timer_stopped = 1; sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_1); } else { timer_stopped = 0; } /* Update the path MTU. */ if (net->port) { next_mtu -= sizeof(struct udphdr); } if (net->mtu > next_mtu) { net->mtu = next_mtu; if (net->port) { sctp_hc_set_mtu(&net->ro._l_addr, inp->fibnum, next_mtu + sizeof(struct udphdr)); } else { sctp_hc_set_mtu(&net->ro._l_addr, inp->fibnum, next_mtu); } } /* Update the association MTU */ if (stcb->asoc.smallest_mtu > next_mtu) { sctp_pathmtu_adjustment(stcb, next_mtu, true); } /* Finally, start the PMTU timer if it was running before. */ if (timer_stopped) { sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net); } SCTP_TCB_UNLOCK(stcb); } else { SCTP_TCB_UNLOCK(stcb); } } void sctp_ctlinput(struct icmp *icmp) { struct ip *inner_ip, *outer_ip; struct sctphdr *sh; struct sctp_inpcb *inp; struct sctp_tcb *stcb; struct sctp_nets *net; struct sctp_init_chunk *ch; struct sockaddr_in src, dst; if (icmp_errmap(icmp) == 0) return; outer_ip = (struct ip *)((caddr_t)icmp - sizeof(struct ip)); inner_ip = &icmp->icmp_ip; sh = (struct sctphdr *)((caddr_t)inner_ip + (inner_ip->ip_hl << 2)); memset(&src, 0, sizeof(struct sockaddr_in)); src.sin_family = AF_INET; src.sin_len = sizeof(struct sockaddr_in); src.sin_port = sh->src_port; src.sin_addr = inner_ip->ip_src; memset(&dst, 0, sizeof(struct sockaddr_in)); dst.sin_family = AF_INET; dst.sin_len = sizeof(struct sockaddr_in); dst.sin_port = sh->dest_port; dst.sin_addr = inner_ip->ip_dst; /* * 'dst' holds the dest of the packet that failed to be sent. 'src' * holds our local endpoint address. Thus we reverse the dst and the * src in the lookup. */ inp = NULL; net = NULL; stcb = sctp_findassociation_addr_sa((struct sockaddr *)&dst, (struct sockaddr *)&src, &inp, &net, 1, SCTP_DEFAULT_VRFID); if ((stcb != NULL) && (net != NULL) && (inp != NULL)) { /* Check the verification tag */ if (ntohl(sh->v_tag) != 0) { /* * This must be the verification tag used for * sending out packets. We don't consider packets * reflecting the verification tag. */ if (ntohl(sh->v_tag) != stcb->asoc.peer_vtag) { SCTP_TCB_UNLOCK(stcb); return; } } else { if (ntohs(outer_ip->ip_len) >= sizeof(struct ip) + 8 + (inner_ip->ip_hl << 2) + 20) { /* * In this case we can check if we got an * INIT chunk and if the initiate tag * matches. 
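 * (The outer length check guarantees that 20 bytes of the embedded
 * SCTP packet are present: the 12-byte common header plus the 4-byte
 * chunk header and 4-byte initiate_tag inspected below, assuming the
 * usual sctphdr and sctp_init_chunk layouts.)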
*/ ch = (struct sctp_init_chunk *)(sh + 1); if ((ch->ch.chunk_type != SCTP_INITIATION) || (ntohl(ch->init.initiate_tag) != stcb->asoc.my_vtag)) { SCTP_TCB_UNLOCK(stcb); return; } } else { SCTP_TCB_UNLOCK(stcb); return; } } sctp_notify(inp, stcb, net, icmp->icmp_type, icmp->icmp_code, ntohs(inner_ip->ip_len), (uint32_t)ntohs(icmp->icmp_nextmtu)); } else { if ((stcb == NULL) && (inp != NULL)) { /* reduce ref-count */ SCTP_INP_WLOCK(inp); SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); } if (stcb) { SCTP_TCB_UNLOCK(stcb); } } } #endif static int sctp_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in addrs[2]; struct sctp_inpcb *inp; struct sctp_nets *net; struct sctp_tcb *stcb; int error; uint32_t vrf_id; /* FIX, for non-bsd is this right? */ vrf_id = SCTP_DEFAULT_VRFID; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); stcb = sctp_findassociation_addr_sa(sintosa(&addrs[1]), sintosa(&addrs[0]), &inp, &net, 1, vrf_id); if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) { if ((inp != NULL) && (stcb == NULL)) { /* reduce ref-count */ SCTP_INP_WLOCK(inp); SCTP_INP_DECR_REF(inp); goto cred_can_cont; } SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); error = ENOENT; goto out; } SCTP_TCB_UNLOCK(stcb); /* * We use the write lock here, only since in the error leg we need * it. If we used RLOCK, then we would have to * wlock/decr/unlock/rlock. Which in theory could create a hole. * Better to use higher wlock. */ SCTP_INP_WLOCK(inp); cred_can_cont: error = cr_canseesocket(req->td->td_ucred, inp->sctp_socket); if (error) { SCTP_INP_WUNLOCK(inp); goto out; } cru2x(inp->sctp_socket->so_cred, &xuc); SCTP_INP_WUNLOCK(inp); error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); out: return (error); } SYSCTL_PROC(_net_inet_sctp, OID_AUTO, getcred, CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 0, 0, sctp_getcred, "S,ucred", "Get the ucred of a SCTP connection"); void sctp_abort(struct socket *so) { struct epoch_tracker et; struct sctp_inpcb *inp; inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { return; } SCTP_INP_WLOCK(inp); NET_EPOCH_ENTER(et); #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 17); #endif if (((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0)) { inp->sctp_flags |= SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP; #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 16); #endif SCTP_INP_WUNLOCK(inp); sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT, SCTP_CALLED_AFTER_CMPSET_OFCLOSE); SOCK_LOCK(so); KASSERT(!SOLISTENING(so), ("sctp_abort: called on listening socket %p", so)); SCTP_SB_CLEAR(so->so_snd); SCTP_SB_CLEAR(so->so_rcv); /* Now null out the reference, we are completely detached. 
*/ so->so_pcb = NULL; SOCK_UNLOCK(so); } else { SCTP_INP_WUNLOCK(inp); } NET_EPOCH_EXIT(et); } #ifdef INET static int sctp_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNUSED) { struct sctp_inpcb *inp; struct inpcb *ip_inp; int error; uint32_t vrf_id = SCTP_DEFAULT_VRFID; inp = (struct sctp_inpcb *)so->so_pcb; if (inp != NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = SCTP_SORESERVE(so, SCTP_BASE_SYSCTL(sctp_sendspace), SCTP_BASE_SYSCTL(sctp_recvspace)); if (error) { return (error); } } error = sctp_inpcb_alloc(so, vrf_id); if (error) { return (error); } inp = (struct sctp_inpcb *)so->so_pcb; SCTP_INP_WLOCK(inp); inp->sctp_flags &= ~SCTP_PCB_FLAGS_BOUND_V6; /* I'm not v6! */ ip_inp = &inp->ip_inp.inp; ip_inp->inp_vflag |= INP_IPV4; ip_inp->inp_ip_ttl = MODULE_GLOBAL(ip_defttl); SCTP_INP_WUNLOCK(inp); return (0); } static int sctp_bind(struct socket *so, struct sockaddr *addr, struct thread *p) { struct sctp_inpcb *inp; inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } if (addr != NULL) { if ((addr->sa_family != AF_INET) || (addr->sa_len != sizeof(struct sockaddr_in))) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } } return (sctp_inpcb_bind(so, addr, NULL, p)); } #endif void sctp_close(struct socket *so) { struct epoch_tracker et; struct sctp_inpcb *inp; inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) return; /* * Inform all the lower layer assoc that we are done. */ SCTP_INP_WLOCK(inp); NET_EPOCH_ENTER(et); #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 17); #endif if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) { inp->sctp_flags |= SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP; if (((so->so_options & SO_LINGER) && (so->so_linger == 0)) || (SCTP_SBAVAIL(&so->so_rcv) > 0)) { #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 13); #endif SCTP_INP_WUNLOCK(inp); sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT, SCTP_CALLED_AFTER_CMPSET_OFCLOSE); } else { #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 14); #endif SCTP_INP_WUNLOCK(inp); sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE, SCTP_CALLED_AFTER_CMPSET_OFCLOSE); } /* * The socket is now detached, no matter what the state of * the SCTP association. */ SOCK_LOCK(so); if (!SOLISTENING(so)) { SCTP_SB_CLEAR(so->so_snd); SCTP_SB_CLEAR(so->so_rcv); } /* Now null out the reference, we are completely detached. 
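 * (For reference, the abortive branch above is what a user triggers by
 * closing with an immediate linger timeout; hedged userland sketch:
 *
 *	struct linger l = { .l_onoff = 1, .l_linger = 0 };
 *	setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
 *	close(fd);	// association is aborted, not gracefully shut down
 *
 * The same branch is taken when unread data is still in the receive
 * buffer.)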
*/ so->so_pcb = NULL; SOCK_UNLOCK(so); } else { SCTP_INP_WUNLOCK(inp); } NET_EPOCH_EXIT(et); } int sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *p); int sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *p) { struct sctp_inpcb *inp; int error; inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { if (control) { sctp_m_freem(control); control = NULL; } SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); sctp_m_freem(m); return (EINVAL); } /* Got to have a "to" address if we are NOT a connected socket */ if ((addr == NULL) && ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) || (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE))) { goto connected_type; } error = 0; if (addr == NULL) { SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EDESTADDRREQ); error = EDESTADDRREQ; } else if (addr->sa_family != AF_INET) { SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EAFNOSUPPORT); error = EAFNOSUPPORT; } else if (addr->sa_len != sizeof(struct sockaddr_in)) { SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } if (error != 0) { sctp_m_freem(m); if (control) { sctp_m_freem(control); control = NULL; } return (error); } connected_type: /* now what about control */ if (control) { if (inp->control) { sctp_m_freem(inp->control); inp->control = NULL; } inp->control = control; } /* Place the data */ if (inp->pkt) { SCTP_BUF_NEXT(inp->pkt_last) = m; inp->pkt_last = m; } else { inp->pkt_last = inp->pkt = m; } if ( /* FreeBSD uses a flag passed */ ((flags & PRUS_MORETOCOME) == 0) ) { /* * Note: with the current version this code will only be used * by OpenBSD -- NetBSD, FreeBSD, and macOS have methods for * re-defining sosend to use sctp_sosend. One can * optionally switch back to this code (by changing back the * definitions) but this is not advisable. This code is used * by FreeBSD when sending a file with sendfile() though.
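 * (When the caller sets PRUS_MORETOCOME, more data is coming, so the
 * chain accumulated in inp->pkt is held back; once the flag is clear
 * the whole chain is flushed through sctp_output() below.)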
*/ struct epoch_tracker et; int ret; NET_EPOCH_ENTER(et); ret = sctp_output(inp, inp->pkt, addr, inp->control, p, flags); NET_EPOCH_EXIT(et); inp->pkt = NULL; inp->control = NULL; return (ret); } else { return (0); } } int sctp_disconnect(struct socket *so) { struct epoch_tracker et; struct sctp_inpcb *inp; struct sctp_association *asoc; struct sctp_tcb *stcb; inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN); return (ENOTCONN); } SCTP_INP_RLOCK(inp); KASSERT(inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE || inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL, ("Not a one-to-one style socket")); stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb == NULL) { SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN); return (ENOTCONN); } SCTP_TCB_LOCK(stcb); asoc = &stcb->asoc; if (asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) { /* We are about to be freed, out of here */ SCTP_TCB_UNLOCK(stcb); SCTP_INP_RUNLOCK(inp); return (0); } NET_EPOCH_ENTER(et); if (((so->so_options & SO_LINGER) && (so->so_linger == 0)) || (SCTP_SBAVAIL(&so->so_rcv) > 0)) { if (SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_WAIT) { /* Left with Data unread */ struct mbuf *op_err; op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, ""); sctp_send_abort_tcb(stcb, op_err, SCTP_SO_LOCKED); SCTP_STAT_INCR_COUNTER32(sctps_aborted); } SCTP_INP_RUNLOCK(inp); if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_3); /* No unlock tcb assoc is gone */ NET_EPOCH_EXIT(et); return (0); } if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue) && (asoc->stream_queue_cnt == 0)) { /* there is nothing queued to send, so done */ if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) { goto abort_anyway; } if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) && (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) { /* only send SHUTDOWN 1st time thru */ struct sctp_nets *netp; if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT); sctp_stop_timers_for_shutdown(stcb); if (stcb->asoc.alternate) { netp = stcb->asoc.alternate; } else { netp = stcb->asoc.primary_destination; } sctp_send_shutdown(stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, NULL); sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED); } } else { /* * we still got (or just got) data to send, so set * SHUTDOWN_PENDING */ /* * XXX sockets draft says that SCTP_EOF should be sent with * no data. 
Currently, we will allow user data to be sent * first and then move to SHUTDOWN-PENDING */ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING); if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) { SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT); } if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue) && (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) { struct mbuf *op_err; abort_anyway: op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, ""); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4; sctp_send_abort_tcb(stcb, op_err, SCTP_SO_LOCKED); SCTP_STAT_INCR_COUNTER32(sctps_aborted); if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } SCTP_INP_RUNLOCK(inp); (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_5); NET_EPOCH_EXIT(et); return (0); } else { sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED); } } soisdisconnecting(so); NET_EPOCH_EXIT(et); SCTP_TCB_UNLOCK(stcb); SCTP_INP_RUNLOCK(inp); return (0); } int sctp_flush(struct socket *so, int how) { struct epoch_tracker et; struct sctp_tcb *stcb; struct sctp_queued_to_read *control, *ncontrol; struct sctp_inpcb *inp; struct mbuf *m, *op_err; bool need_to_abort = false; /* * For 1-to-1 style sockets, flush the read queue and trigger an * ungraceful shutdown of the association, if and only if user * messages are lost. Losing notifications does not need to be * signalled to the peer. */ if (how == PRU_FLUSH_WR) { /* This function is only relevant for the read direction. */ return (0); } inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } SCTP_INP_WLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) { /* For 1-to-many style sockets this function does nothing.
*/ SCTP_INP_WUNLOCK(inp); return (0); } stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb != NULL) { SCTP_TCB_LOCK(stcb); } SCTP_INP_READ_LOCK(inp); inp->sctp_flags |= SCTP_PCB_FLAGS_SOCKET_CANT_READ; SOCK_LOCK(so); TAILQ_FOREACH_SAFE(control, &inp->read_queue, next, ncontrol) { if ((control->spec_flags & M_NOTIFICATION) == 0) { need_to_abort = true; } TAILQ_REMOVE(&inp->read_queue, control, next); control->on_read_q = 0; for (m = control->data; m; m = SCTP_BUF_NEXT(m)) { sctp_sbfree(control, control->stcb, &so->so_rcv, m); } if (control->on_strm_q == 0) { sctp_free_remote_addr(control->whoFrom); if (control->data) { sctp_m_freem(control->data); control->data = NULL; } sctp_free_a_readq(stcb, control); } else { stcb->asoc.size_on_all_streams += control->length; } } SOCK_UNLOCK(so); SCTP_INP_READ_UNLOCK(inp); if (need_to_abort && (stcb != NULL)) { inp->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6; SCTP_INP_WUNLOCK(inp); op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, ""); NET_EPOCH_ENTER(et); sctp_abort_an_association(inp, stcb, op_err, false, SCTP_SO_LOCKED); NET_EPOCH_EXIT(et); return (ECONNABORTED); } if (stcb != NULL) { SCTP_TCB_UNLOCK(stcb); } SCTP_INP_WUNLOCK(inp); return (0); } int sctp_shutdown(struct socket *so) { struct sctp_inpcb *inp; inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } SCTP_INP_RLOCK(inp); /* For UDP model this is a invalid call */ if (!((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { /* Restore the flags that the soshutdown took away. */ SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_state &= ~SBS_CANTRCVMORE; SOCKBUF_UNLOCK(&so->so_rcv); /* This proc will wakeup for read and do nothing (I hope) */ SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); return (EOPNOTSUPP); } else { /* * Ok, if we reach here its the TCP model and it is either a * SHUT_WR or SHUT_RDWR. This means we put the shutdown flag * against it. */ struct epoch_tracker et; struct sctp_tcb *stcb; struct sctp_association *asoc; struct sctp_nets *netp; if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) { SCTP_INP_RUNLOCK(inp); return (ENOTCONN); } socantsendmore(so); stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb == NULL) { /* * Ok, we hit the case that the shutdown call was * made after an abort or something. Nothing to do * now. */ SCTP_INP_RUNLOCK(inp); return (0); } SCTP_TCB_LOCK(stcb); asoc = &stcb->asoc; if (asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) { SCTP_TCB_UNLOCK(stcb); SCTP_INP_RUNLOCK(inp); return (0); } if ((SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_WAIT) && (SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_ECHOED) && (SCTP_GET_STATE(stcb) != SCTP_STATE_OPEN)) { /* * If we are not in or before ESTABLISHED, there is * no protocol action required. */ SCTP_TCB_UNLOCK(stcb); SCTP_INP_RUNLOCK(inp); return (0); } NET_EPOCH_ENTER(et); if (stcb->asoc.alternate) { netp = stcb->asoc.alternate; } else { netp = stcb->asoc.primary_destination; } if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) && TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue) && (asoc->stream_queue_cnt == 0)) { if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) { goto abort_anyway; } /* there is nothing queued to send, so I'm done... 
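 * (This is the path taken for a plain shutdown(fd, SHUT_WR) on an idle
 * 1-to-1 socket: the SHUTDOWN chunk goes out immediately below and the
 * SHUTDOWN and SHUTDOWN-GUARD timers are armed.)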
*/ SCTP_STAT_DECR_GAUGE32(sctps_currestab); SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT); sctp_stop_timers_for_shutdown(stcb); sctp_send_shutdown(stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, NULL); } else { /* * We still got (or just got) data to send, so set * SHUTDOWN_PENDING. */ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING); if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) { SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT); } if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue) && (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) { struct mbuf *op_err; abort_anyway: op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, ""); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6; SCTP_INP_RUNLOCK(inp); sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, false, SCTP_SO_LOCKED); NET_EPOCH_EXIT(et); return (0); } } /* * XXX: Why do this in the case where we have still data * queued? */ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED); SCTP_TCB_UNLOCK(stcb); SCTP_INP_RUNLOCK(inp); NET_EPOCH_EXIT(et); return (0); } } /* * copies a "user" presentable address and removes embedded scope, etc. * returns 0 on success, 1 on error */ static uint32_t sctp_fill_user_address(struct sockaddr *dst, struct sockaddr *src) { #ifdef INET6 struct sockaddr_in6 lsa6; src = (struct sockaddr *)sctp_recover_scope((struct sockaddr_in6 *)src, &lsa6); #endif memcpy(dst, src, src->sa_len); return (0); } static size_t sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp, struct sctp_tcb *stcb, size_t limit, struct sockaddr *addr, uint32_t vrf_id) { struct sctp_ifn *sctp_ifn; struct sctp_ifa *sctp_ifa; size_t actual; int loopback_scope; #if defined(INET) int ipv4_local_scope, ipv4_addr_legal; #endif #if defined(INET6) int local_scope, site_scope, ipv6_addr_legal; #endif struct sctp_vrf *vrf; SCTP_IPI_ADDR_LOCK_ASSERT(); actual = 0; if (limit == 0) return (actual); if (stcb) { /* Turn on all the appropriate scope */ loopback_scope = stcb->asoc.scope.loopback_scope; #if defined(INET) ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope; ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal; #endif #if defined(INET6) local_scope = stcb->asoc.scope.local_scope; site_scope = stcb->asoc.scope.site_scope; ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal; #endif } else { /* Use generic values for endpoints. */ loopback_scope = 1; #if defined(INET) ipv4_local_scope = 1; #endif #if defined(INET6) local_scope = 1; site_scope = 1; #endif if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { #if defined(INET6) ipv6_addr_legal = 1; #endif #if defined(INET) if (SCTP_IPV6_V6ONLY(inp)) { ipv4_addr_legal = 0; } else { ipv4_addr_legal = 1; } #endif } else { #if defined(INET6) ipv6_addr_legal = 0; #endif #if defined(INET) ipv4_addr_legal = 1; #endif } } vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) { return (0); } if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { if ((loopback_scope == 0) && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { /* Skip loopback if loopback_scope not set */ continue; } LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { if (stcb) { /* * For the BOUND-ALL case, the list * associated with a TCB is Always * considered a reverse list.. i.e. * it lists addresses that are NOT * part of the association. If this * is one of those we must skip it. 
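 * (Example: an endpoint bound to INADDR_ANY on a host owning 10.0.0.1
 * and 192.0.2.1, where the association has 192.0.2.1 on its restricted
 * list, reports only 10.0.0.1 here.)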
					 */
					if (sctp_is_addr_restricted(stcb, sctp_ifa)) {
						continue;
					}
				}
				switch (sctp_ifa->address.sa.sa_family) {
#ifdef INET
				case AF_INET:
					if (ipv4_addr_legal) {
						struct sockaddr_in *sin;

						sin = &sctp_ifa->address.sin;
						if (sin->sin_addr.s_addr == 0) {
							/* we skip unspecified addresses */
							continue;
						}
						if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
						    &sin->sin_addr) != 0) {
							continue;
						}
						if ((ipv4_local_scope == 0) &&
						    (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
							continue;
						}
#ifdef INET6
						if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
							if (actual + sizeof(struct sockaddr_in6) > limit) {
								return (actual);
							}
							in6_sin_2_v4mapsin6(sin, (struct sockaddr_in6 *)addr);
							((struct sockaddr_in6 *)addr)->sin6_port = inp->sctp_lport;
							addr = (struct sockaddr *)((caddr_t)addr + sizeof(struct sockaddr_in6));
							actual += sizeof(struct sockaddr_in6);
						} else {
#endif
							if (actual + sizeof(struct sockaddr_in) > limit) {
								return (actual);
							}
							memcpy(addr, sin, sizeof(struct sockaddr_in));
							((struct sockaddr_in *)addr)->sin_port = inp->sctp_lport;
							addr = (struct sockaddr *)((caddr_t)addr + sizeof(struct sockaddr_in));
							actual += sizeof(struct sockaddr_in);
#ifdef INET6
						}
#endif
					} else {
						continue;
					}
					break;
#endif
#ifdef INET6
				case AF_INET6:
					if (ipv6_addr_legal) {
						struct sockaddr_in6 *sin6;

						sin6 = &sctp_ifa->address.sin6;
						if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
							/* we skip unspecified addresses */
							continue;
						}
						if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
						    &sin6->sin6_addr) != 0) {
							continue;
						}
						if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
							if (local_scope == 0)
								continue;
							if (sin6->sin6_scope_id == 0) {
								if (sa6_recoverscope(sin6) != 0)
									/* bad link local address */
									continue;
							}
						}
						if ((site_scope == 0) &&
						    (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
							continue;
						}
						if (actual + sizeof(struct sockaddr_in6) > limit) {
							return (actual);
						}
						memcpy(addr, sin6, sizeof(struct sockaddr_in6));
						((struct sockaddr_in6 *)addr)->sin6_port = inp->sctp_lport;
						addr = (struct sockaddr *)((caddr_t)addr + sizeof(struct sockaddr_in6));
						actual += sizeof(struct sockaddr_in6);
					} else {
						continue;
					}
					break;
#endif
				default:
					/* TSNH */
					break;
				}
			}
		}
	} else {
		struct sctp_laddr *laddr;
		size_t sa_len;

		LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
			if (stcb) {
				if (sctp_is_addr_restricted(stcb, laddr->ifa)) {
					continue;
				}
			}
			sa_len = laddr->ifa->address.sa.sa_len;
			if (actual + sa_len > limit) {
				return (actual);
			}
			if (sctp_fill_user_address(addr, &laddr->ifa->address.sa))
				continue;
			switch (laddr->ifa->address.sa.sa_family) {
#ifdef INET
			case AF_INET:
				((struct sockaddr_in *)addr)->sin_port = inp->sctp_lport;
				break;
#endif
#ifdef INET6
			case AF_INET6:
				((struct sockaddr_in6 *)addr)->sin6_port = inp->sctp_lport;
				break;
#endif
			default:
				/* TSNH */
				break;
			}
			addr = (struct sockaddr *)((caddr_t)addr + sa_len);
			actual += sa_len;
		}
	}
	return (actual);
}

static size_t
sctp_fill_up_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
    size_t limit, struct sockaddr *addr)
{
	size_t size;

	SCTP_IPI_ADDR_RLOCK();
	/* fill up addresses for the endpoint's default vrf */
	size = sctp_fill_up_addresses_vrf(inp, stcb, limit, addr,
	    inp->def_vrf_id);
	SCTP_IPI_ADDR_RUNLOCK();
	return (size);
}

static size_t
sctp_max_size_addresses_vrf(struct sctp_inpcb *inp, uint32_t vrf_id)
{
	struct sctp_vrf *vrf;
	size_t size;

	/*
	 * In both the sub-set bound and bound_all cases we return the size
	 * of the maximum number of addresses that you could get. In reality
	 * the sub-set bound may have an exclusion list for a given TCB, or
	 * in the bound-all case a TCB may NOT include the loopback or other
	 * addresses as well.
	 */
	SCTP_IPI_ADDR_LOCK_ASSERT();
	vrf = sctp_find_vrf(vrf_id);
	if (vrf == NULL) {
		return (0);
	}
	size = 0;
	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
		struct sctp_ifn *sctp_ifn;
		struct sctp_ifa *sctp_ifa;

		LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
			LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
				/* Count them if they are the right type */
				switch (sctp_ifa->address.sa.sa_family) {
#ifdef INET
				case AF_INET:
#ifdef INET6
					if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4))
						size += sizeof(struct sockaddr_in6);
					else
						size += sizeof(struct sockaddr_in);
#else
					size += sizeof(struct sockaddr_in);
#endif
					break;
#endif
#ifdef INET6
				case AF_INET6:
					size += sizeof(struct sockaddr_in6);
					break;
#endif
				default:
					break;
				}
			}
		}
	} else {
		struct sctp_laddr *laddr;

		LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
			switch (laddr->ifa->address.sa.sa_family) {
#ifdef INET
			case AF_INET:
#ifdef INET6
				if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4))
					size += sizeof(struct sockaddr_in6);
				else
					size += sizeof(struct sockaddr_in);
#else
				size += sizeof(struct sockaddr_in);
#endif
				break;
#endif
#ifdef INET6
			case AF_INET6:
				size += sizeof(struct sockaddr_in6);
				break;
#endif
			default:
				break;
			}
		}
	}
	return (size);
}

static size_t
sctp_max_size_addresses(struct sctp_inpcb *inp)
{
	size_t size;

	SCTP_IPI_ADDR_RLOCK();
	/* Maximum size of all addresses for the endpoint's default VRF */
	size = sctp_max_size_addresses_vrf(inp, inp->def_vrf_id);
	SCTP_IPI_ADDR_RUNLOCK();
	return (size);
}

static int
sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
    size_t optsize, void *p, int delay)
{
	int error;
	int creat_lock_on = 0;
	struct sctp_tcb *stcb = NULL;
	struct sockaddr *sa;
	unsigned int num_v6 = 0, num_v4 = 0, *totaddrp, totaddr;
	uint32_t vrf_id;
	sctp_assoc_t *a_id;

	SCTPDBG(SCTP_DEBUG_PCB1, "Connectx called\n");
	if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
	    (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
		/* We are already connected AND the TCP model */
		SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
		return (EADDRINUSE);
	}
	if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) &&
	    (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE))) {
		SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
		return (EINVAL);
	}
	if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
		SCTP_INP_RLOCK(inp);
		stcb = LIST_FIRST(&inp->sctp_asoc_list);
		SCTP_INP_RUNLOCK(inp);
	}
	if (stcb) {
		SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
		return (EALREADY);
	}
	SCTP_INP_INCR_REF(inp);
	SCTP_ASOC_CREATE_LOCK(inp);
	creat_lock_on = 1;
	if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
	    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
		SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EFAULT);
		error = EFAULT;
		goto out_now;
	}
	totaddrp = (unsigned int *)optval;
	totaddr = *totaddrp;
	sa = (struct sockaddr *)(totaddrp + 1);
	error = sctp_connectx_helper_find(inp, sa, totaddr, &num_v4, &num_v6,
	    (unsigned int)(optsize - sizeof(int)));
	if (error != 0) {
		/* Already have or am bringing up an association */
		SCTP_ASOC_CREATE_UNLOCK(inp);
		creat_lock_on = 0;
		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
		goto out_now;
	}
#ifdef INET6
	if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) &&
	    (num_v6 > 0)) {
		error = EINVAL;
		goto out_now;
	}
	if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6)
	    && (num_v4 > 0)) {
		if (SCTP_IPV6_V6ONLY(inp)) {
			/*
			 * if IPV6_V6ONLY flag, ignore connections destined
			 * to a v4 addr or v4-mapped addr
			 */
			SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
			error = EINVAL;
			goto out_now;
		}
	}
#endif				/* INET6 */
	if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
		/* Bind an ephemeral port */
		error = sctp_inpcb_bind(so, NULL, NULL, p);
		if (error) {
			goto out_now;
		}
	}

	/* FIX ME: do we want to pass in a vrf on the connect call? */
	vrf_id = inp->def_vrf_id;

	/* We are GOOD to go */
	stcb = sctp_aloc_assoc_connected(inp, sa, &error, 0, 0, vrf_id,
	    inp->sctp_ep.pre_open_stream_count,
	    inp->sctp_ep.port,
	    (struct thread *)p,
	    SCTP_INITIALIZE_AUTH_PARAMS);
	if (stcb == NULL) {
		/* Gak! no memory */
		goto out_now;
	}
	SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
	/* move to second address */
	switch (sa->sa_family) {
#ifdef INET
	case AF_INET:
		sa = (struct sockaddr *)((caddr_t)sa + sizeof(struct sockaddr_in));
		break;
#endif
#ifdef INET6
	case AF_INET6:
		sa = (struct sockaddr *)((caddr_t)sa + sizeof(struct sockaddr_in6));
		break;
#endif
	default:
		break;
	}

	error = 0;
	sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error);
	/* Fill in the return id */
	if (error) {
		goto out_now;
	}
	a_id = (sctp_assoc_t *)optval;
	*a_id = sctp_get_associd(stcb);

	if (delay) {
		/* doing delayed connection */
		stcb->asoc.delayed_connection = 1;
		sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb,
		    stcb->asoc.primary_destination);
	} else {
		(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
		sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
	}
	SCTP_TCB_UNLOCK(stcb);

out_now:
	if (creat_lock_on) {
		SCTP_ASOC_CREATE_UNLOCK(inp);
	}
	SCTP_INP_DECR_REF(inp);
	return (error);
}

#define	SCTP_FIND_STCB(inp, stcb, assoc_id) { \
	if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
	    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { \
		SCTP_INP_RLOCK(inp); \
		stcb = LIST_FIRST(&inp->sctp_asoc_list); \
		if (stcb) { \
			SCTP_TCB_LOCK(stcb); \
		} \
		SCTP_INP_RUNLOCK(inp); \
	} else if (assoc_id > SCTP_ALL_ASSOC) { \
		stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1); \
		if (stcb == NULL) { \
			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); \
			error = ENOENT; \
			break; \
		} \
	} else { \
		stcb = NULL; \
	} \
}

#define	SCTP_CHECK_AND_CAST(destp, srcp, type, size) { \
	if (size < sizeof(type)) { \
		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); \
		error = EINVAL; \
		break; \
	} else { \
		destp = (type *)srcp; \
	} \
}

static int
sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize,
    void *p)
{
	struct sctp_inpcb *inp = NULL;
	int error, val = 0;
	struct sctp_tcb *stcb = NULL;

	if (optval == NULL) {
		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
		return (EINVAL);
	}
	inp = (struct sctp_inpcb *)so->so_pcb;
	if (inp == NULL) {
		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
		return EINVAL;
	}
	error = 0;

	switch (optname) {
	case SCTP_NODELAY:
	case SCTP_AUTOCLOSE:
	case SCTP_EXPLICIT_EOR:
	case SCTP_AUTO_ASCONF:
	case SCTP_DISABLE_FRAGMENTS:
	case SCTP_I_WANT_MAPPED_V4_ADDR:
	case SCTP_USE_EXT_RCVINFO:
		SCTP_INP_RLOCK(inp);
		switch (optname) {
		case SCTP_DISABLE_FRAGMENTS:
			val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NO_FRAGMENT);
			break;
		case SCTP_I_WANT_MAPPED_V4_ADDR:
			val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4);
			break;
		case SCTP_AUTO_ASCONF:
			if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
				/* only valid for bound all sockets */
				val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
			} else {
				SCTP_LTRACE_ERR_RET(inp, NULL, NULL,
SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; goto flags_out; } break; case SCTP_EXPLICIT_EOR: val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR); break; case SCTP_NODELAY: val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY); break; case SCTP_USE_EXT_RCVINFO: val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO); break; case SCTP_AUTOCLOSE: if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) val = sctp_ticks_to_secs(inp->sctp_ep.auto_close_time); else val = 0; break; default: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT); error = ENOPROTOOPT; } /* end switch (sopt->sopt_name) */ if (*optsize < sizeof(val)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } flags_out: SCTP_INP_RUNLOCK(inp); if (error == 0) { /* return the option value */ *(int *)optval = val; *optsize = sizeof(val); } break; case SCTP_GET_PACKET_LOG: { #ifdef SCTP_PACKET_LOGGING uint8_t *target; int ret; SCTP_CHECK_AND_CAST(target, optval, uint8_t, *optsize); ret = sctp_copy_out_packet_log(target, (int)*optsize); *optsize = ret; #else SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); error = EOPNOTSUPP; #endif break; } case SCTP_REUSE_PORT: { uint32_t *value; if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)) { /* Can't do this for a 1-m socket */ error = EINVAL; break; } SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize); *value = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE); *optsize = sizeof(uint32_t); break; } case SCTP_PARTIAL_DELIVERY_POINT: { uint32_t *value; SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize); *value = inp->partial_delivery_point; *optsize = sizeof(uint32_t); break; } case SCTP_FRAGMENT_INTERLEAVE: { uint32_t *value; SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize); if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE)) { if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS)) { *value = SCTP_FRAG_LEVEL_2; } else { *value = SCTP_FRAG_LEVEL_1; } } else { *value = SCTP_FRAG_LEVEL_0; } *optsize = sizeof(uint32_t); break; } case SCTP_INTERLEAVING_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.idata_supported; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); if (inp->idata_supported) { av->assoc_value = 1; } else { av->assoc_value = 0; } SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_CMT_ON_OFF: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.sctp_cmt_on_off; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = inp->sctp_cmt_on_off; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_PLUGGABLE_CC: { struct 
sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.congestion_control_module; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = inp->sctp_ep.sctp_default_cc_module; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_CC_OPTION: { struct sctp_cc_option *cc_opt; SCTP_CHECK_AND_CAST(cc_opt, optval, struct sctp_cc_option, *optsize); SCTP_FIND_STCB(inp, stcb, cc_opt->aid_value.assoc_id); if (stcb == NULL) { error = EINVAL; } else { if (stcb->asoc.cc_functions.sctp_cwnd_socket_option == NULL) { error = ENOTSUP; } else { error = (*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 0, cc_opt); *optsize = sizeof(struct sctp_cc_option); } SCTP_TCB_UNLOCK(stcb); } break; } case SCTP_STREAM_SCHEDULER: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.stream_scheduling_module; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = inp->sctp_ep.sctp_default_ss_module; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_STREAM_SCHEDULER_VALUE: { struct sctp_stream_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_stream_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { if ((av->stream_id >= stcb->asoc.streamoutcnt) || (stcb->asoc.ss_functions.sctp_ss_get_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id], &av->stream_value) < 0)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } else { *optsize = sizeof(struct sctp_stream_value); } SCTP_TCB_UNLOCK(stcb); } else { /* * Can't get stream value without * association */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } break; } case SCTP_GET_ADDR_LEN: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); error = EINVAL; #ifdef INET if (av->assoc_value == AF_INET) { av->assoc_value = sizeof(struct sockaddr_in); error = 0; } #endif #ifdef INET6 if (av->assoc_value == AF_INET6) { av->assoc_value = sizeof(struct sockaddr_in6); error = 0; } #endif if (error) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); } else { *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_GET_ASSOC_NUMBER: { uint32_t *value, cnt; SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize); SCTP_INP_RLOCK(inp); if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { /* Can't do this for a 1-1 socket */ error = EINVAL; SCTP_INP_RUNLOCK(inp); break; } cnt = 0; LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { cnt++; } SCTP_INP_RUNLOCK(inp); *value = cnt; *optsize = sizeof(uint32_t); break; } case 
SCTP_GET_ASSOC_ID_LIST: { struct sctp_assoc_ids *ids; uint32_t at; size_t limit; SCTP_CHECK_AND_CAST(ids, optval, struct sctp_assoc_ids, *optsize); SCTP_INP_RLOCK(inp); if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { /* Can't do this for a 1-1 socket */ error = EINVAL; SCTP_INP_RUNLOCK(inp); break; } at = 0; limit = (*optsize - sizeof(uint32_t)) / sizeof(sctp_assoc_t); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { if (at < limit) { ids->gaids_assoc_id[at++] = sctp_get_associd(stcb); if (at == 0) { error = EINVAL; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } else { error = EINVAL; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } SCTP_INP_RUNLOCK(inp); if (error == 0) { ids->gaids_number_of_ids = at; *optsize = ((at * sizeof(sctp_assoc_t)) + sizeof(uint32_t)); } break; } case SCTP_CONTEXT: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.context; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = inp->sctp_context; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_VRF_ID: { uint32_t *default_vrfid; SCTP_CHECK_AND_CAST(default_vrfid, optval, uint32_t, *optsize); *default_vrfid = inp->def_vrf_id; *optsize = sizeof(uint32_t); break; } case SCTP_GET_ASOC_VRF: { struct sctp_assoc_value *id; SCTP_CHECK_AND_CAST(id, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, id->assoc_id); if (stcb == NULL) { error = EINVAL; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); } else { id->assoc_value = stcb->asoc.vrf_id; SCTP_TCB_UNLOCK(stcb); *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_GET_VRF_IDS: { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); error = EOPNOTSUPP; break; } case SCTP_GET_NONCE_VALUES: { struct sctp_get_nonce_values *gnv; SCTP_CHECK_AND_CAST(gnv, optval, struct sctp_get_nonce_values, *optsize); SCTP_FIND_STCB(inp, stcb, gnv->gn_assoc_id); if (stcb) { gnv->gn_peers_tag = stcb->asoc.peer_vtag; gnv->gn_local_tag = stcb->asoc.my_vtag; SCTP_TCB_UNLOCK(stcb); *optsize = sizeof(struct sctp_get_nonce_values); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN); error = ENOTCONN; } break; } case SCTP_DELAYED_SACK: { struct sctp_sack_info *sack; SCTP_CHECK_AND_CAST(sack, optval, struct sctp_sack_info, *optsize); SCTP_FIND_STCB(inp, stcb, sack->sack_assoc_id); if (stcb) { sack->sack_delay = stcb->asoc.delayed_ack; sack->sack_freq = stcb->asoc.sack_freq; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (sack->sack_assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); sack->sack_delay = sctp_ticks_to_msecs(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]); sack->sack_freq = inp->sctp_ep.sctp_sack_freq; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = 
sizeof(struct sctp_sack_info); } break; } case SCTP_GET_SNDBUF_USE: { struct sctp_sockstat *ss; SCTP_CHECK_AND_CAST(ss, optval, struct sctp_sockstat, *optsize); SCTP_FIND_STCB(inp, stcb, ss->ss_assoc_id); if (stcb) { ss->ss_total_sndbuf = stcb->asoc.total_output_queue_size; ss->ss_total_recv_buf = (stcb->asoc.size_on_reasm_queue + stcb->asoc.size_on_all_streams); SCTP_TCB_UNLOCK(stcb); *optsize = sizeof(struct sctp_sockstat); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN); error = ENOTCONN; } break; } case SCTP_MAX_BURST: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.max_burst; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = inp->sctp_ep.max_burst; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_MAXSEG: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.sctp_frag_point; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = inp->sctp_frag_point; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_GET_STAT_LOG: error = sctp_fill_stat_log(optval, optsize); break; case SCTP_EVENTS: { struct sctp_event_subscribe *events; SCTP_CHECK_AND_CAST(events, optval, struct sctp_event_subscribe, *optsize); memset(events, 0, sizeof(struct sctp_event_subscribe)); SCTP_INP_RLOCK(inp); if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) events->sctp_data_io_event = 1; if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT)) events->sctp_association_event = 1; if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVPADDREVNT)) events->sctp_address_event = 1; if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) events->sctp_send_failure_event = 1; if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVPEERERR)) events->sctp_peer_error_event = 1; if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT)) events->sctp_shutdown_event = 1; if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT)) events->sctp_partial_delivery_event = 1; if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT)) events->sctp_adaptation_layer_event = 1; if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTHEVNT)) events->sctp_authentication_event = 1; if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DRYEVNT)) events->sctp_sender_dry_event = 1; if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT)) events->sctp_stream_reset_event = 1; SCTP_INP_RUNLOCK(inp); *optsize = sizeof(struct sctp_event_subscribe); break; } case SCTP_ADAPTATION_LAYER: { uint32_t *value; SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize); SCTP_INP_RLOCK(inp); *value = inp->sctp_ep.adaptation_layer_indicator; SCTP_INP_RUNLOCK(inp); *optsize = sizeof(uint32_t); 
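		/*
		 * Example (illustrative userland sketch, not part of this
		 * file): the SCTP_EVENTS getter above rebuilds the
		 * event-subscription structure from the endpoint feature
		 * flags. An application could read it back as sketched
		 * below; "sd" is a placeholder for an SCTP socket.
		 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <stdio.h>

static void
show_event_subscription(int sd)
{
	struct sctp_event_subscribe ev;
	socklen_t len = sizeof(ev);

	if (getsockopt(sd, IPPROTO_SCTP, SCTP_EVENTS, &ev, &len) == 0) {
		printf("assoc change: %d, shutdown: %d\n",
		    ev.sctp_association_event, ev.sctp_shutdown_event);
	}
}
#endif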
break; } case SCTP_SET_INITIAL_DBG_SEQ: { uint32_t *value; SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize); SCTP_INP_RLOCK(inp); *value = inp->sctp_ep.initial_sequence_debug; SCTP_INP_RUNLOCK(inp); *optsize = sizeof(uint32_t); break; } case SCTP_GET_LOCAL_ADDR_SIZE: { uint32_t *value; SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize); SCTP_INP_RLOCK(inp); *value = (uint32_t)sctp_max_size_addresses(inp); SCTP_INP_RUNLOCK(inp); *optsize = sizeof(uint32_t); break; } case SCTP_GET_REMOTE_ADDR_SIZE: { uint32_t *value; struct sctp_nets *net; size_t size; SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize); /* FIXME MT: change to sctp_assoc_value? */ SCTP_FIND_STCB(inp, stcb, (sctp_assoc_t)*value); if (stcb != NULL) { size = 0; /* Count the sizes */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { switch (net->ro._l_addr.sa.sa_family) { #ifdef INET case AF_INET: #ifdef INET6 if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) { size += sizeof(struct sockaddr_in6); } else { size += sizeof(struct sockaddr_in); } #else size += sizeof(struct sockaddr_in); #endif break; #endif #ifdef INET6 case AF_INET6: size += sizeof(struct sockaddr_in6); break; #endif default: break; } } SCTP_TCB_UNLOCK(stcb); *value = (uint32_t)size; *optsize = sizeof(uint32_t); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((sctp_assoc_t)*value <= SCTP_ALL_ASSOC)) { error = EINVAL; } else { error = ENOENT; } SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); } break; } case SCTP_GET_PEER_ADDRESSES: /* * Get the address information, an array is passed in to * fill up we pack it. */ { size_t cpsz, left; struct sockaddr *addr; struct sctp_nets *net; struct sctp_getaddresses *saddr; SCTP_CHECK_AND_CAST(saddr, optval, struct sctp_getaddresses, *optsize); SCTP_FIND_STCB(inp, stcb, saddr->sget_assoc_id); if (stcb != NULL) { left = *optsize - offsetof(struct sctp_getaddresses, addr); *optsize = offsetof(struct sctp_getaddresses, addr); addr = &saddr->addr[0].sa; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { switch (net->ro._l_addr.sa.sa_family) { #ifdef INET case AF_INET: #ifdef INET6 if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) { cpsz = sizeof(struct sockaddr_in6); } else { cpsz = sizeof(struct sockaddr_in); } #else cpsz = sizeof(struct sockaddr_in); #endif break; #endif #ifdef INET6 case AF_INET6: cpsz = sizeof(struct sockaddr_in6); break; #endif default: cpsz = 0; break; } if (cpsz == 0) { break; } if (left < cpsz) { /* not enough room. 
*/ break; } #if defined(INET) && defined(INET6) if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) && (net->ro._l_addr.sa.sa_family == AF_INET)) { /* Must map the address */ in6_sin_2_v4mapsin6(&net->ro._l_addr.sin, (struct sockaddr_in6 *)addr); } else { memcpy(addr, &net->ro._l_addr, cpsz); } #else memcpy(addr, &net->ro._l_addr, cpsz); #endif ((struct sockaddr_in *)addr)->sin_port = stcb->rport; addr = (struct sockaddr *)((caddr_t)addr + cpsz); left -= cpsz; *optsize += cpsz; } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (saddr->sget_assoc_id <= SCTP_ALL_ASSOC)) { error = EINVAL; } else { error = ENOENT; } SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); } break; } case SCTP_GET_LOCAL_ADDRESSES: { size_t limit, actual; struct sctp_getaddresses *saddr; SCTP_CHECK_AND_CAST(saddr, optval, struct sctp_getaddresses, *optsize); SCTP_FIND_STCB(inp, stcb, saddr->sget_assoc_id); if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((saddr->sget_assoc_id == SCTP_CURRENT_ASSOC) || (saddr->sget_assoc_id == SCTP_ALL_ASSOC))) { error = EINVAL; } else { limit = *optsize - offsetof(struct sctp_getaddresses, addr); actual = sctp_fill_up_addresses(inp, stcb, limit, &saddr->addr[0].sa); *optsize = offsetof(struct sctp_getaddresses, addr) + actual; } if (stcb != NULL) { SCTP_TCB_UNLOCK(stcb); } break; } case SCTP_PEER_ADDR_PARAMS: { struct sctp_paddrparams *paddrp; struct sctp_nets *net; struct sockaddr *addr; #if defined(INET) && defined(INET6) struct sockaddr_in sin_store; #endif SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, *optsize); SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id); #if defined(INET) && defined(INET6) if (paddrp->spp_address.ss_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&paddrp->spp_address; if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { in6_sin6_2_sin(&sin_store, sin6); addr = (struct sockaddr *)&sin_store; } else { addr = (struct sockaddr *)&paddrp->spp_address; } } else { addr = (struct sockaddr *)&paddrp->spp_address; } #else addr = (struct sockaddr *)&paddrp->spp_address; #endif if (stcb != NULL) { net = sctp_findnet(stcb, addr); } else { /* * We increment here since * sctp_findassociation_ep_addr() wil do a * decrement if it finds the stcb as long as * the locked tcb (last argument) is NOT a * TCB.. aka NULL. 
*/ net = NULL; SCTP_INP_INCR_REF(inp); stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL); if (stcb == NULL) { SCTP_INP_DECR_REF(inp); } } if ((stcb != NULL) && (net == NULL)) { #ifdef INET if (addr->sa_family == AF_INET) { struct sockaddr_in *sin; sin = (struct sockaddr_in *)addr; if (sin->sin_addr.s_addr != INADDR_ANY) { error = EINVAL; SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } else #endif #ifdef INET6 if (addr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)addr; if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { error = EINVAL; SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } else #endif { error = EAFNOSUPPORT; SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } if (stcb != NULL) { /* Applies to the specific association */ paddrp->spp_flags = 0; if (net != NULL) { paddrp->spp_hbinterval = net->heart_beat_delay; paddrp->spp_pathmaxrxt = net->failure_threshold; paddrp->spp_pathmtu = net->mtu; switch (net->ro._l_addr.sa.sa_family) { #ifdef INET case AF_INET: paddrp->spp_pathmtu -= SCTP_MIN_V4_OVERHEAD; break; #endif #ifdef INET6 case AF_INET6: paddrp->spp_pathmtu -= SCTP_MIN_OVERHEAD; break; #endif default: break; } /* get flags for HB */ if (net->dest_state & SCTP_ADDR_NOHB) { paddrp->spp_flags |= SPP_HB_DISABLE; } else { paddrp->spp_flags |= SPP_HB_ENABLE; } /* get flags for PMTU */ if (net->dest_state & SCTP_ADDR_NO_PMTUD) { paddrp->spp_flags |= SPP_PMTUD_DISABLE; } else { paddrp->spp_flags |= SPP_PMTUD_ENABLE; } if (net->dscp & 0x01) { paddrp->spp_dscp = net->dscp & 0xfc; paddrp->spp_flags |= SPP_DSCP; } #ifdef INET6 if ((net->ro._l_addr.sa.sa_family == AF_INET6) && (net->flowlabel & 0x80000000)) { paddrp->spp_ipv6_flowlabel = net->flowlabel & 0x000fffff; paddrp->spp_flags |= SPP_IPV6_FLOWLABEL; } #endif } else { /* * No destination so return default * value */ paddrp->spp_pathmaxrxt = stcb->asoc.def_net_failure; paddrp->spp_pathmtu = stcb->asoc.default_mtu; if (stcb->asoc.default_dscp & 0x01) { paddrp->spp_dscp = stcb->asoc.default_dscp & 0xfc; paddrp->spp_flags |= SPP_DSCP; } #ifdef INET6 if (stcb->asoc.default_flowlabel & 0x80000000) { paddrp->spp_ipv6_flowlabel = stcb->asoc.default_flowlabel & 0x000fffff; paddrp->spp_flags |= SPP_IPV6_FLOWLABEL; } #endif /* default settings should be these */ if (sctp_stcb_is_feature_on(inp, stcb, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) { paddrp->spp_flags |= SPP_HB_DISABLE; } else { paddrp->spp_flags |= SPP_HB_ENABLE; } if (sctp_stcb_is_feature_on(inp, stcb, SCTP_PCB_FLAGS_DO_NOT_PMTUD)) { paddrp->spp_flags |= SPP_PMTUD_DISABLE; } else { paddrp->spp_flags |= SPP_PMTUD_ENABLE; } paddrp->spp_hbinterval = stcb->asoc.heart_beat_delay; } paddrp->spp_assoc_id = sctp_get_associd(stcb); SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (paddrp->spp_assoc_id == SCTP_FUTURE_ASSOC))) { /* Use endpoint defaults */ SCTP_INP_RLOCK(inp); paddrp->spp_pathmaxrxt = inp->sctp_ep.def_net_failure; paddrp->spp_hbinterval = sctp_ticks_to_msecs(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]); paddrp->spp_assoc_id = SCTP_FUTURE_ASSOC; /* get inp's default */ if (inp->sctp_ep.default_dscp & 0x01) { paddrp->spp_dscp = inp->sctp_ep.default_dscp & 0xfc; paddrp->spp_flags |= SPP_DSCP; } #ifdef INET6 if ((inp->sctp_flags & 
SCTP_PCB_FLAGS_BOUND_V6) && (inp->sctp_ep.default_flowlabel & 0x80000000)) { paddrp->spp_ipv6_flowlabel = inp->sctp_ep.default_flowlabel & 0x000fffff; paddrp->spp_flags |= SPP_IPV6_FLOWLABEL; } #endif paddrp->spp_pathmtu = inp->sctp_ep.default_mtu; if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) { paddrp->spp_flags |= SPP_HB_ENABLE; } else { paddrp->spp_flags |= SPP_HB_DISABLE; } if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_NOT_PMTUD)) { paddrp->spp_flags |= SPP_PMTUD_ENABLE; } else { paddrp->spp_flags |= SPP_PMTUD_DISABLE; } SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_paddrparams); } break; } case SCTP_GET_PEER_ADDR_INFO: { struct sctp_paddrinfo *paddri; struct sctp_nets *net; struct sockaddr *addr; #if defined(INET) && defined(INET6) struct sockaddr_in sin_store; #endif SCTP_CHECK_AND_CAST(paddri, optval, struct sctp_paddrinfo, *optsize); SCTP_FIND_STCB(inp, stcb, paddri->spinfo_assoc_id); #if defined(INET) && defined(INET6) if (paddri->spinfo_address.ss_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&paddri->spinfo_address; if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { in6_sin6_2_sin(&sin_store, sin6); addr = (struct sockaddr *)&sin_store; } else { addr = (struct sockaddr *)&paddri->spinfo_address; } } else { addr = (struct sockaddr *)&paddri->spinfo_address; } #else addr = (struct sockaddr *)&paddri->spinfo_address; #endif if (stcb != NULL) { net = sctp_findnet(stcb, addr); } else { /* * We increment here since * sctp_findassociation_ep_addr() wil do a * decrement if it finds the stcb as long as * the locked tcb (last argument) is NOT a * TCB.. aka NULL. */ net = NULL; SCTP_INP_INCR_REF(inp); stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL); if (stcb == NULL) { SCTP_INP_DECR_REF(inp); } } if ((stcb != NULL) && (net != NULL)) { if (net->dest_state & SCTP_ADDR_UNCONFIRMED) { /* It's unconfirmed */ paddri->spinfo_state = SCTP_UNCONFIRMED; } else if (net->dest_state & SCTP_ADDR_REACHABLE) { /* It's active */ paddri->spinfo_state = SCTP_ACTIVE; } else { /* It's inactive */ paddri->spinfo_state = SCTP_INACTIVE; } paddri->spinfo_cwnd = net->cwnd; paddri->spinfo_srtt = net->lastsa >> SCTP_RTT_SHIFT; paddri->spinfo_rto = net->RTO; paddri->spinfo_assoc_id = sctp_get_associd(stcb); paddri->spinfo_mtu = net->mtu; switch (addr->sa_family) { #if defined(INET) case AF_INET: paddri->spinfo_mtu -= SCTP_MIN_V4_OVERHEAD; break; #endif #if defined(INET6) case AF_INET6: paddri->spinfo_mtu -= SCTP_MIN_OVERHEAD; break; #endif default: break; } SCTP_TCB_UNLOCK(stcb); *optsize = sizeof(struct sctp_paddrinfo); } else { if (stcb != NULL) { SCTP_TCB_UNLOCK(stcb); } SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); error = ENOENT; } break; } case SCTP_PCB_STATUS: { struct sctp_pcbinfo *spcb; SCTP_CHECK_AND_CAST(spcb, optval, struct sctp_pcbinfo, *optsize); sctp_fill_pcbinfo(spcb); *optsize = sizeof(struct sctp_pcbinfo); break; } case SCTP_STATUS: { struct sctp_nets *net; struct sctp_status *sstat; SCTP_CHECK_AND_CAST(sstat, optval, struct sctp_status, *optsize); SCTP_FIND_STCB(inp, stcb, sstat->sstat_assoc_id); if (stcb == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } sstat->sstat_state = sctp_map_assoc_state(stcb->asoc.state); sstat->sstat_assoc_id = sctp_get_associd(stcb); sstat->sstat_rwnd = stcb->asoc.peers_rwnd; sstat->sstat_unackdata = 
stcb->asoc.sent_queue_cnt; /* * We can't include chunks that have been passed to * the socket layer. Only things in queue. */ sstat->sstat_penddata = (stcb->asoc.cnt_on_reasm_queue + stcb->asoc.cnt_on_all_streams); sstat->sstat_instrms = stcb->asoc.streamincnt; sstat->sstat_outstrms = stcb->asoc.streamoutcnt; sstat->sstat_fragmentation_point = sctp_get_frag_point(stcb); net = stcb->asoc.primary_destination; if (net != NULL) { memcpy(&sstat->sstat_primary.spinfo_address, &net->ro._l_addr, ((struct sockaddr *)(&net->ro._l_addr))->sa_len); ((struct sockaddr_in *)&sstat->sstat_primary.spinfo_address)->sin_port = stcb->rport; /* * Again the user can get info from * sctp_constants.h for what the state of * the network is. */ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) { /* It's unconfirmed */ sstat->sstat_primary.spinfo_state = SCTP_UNCONFIRMED; } else if (net->dest_state & SCTP_ADDR_REACHABLE) { /* It's active */ sstat->sstat_primary.spinfo_state = SCTP_ACTIVE; } else { /* It's inactive */ sstat->sstat_primary.spinfo_state = SCTP_INACTIVE; } sstat->sstat_primary.spinfo_cwnd = net->cwnd; sstat->sstat_primary.spinfo_srtt = net->lastsa >> SCTP_RTT_SHIFT; sstat->sstat_primary.spinfo_rto = net->RTO; sstat->sstat_primary.spinfo_mtu = net->mtu; switch (stcb->asoc.primary_destination->ro._l_addr.sa.sa_family) { #if defined(INET) case AF_INET: sstat->sstat_primary.spinfo_mtu -= SCTP_MIN_V4_OVERHEAD; break; #endif #if defined(INET6) case AF_INET6: sstat->sstat_primary.spinfo_mtu -= SCTP_MIN_OVERHEAD; break; #endif default: break; } } else { memset(&sstat->sstat_primary, 0, sizeof(struct sctp_paddrinfo)); } sstat->sstat_primary.spinfo_assoc_id = sctp_get_associd(stcb); SCTP_TCB_UNLOCK(stcb); *optsize = sizeof(struct sctp_status); break; } case SCTP_RTOINFO: { struct sctp_rtoinfo *srto; SCTP_CHECK_AND_CAST(srto, optval, struct sctp_rtoinfo, *optsize); SCTP_FIND_STCB(inp, stcb, srto->srto_assoc_id); if (stcb) { srto->srto_initial = stcb->asoc.initial_rto; srto->srto_max = stcb->asoc.maxrto; srto->srto_min = stcb->asoc.minrto; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (srto->srto_assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); srto->srto_initial = inp->sctp_ep.initial_rto; srto->srto_max = inp->sctp_ep.sctp_maxrto; srto->srto_min = inp->sctp_ep.sctp_minrto; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_rtoinfo); } break; } case SCTP_TIMEOUTS: { struct sctp_timeouts *stimo; SCTP_CHECK_AND_CAST(stimo, optval, struct sctp_timeouts, *optsize); SCTP_FIND_STCB(inp, stcb, stimo->stimo_assoc_id); if (stcb) { stimo->stimo_init = stcb->asoc.timoinit; stimo->stimo_data = stcb->asoc.timodata; stimo->stimo_sack = stcb->asoc.timosack; stimo->stimo_shutdown = stcb->asoc.timoshutdown; stimo->stimo_heartbeat = stcb->asoc.timoheartbeat; stimo->stimo_cookie = stcb->asoc.timocookie; stimo->stimo_shutdownack = stcb->asoc.timoshutdownack; SCTP_TCB_UNLOCK(stcb); *optsize = sizeof(struct sctp_timeouts); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } break; } case SCTP_ASSOCINFO: { struct sctp_assocparams *sasoc; SCTP_CHECK_AND_CAST(sasoc, optval, struct sctp_assocparams, *optsize); SCTP_FIND_STCB(inp, stcb, sasoc->sasoc_assoc_id); if (stcb) { sasoc->sasoc_cookie_life = sctp_ticks_to_msecs(stcb->asoc.cookie_life); 
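			/*
			 * Example (illustrative userland sketch, not part of
			 * this file): SCTP_RTOINFO above follows the common
			 * getter pattern, per-association values when an
			 * stcb is found, endpoint defaults otherwise. A
			 * hypothetical caller, with "sd" and "assoc_id" as
			 * placeholders:
			 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <stdio.h>
#include <string.h>

static void
show_rto(int sd, sctp_assoc_t assoc_id)
{
	struct sctp_rtoinfo srto;
	socklen_t len = sizeof(srto);

	memset(&srto, 0, sizeof(srto));
	srto.srto_assoc_id = assoc_id;	/* or SCTP_FUTURE_ASSOC for defaults */
	if (getsockopt(sd, IPPROTO_SCTP, SCTP_RTOINFO, &srto, &len) == 0)
		printf("RTO initial/min/max: %u/%u/%u ms\n",
		    srto.srto_initial, srto.srto_min, srto.srto_max);
}
#endif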
sasoc->sasoc_asocmaxrxt = stcb->asoc.max_send_times; sasoc->sasoc_number_peer_destinations = stcb->asoc.numnets; sasoc->sasoc_peer_rwnd = stcb->asoc.peers_rwnd; sasoc->sasoc_local_rwnd = stcb->asoc.my_rwnd; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (sasoc->sasoc_assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); sasoc->sasoc_cookie_life = sctp_ticks_to_msecs(inp->sctp_ep.def_cookie_life); sasoc->sasoc_asocmaxrxt = inp->sctp_ep.max_send_times; sasoc->sasoc_number_peer_destinations = 0; sasoc->sasoc_peer_rwnd = 0; sasoc->sasoc_local_rwnd = (uint32_t)sbspace(&inp->sctp_socket->so_rcv); SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assocparams); } break; } case SCTP_DEFAULT_SEND_PARAM: { struct sctp_sndrcvinfo *s_info; SCTP_CHECK_AND_CAST(s_info, optval, struct sctp_sndrcvinfo, *optsize); SCTP_FIND_STCB(inp, stcb, s_info->sinfo_assoc_id); if (stcb) { memcpy(s_info, &stcb->asoc.def_send, sizeof(stcb->asoc.def_send)); SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (s_info->sinfo_assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); memcpy(s_info, &inp->def_send, sizeof(inp->def_send)); SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_sndrcvinfo); } break; } case SCTP_INITMSG: { struct sctp_initmsg *sinit; SCTP_CHECK_AND_CAST(sinit, optval, struct sctp_initmsg, *optsize); SCTP_INP_RLOCK(inp); sinit->sinit_num_ostreams = inp->sctp_ep.pre_open_stream_count; sinit->sinit_max_instreams = inp->sctp_ep.max_open_streams_intome; sinit->sinit_max_attempts = inp->sctp_ep.max_init_times; sinit->sinit_max_init_timeo = inp->sctp_ep.initial_init_rto_max; SCTP_INP_RUNLOCK(inp); *optsize = sizeof(struct sctp_initmsg); break; } case SCTP_PRIMARY_ADDR: /* we allow a "get" operation on this */ { struct sctp_setprim *ssp; SCTP_CHECK_AND_CAST(ssp, optval, struct sctp_setprim, *optsize); SCTP_FIND_STCB(inp, stcb, ssp->ssp_assoc_id); if (stcb) { union sctp_sockstore *addr; addr = &stcb->asoc.primary_destination->ro._l_addr; switch (addr->sa.sa_family) { #ifdef INET case AF_INET: #ifdef INET6 if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) { in6_sin_2_v4mapsin6(&addr->sin, (struct sockaddr_in6 *)&ssp->ssp_addr); } else { memcpy(&ssp->ssp_addr, &addr->sin, sizeof(struct sockaddr_in)); } #else memcpy(&ssp->ssp_addr, &addr->sin, sizeof(struct sockaddr_in)); #endif break; #endif #ifdef INET6 case AF_INET6: memcpy(&ssp->ssp_addr, &addr->sin6, sizeof(struct sockaddr_in6)); break; #endif default: break; } SCTP_TCB_UNLOCK(stcb); *optsize = sizeof(struct sctp_setprim); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } break; } case SCTP_HMAC_IDENT: { struct sctp_hmacalgo *shmac; sctp_hmaclist_t *hmaclist; size_t size; int i; SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, *optsize); SCTP_INP_RLOCK(inp); hmaclist = inp->sctp_ep.local_hmacs; if (hmaclist == NULL) { /* no HMACs to return */ *optsize = sizeof(*shmac); SCTP_INP_RUNLOCK(inp); break; } /* is there room for all of the hmac ids? 
*/ size = sizeof(*shmac) + (hmaclist->num_algo * sizeof(shmac->shmac_idents[0])); if (*optsize < size) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_INP_RUNLOCK(inp); break; } /* copy in the list */ shmac->shmac_number_of_idents = hmaclist->num_algo; for (i = 0; i < hmaclist->num_algo; i++) { shmac->shmac_idents[i] = hmaclist->hmac[i]; } SCTP_INP_RUNLOCK(inp); *optsize = size; break; } case SCTP_AUTH_ACTIVE_KEY: { struct sctp_authkeyid *scact; SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid, *optsize); SCTP_FIND_STCB(inp, stcb, scact->scact_assoc_id); if (stcb) { /* get the active key on the assoc */ scact->scact_keynumber = stcb->asoc.authinfo.active_keyid; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (scact->scact_assoc_id == SCTP_FUTURE_ASSOC))) { /* get the endpoint active key */ SCTP_INP_RLOCK(inp); scact->scact_keynumber = inp->sctp_ep.default_keyid; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_authkeyid); } break; } case SCTP_LOCAL_AUTH_CHUNKS: { struct sctp_authchunks *sac; sctp_auth_chklist_t *chklist = NULL; size_t size = 0; SCTP_CHECK_AND_CAST(sac, optval, struct sctp_authchunks, *optsize); SCTP_FIND_STCB(inp, stcb, sac->gauth_assoc_id); if (stcb) { /* get off the assoc */ chklist = stcb->asoc.local_auth_chunks; /* is there enough space? */ size = sctp_auth_get_chklist_size(chklist); if (*optsize < (sizeof(struct sctp_authchunks) + size)) { error = EINVAL; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); } else { /* copy in the chunks */ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks); sac->gauth_number_of_chunks = (uint32_t)size; *optsize = sizeof(struct sctp_authchunks) + size; } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (sac->gauth_assoc_id == SCTP_FUTURE_ASSOC))) { /* get off the endpoint */ SCTP_INP_RLOCK(inp); chklist = inp->sctp_ep.local_auth_chunks; /* is there enough space? */ size = sctp_auth_get_chklist_size(chklist); if (*optsize < (sizeof(struct sctp_authchunks) + size)) { error = EINVAL; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); } else { /* copy in the chunks */ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks); sac->gauth_number_of_chunks = (uint32_t)size; *optsize = sizeof(struct sctp_authchunks) + size; } SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_PEER_AUTH_CHUNKS: { struct sctp_authchunks *sac; sctp_auth_chklist_t *chklist = NULL; size_t size = 0; SCTP_CHECK_AND_CAST(sac, optval, struct sctp_authchunks, *optsize); SCTP_FIND_STCB(inp, stcb, sac->gauth_assoc_id); if (stcb) { /* get off the assoc */ chklist = stcb->asoc.peer_auth_chunks; /* is there enough space? 
*/ size = sctp_auth_get_chklist_size(chklist); if (*optsize < (sizeof(struct sctp_authchunks) + size)) { error = EINVAL; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); } else { /* copy in the chunks */ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks); sac->gauth_number_of_chunks = (uint32_t)size; *optsize = sizeof(struct sctp_authchunks) + size; } SCTP_TCB_UNLOCK(stcb); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); error = ENOENT; } break; } case SCTP_EVENT: { struct sctp_event *event; uint32_t event_type; SCTP_CHECK_AND_CAST(event, optval, struct sctp_event, *optsize); SCTP_FIND_STCB(inp, stcb, event->se_assoc_id); switch (event->se_type) { case SCTP_ASSOC_CHANGE: event_type = SCTP_PCB_FLAGS_RECVASSOCEVNT; break; case SCTP_PEER_ADDR_CHANGE: event_type = SCTP_PCB_FLAGS_RECVPADDREVNT; break; case SCTP_REMOTE_ERROR: event_type = SCTP_PCB_FLAGS_RECVPEERERR; break; case SCTP_SEND_FAILED: event_type = SCTP_PCB_FLAGS_RECVSENDFAILEVNT; break; case SCTP_SHUTDOWN_EVENT: event_type = SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT; break; case SCTP_ADAPTATION_INDICATION: event_type = SCTP_PCB_FLAGS_ADAPTATIONEVNT; break; case SCTP_PARTIAL_DELIVERY_EVENT: event_type = SCTP_PCB_FLAGS_PDAPIEVNT; break; case SCTP_AUTHENTICATION_EVENT: event_type = SCTP_PCB_FLAGS_AUTHEVNT; break; case SCTP_STREAM_RESET_EVENT: event_type = SCTP_PCB_FLAGS_STREAM_RESETEVNT; break; case SCTP_SENDER_DRY_EVENT: event_type = SCTP_PCB_FLAGS_DRYEVNT; break; case SCTP_NOTIFICATIONS_STOPPED_EVENT: event_type = 0; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP); error = ENOTSUP; break; case SCTP_ASSOC_RESET_EVENT: event_type = SCTP_PCB_FLAGS_ASSOC_RESETEVNT; break; case SCTP_STREAM_CHANGE_EVENT: event_type = SCTP_PCB_FLAGS_STREAM_CHANGEEVNT; break; case SCTP_SEND_FAILED_EVENT: event_type = SCTP_PCB_FLAGS_RECVNSENDFAILEVNT; break; default: event_type = 0; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } if (event_type > 0) { if (stcb) { event->se_on = sctp_stcb_is_feature_on(inp, stcb, event_type); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (event->se_assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); event->se_on = sctp_is_feature_on(inp, event_type); SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } } if (stcb != NULL) { SCTP_TCB_UNLOCK(stcb); } if (error == 0) { *optsize = sizeof(struct sctp_event); } break; } case SCTP_RECVRCVINFO: if (*optsize < sizeof(int)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } else { SCTP_INP_RLOCK(inp); *(int *)optval = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO); SCTP_INP_RUNLOCK(inp); *optsize = sizeof(int); } break; case SCTP_RECVNXTINFO: if (*optsize < sizeof(int)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } else { SCTP_INP_RLOCK(inp); *(int *)optval = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO); SCTP_INP_RUNLOCK(inp); *optsize = sizeof(int); } break; case SCTP_DEFAULT_SNDINFO: { struct sctp_sndinfo *info; SCTP_CHECK_AND_CAST(info, optval, struct sctp_sndinfo, *optsize); SCTP_FIND_STCB(inp, stcb, info->snd_assoc_id); if (stcb) { info->snd_sid = stcb->asoc.def_send.sinfo_stream; info->snd_flags = stcb->asoc.def_send.sinfo_flags; info->snd_flags &= 0xfff0; info->snd_ppid = 
stcb->asoc.def_send.sinfo_ppid; info->snd_context = stcb->asoc.def_send.sinfo_context; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (info->snd_assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); info->snd_sid = inp->def_send.sinfo_stream; info->snd_flags = inp->def_send.sinfo_flags; info->snd_flags &= 0xfff0; info->snd_ppid = inp->def_send.sinfo_ppid; info->snd_context = inp->def_send.sinfo_context; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_sndinfo); } break; } case SCTP_DEFAULT_PRINFO: { struct sctp_default_prinfo *info; SCTP_CHECK_AND_CAST(info, optval, struct sctp_default_prinfo, *optsize); SCTP_FIND_STCB(inp, stcb, info->pr_assoc_id); if (stcb) { info->pr_policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags); info->pr_value = stcb->asoc.def_send.sinfo_timetolive; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (info->pr_assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); info->pr_policy = PR_SCTP_POLICY(inp->def_send.sinfo_flags); info->pr_value = inp->def_send.sinfo_timetolive; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_default_prinfo); } break; } case SCTP_PEER_ADDR_THLDS: { struct sctp_paddrthlds *thlds; struct sctp_nets *net; struct sockaddr *addr; #if defined(INET) && defined(INET6) struct sockaddr_in sin_store; #endif SCTP_CHECK_AND_CAST(thlds, optval, struct sctp_paddrthlds, *optsize); SCTP_FIND_STCB(inp, stcb, thlds->spt_assoc_id); #if defined(INET) && defined(INET6) if (thlds->spt_address.ss_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&thlds->spt_address; if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { in6_sin6_2_sin(&sin_store, sin6); addr = (struct sockaddr *)&sin_store; } else { addr = (struct sockaddr *)&thlds->spt_address; } } else { addr = (struct sockaddr *)&thlds->spt_address; } #else addr = (struct sockaddr *)&thlds->spt_address; #endif if (stcb != NULL) { net = sctp_findnet(stcb, addr); } else { /* * We increment here since * sctp_findassociation_ep_addr() wil do a * decrement if it finds the stcb as long as * the locked tcb (last argument) is NOT a * TCB.. aka NULL. 
*/ net = NULL; SCTP_INP_INCR_REF(inp); stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL); if (stcb == NULL) { SCTP_INP_DECR_REF(inp); } } if ((stcb != NULL) && (net == NULL)) { #ifdef INET if (addr->sa_family == AF_INET) { struct sockaddr_in *sin; sin = (struct sockaddr_in *)addr; if (sin->sin_addr.s_addr != INADDR_ANY) { error = EINVAL; SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } else #endif #ifdef INET6 if (addr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)addr; if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { error = EINVAL; SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } else #endif { error = EAFNOSUPPORT; SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } if (stcb != NULL) { if (net != NULL) { thlds->spt_pathmaxrxt = net->failure_threshold; thlds->spt_pathpfthld = net->pf_threshold; thlds->spt_pathcpthld = 0xffff; } else { thlds->spt_pathmaxrxt = stcb->asoc.def_net_failure; thlds->spt_pathpfthld = stcb->asoc.def_net_pf_threshold; thlds->spt_pathcpthld = 0xffff; } thlds->spt_assoc_id = sctp_get_associd(stcb); SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (thlds->spt_assoc_id == SCTP_FUTURE_ASSOC))) { /* Use endpoint defaults */ SCTP_INP_RLOCK(inp); thlds->spt_pathmaxrxt = inp->sctp_ep.def_net_failure; thlds->spt_pathpfthld = inp->sctp_ep.def_net_pf_threshold; thlds->spt_pathcpthld = 0xffff; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_paddrthlds); } break; } case SCTP_REMOTE_UDP_ENCAPS_PORT: { struct sctp_udpencaps *encaps; struct sctp_nets *net; struct sockaddr *addr; #if defined(INET) && defined(INET6) struct sockaddr_in sin_store; #endif SCTP_CHECK_AND_CAST(encaps, optval, struct sctp_udpencaps, *optsize); SCTP_FIND_STCB(inp, stcb, encaps->sue_assoc_id); #if defined(INET) && defined(INET6) if (encaps->sue_address.ss_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&encaps->sue_address; if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { in6_sin6_2_sin(&sin_store, sin6); addr = (struct sockaddr *)&sin_store; } else { addr = (struct sockaddr *)&encaps->sue_address; } } else { addr = (struct sockaddr *)&encaps->sue_address; } #else addr = (struct sockaddr *)&encaps->sue_address; #endif if (stcb) { net = sctp_findnet(stcb, addr); } else { /* * We increment here since * sctp_findassociation_ep_addr() wil do a * decrement if it finds the stcb as long as * the locked tcb (last argument) is NOT a * TCB.. aka NULL. 
*/ net = NULL; SCTP_INP_INCR_REF(inp); stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL); if (stcb == NULL) { SCTP_INP_DECR_REF(inp); } } if ((stcb != NULL) && (net == NULL)) { #ifdef INET if (addr->sa_family == AF_INET) { struct sockaddr_in *sin; sin = (struct sockaddr_in *)addr; if (sin->sin_addr.s_addr != INADDR_ANY) { error = EINVAL; SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } else #endif #ifdef INET6 if (addr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)addr; if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { error = EINVAL; SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } else #endif { error = EAFNOSUPPORT; SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } if (stcb != NULL) { if (net) { encaps->sue_port = net->port; } else { encaps->sue_port = stcb->asoc.port; } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (encaps->sue_assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); encaps->sue_port = inp->sctp_ep.port; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_udpencaps); } break; } case SCTP_ECN_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.ecn_supported; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = inp->ecn_supported; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_PR_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.prsctp_supported; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = inp->prsctp_supported; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_AUTH_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.auth_supported; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = inp->auth_supported; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; 
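		/*
		 * Example (illustrative userland sketch, not part of this
		 * file): SCTP_ECN_SUPPORTED, SCTP_PR_SUPPORTED and
		 * SCTP_AUTH_SUPPORTED above all share one shape, a struct
		 * sctp_assoc_value keyed by assoc_id. On an existing
		 * association the value reflects what was negotiated with
		 * the peer, not merely the local setting. A hypothetical
		 * helper, with "sd" as a placeholder:
		 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <string.h>

static int
feature_negotiated(int sd, sctp_assoc_t assoc_id, int opt)
{
	struct sctp_assoc_value av;
	socklen_t len = sizeof(av);

	memset(&av, 0, sizeof(av));
	av.assoc_id = assoc_id;
	if (getsockopt(sd, IPPROTO_SCTP, opt, &av, &len) != 0)
		return (-1);
	return (av.assoc_value != 0);	/* e.g. opt == SCTP_PR_SUPPORTED */
}
#endif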
} case SCTP_ASCONF_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.asconf_supported; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = inp->asconf_supported; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_RECONFIG_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.reconfig_supported; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = inp->reconfig_supported; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_NRSACK_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.nrsack_supported; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = inp->nrsack_supported; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_PKTDROP_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.pktdrop_supported; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = inp->pktdrop_supported; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } case SCTP_ENABLE_STREAM_RESET: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = (uint32_t)stcb->asoc.local_strreset_support; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = (uint32_t)inp->local_strreset_support; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } 
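	/*
	 * Example (illustrative userland sketch, not part of this file):
	 * the PR-SCTP status getters below report abandoned-message
	 * counters per stream (SCTP_PR_STREAM_STATUS) or per association
	 * (SCTP_PR_ASSOC_STATUS). For the per-stream query, only the
	 * aggregate policy SCTP_PR_SCTP_ALL is accepted unless the kernel
	 * was built with SCTP_DETAILED_STR_STATS. "sd", "assoc_id" and
	 * "sid" are placeholders:
	 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void
show_pr_stream_status(int sd, sctp_assoc_t assoc_id, uint16_t sid)
{
	struct sctp_prstatus sp;
	socklen_t len = sizeof(sp);

	memset(&sp, 0, sizeof(sp));
	sp.sprstat_assoc_id = assoc_id;
	sp.sprstat_sid = sid;
	sp.sprstat_policy = SCTP_PR_SCTP_ALL;
	if (getsockopt(sd, IPPROTO_SCTP, SCTP_PR_STREAM_STATUS, &sp, &len) == 0)
		printf("abandoned sent/unsent: %ju/%ju\n",
		    (uintmax_t)sp.sprstat_abandoned_sent,
		    (uintmax_t)sp.sprstat_abandoned_unsent);
}
#endif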
case SCTP_PR_STREAM_STATUS: { struct sctp_prstatus *sprstat; uint16_t sid; uint16_t policy; SCTP_CHECK_AND_CAST(sprstat, optval, struct sctp_prstatus, *optsize); SCTP_FIND_STCB(inp, stcb, sprstat->sprstat_assoc_id); sid = sprstat->sprstat_sid; policy = sprstat->sprstat_policy; #if defined(SCTP_DETAILED_STR_STATS) if ((stcb != NULL) && (sid < stcb->asoc.streamoutcnt) && (policy != SCTP_PR_SCTP_NONE) && ((policy <= SCTP_PR_SCTP_MAX) || (policy == SCTP_PR_SCTP_ALL))) { if (policy == SCTP_PR_SCTP_ALL) { sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[0]; sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[0]; } else { sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[policy]; sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[policy]; } #else if ((stcb != NULL) && (sid < stcb->asoc.streamoutcnt) && (policy == SCTP_PR_SCTP_ALL)) { sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[0]; sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[0]; #endif } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } if (stcb != NULL) { SCTP_TCB_UNLOCK(stcb); } if (error == 0) { *optsize = sizeof(struct sctp_prstatus); } break; } case SCTP_PR_ASSOC_STATUS: { struct sctp_prstatus *sprstat; uint16_t policy; SCTP_CHECK_AND_CAST(sprstat, optval, struct sctp_prstatus, *optsize); SCTP_FIND_STCB(inp, stcb, sprstat->sprstat_assoc_id); policy = sprstat->sprstat_policy; if ((stcb != NULL) && (policy != SCTP_PR_SCTP_NONE) && ((policy <= SCTP_PR_SCTP_MAX) || (policy == SCTP_PR_SCTP_ALL))) { if (policy == SCTP_PR_SCTP_ALL) { sprstat->sprstat_abandoned_unsent = stcb->asoc.abandoned_unsent[0]; sprstat->sprstat_abandoned_sent = stcb->asoc.abandoned_sent[0]; } else { sprstat->sprstat_abandoned_unsent = stcb->asoc.abandoned_unsent[policy]; sprstat->sprstat_abandoned_sent = stcb->asoc.abandoned_sent[policy]; } } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } if (stcb != NULL) { SCTP_TCB_UNLOCK(stcb); } if (error == 0) { *optsize = sizeof(struct sctp_prstatus); } break; } case SCTP_MAX_CWND: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { av->assoc_value = stcb->asoc.max_cwnd; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_RLOCK(inp); av->assoc_value = inp->max_cwnd; SCTP_INP_RUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } if (error == 0) { *optsize = sizeof(struct sctp_assoc_value); } break; } default: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT); error = ENOPROTOOPT; break; } /* end switch (sopt->sopt_name) */ if (error) { *optsize = 0; } return (error); } static int sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, void *p) { int error, set_opt; uint32_t *mopt; struct sctp_tcb *stcb = NULL; struct sctp_inpcb *inp = NULL; uint32_t vrf_id; if (optval == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } vrf_id = 
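/*
 * Usage sketch (illustrative; header assumptions as above): reading
 * the per-stream abandoned-message counters handled by the
 * SCTP_PR_STREAM_STATUS case above.  As the #if in that case shows,
 * only SCTP_PR_SCTP_ALL works unless the kernel was built with
 * SCTP_DETAILED_STR_STATS; per-policy slots exist only then.
 */
#if 0	/* illustrative only, not compiled */
static int
example_pr_stream_status(int fd, sctp_assoc_t aid, uint16_t sid,
    uint64_t *unsent, uint64_t *sent)
{
	struct sctp_prstatus sp;
	socklen_t len = sizeof(sp);

	memset(&sp, 0, sizeof(sp));
	sp.sprstat_assoc_id = aid;
	sp.sprstat_sid = sid;			/* must be < streamoutcnt */
	sp.sprstat_policy = SCTP_PR_SCTP_ALL;
	if (getsockopt(fd, IPPROTO_SCTP, SCTP_PR_STREAM_STATUS,
	    &sp, &len) < 0)
		return (-1);
	*unsent = sp.sprstat_abandoned_unsent;
	*sent = sp.sprstat_abandoned_sent;
	return (0);
}
#endif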
inp->def_vrf_id; error = 0; switch (optname) { case SCTP_NODELAY: case SCTP_AUTOCLOSE: case SCTP_AUTO_ASCONF: case SCTP_EXPLICIT_EOR: case SCTP_DISABLE_FRAGMENTS: case SCTP_USE_EXT_RCVINFO: case SCTP_I_WANT_MAPPED_V4_ADDR: /* copy in the option value */ SCTP_CHECK_AND_CAST(mopt, optval, uint32_t, optsize); set_opt = 0; if (error) break; switch (optname) { case SCTP_DISABLE_FRAGMENTS: set_opt = SCTP_PCB_FLAGS_NO_FRAGMENT; break; case SCTP_AUTO_ASCONF: /* * NOTE: we don't really support this flag */ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { /* only valid for bound all sockets */ if ((SCTP_BASE_SYSCTL(sctp_auto_asconf) == 0) && (*mopt != 0)) { /* forbidden by admin */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EPERM); return (EPERM); } set_opt = SCTP_PCB_FLAGS_AUTO_ASCONF; } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } break; case SCTP_EXPLICIT_EOR: set_opt = SCTP_PCB_FLAGS_EXPLICIT_EOR; break; case SCTP_USE_EXT_RCVINFO: set_opt = SCTP_PCB_FLAGS_EXT_RCVINFO; break; case SCTP_I_WANT_MAPPED_V4_ADDR: if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { set_opt = SCTP_PCB_FLAGS_NEEDS_MAPPED_V4; } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } break; case SCTP_NODELAY: set_opt = SCTP_PCB_FLAGS_NODELAY; break; case SCTP_AUTOCLOSE: if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } set_opt = SCTP_PCB_FLAGS_AUTOCLOSE; /* * The value is in ticks. Note this does not effect * old associations, only new ones. */ inp->sctp_ep.auto_close_time = sctp_secs_to_ticks(*mopt); break; } SCTP_INP_WLOCK(inp); if (*mopt != 0) { sctp_feature_on(inp, set_opt); } else { sctp_feature_off(inp, set_opt); } SCTP_INP_WUNLOCK(inp); break; case SCTP_REUSE_PORT: { SCTP_CHECK_AND_CAST(mopt, optval, uint32_t, optsize); if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) == 0) { /* Can't set it after we are bound */ error = EINVAL; break; } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)) { /* Can't do this for a 1-m socket */ error = EINVAL; break; } if (optval) sctp_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE); else sctp_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE); break; } case SCTP_PARTIAL_DELIVERY_POINT: { uint32_t *value; SCTP_CHECK_AND_CAST(value, optval, uint32_t, optsize); if (*value > SCTP_SB_LIMIT_RCV(so)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } inp->partial_delivery_point = *value; break; } case SCTP_FRAGMENT_INTERLEAVE: /* not yet until we re-write sctp_recvmsg() */ { uint32_t *level; SCTP_CHECK_AND_CAST(level, optval, uint32_t, optsize); if (*level == SCTP_FRAG_LEVEL_2) { sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE); sctp_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS); } else if (*level == SCTP_FRAG_LEVEL_1) { sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE); sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS); } else if (*level == SCTP_FRAG_LEVEL_0) { sctp_feature_off(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE); sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } break; } case SCTP_INTERLEAVING_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, 
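/*
 * Usage sketch (illustrative, in comment form since this point sits
 * inside an option case): the flag options grouped above
 * (SCTP_NODELAY, SCTP_AUTOCLOSE, SCTP_EXPLICIT_EOR, ...) each take a
 * plain uint32_t and toggle one PCB feature bit, e.g.:
 *
 *	uint32_t on = 1;
 *	(void)setsockopt(fd, IPPROTO_SCTP, SCTP_NODELAY, &on, sizeof(on));
 *
 * For SCTP_AUTOCLOSE the value is in seconds (converted with
 * sctp_secs_to_ticks() above), only affects associations created
 * after the call, and is rejected on one-to-one style sockets.
 */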
EINVAL); error = EINVAL; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_WLOCK(inp); if (av->assoc_value == 0) { inp->idata_supported = 0; } else { if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE)) && (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS))) { inp->idata_supported = 1; } else { /* * Must have Frag * interleave and * stream interleave * on */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_CMT_ON_OFF: if (SCTP_BASE_SYSCTL(sctp_cmt_on_off)) { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); if (av->assoc_value > SCTP_CMT_MAX) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { stcb->asoc.sctp_cmt_on_off = av->assoc_value; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((av->assoc_id == SCTP_FUTURE_ASSOC) || (av->assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); inp->sctp_cmt_on_off = av->assoc_value; SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((av->assoc_id == SCTP_CURRENT_ASSOC) || (av->assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); stcb->asoc.sctp_cmt_on_off = av->assoc_value; SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT); error = ENOPROTOOPT; } break; case SCTP_PLUGGABLE_CC: { struct sctp_assoc_value *av; struct sctp_nets *net; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); if ((av->assoc_value != SCTP_CC_RFC2581) && (av->assoc_value != SCTP_CC_HSTCP) && (av->assoc_value != SCTP_CC_HTCP) && (av->assoc_value != SCTP_CC_RTCC)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { stcb->asoc.cc_functions = sctp_cc_functions[av->assoc_value]; stcb->asoc.congestion_control_module = av->assoc_value; if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) { TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net); } } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((av->assoc_id == SCTP_FUTURE_ASSOC) || (av->assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); inp->sctp_ep.sctp_default_cc_module = av->assoc_value; SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((av->assoc_id == SCTP_CURRENT_ASSOC) || (av->assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); stcb->asoc.cc_functions = sctp_cc_functions[av->assoc_value]; stcb->asoc.congestion_control_module = av->assoc_value; if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) { TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { 
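/*
 * Usage sketch (illustrative; header assumptions as above): the
 * SCTP_PLUGGABLE_CC case accepts SCTP_CC_RFC2581, SCTP_CC_HSTCP,
 * SCTP_CC_HTCP or SCTP_CC_RTCC, and on a one-to-many socket
 * SCTP_ALL_ASSOC applies the module to current and future
 * associations, as the two branches above show.
 */
#if 0	/* illustrative only, not compiled */
static int
example_select_cc(int fd)
{
	struct sctp_assoc_value av;

	memset(&av, 0, sizeof(av));
	av.assoc_id = SCTP_ALL_ASSOC;	/* 1-to-many sockets only */
	av.assoc_value = SCTP_CC_HTCP;
	return (setsockopt(fd, IPPROTO_SCTP, SCTP_PLUGGABLE_CC,
	    &av, sizeof(av)));
}
#endif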
stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net); } } SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } break; } case SCTP_CC_OPTION: { struct sctp_cc_option *cc_opt; SCTP_CHECK_AND_CAST(cc_opt, optval, struct sctp_cc_option, optsize); SCTP_FIND_STCB(inp, stcb, cc_opt->aid_value.assoc_id); if (stcb == NULL) { if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (cc_opt->aid_value.assoc_id == SCTP_CURRENT_ASSOC)) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); if (stcb->asoc.cc_functions.sctp_cwnd_socket_option) { (*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 1, cc_opt); } SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } else { error = EINVAL; } } else { if (stcb->asoc.cc_functions.sctp_cwnd_socket_option == NULL) { error = ENOTSUP; } else { error = (*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 1, cc_opt); } SCTP_TCB_UNLOCK(stcb); } break; } case SCTP_STREAM_SCHEDULER: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); if ((av->assoc_value != SCTP_SS_DEFAULT) && (av->assoc_value != SCTP_SS_RR) && (av->assoc_value != SCTP_SS_RR_PKT) && (av->assoc_value != SCTP_SS_PRIO) && (av->assoc_value != SCTP_SS_FB) && (av->assoc_value != SCTP_SS_FCFS)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, true); stcb->asoc.ss_functions = sctp_ss_functions[av->assoc_value]; stcb->asoc.stream_scheduling_module = av->assoc_value; stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc); SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((av->assoc_id == SCTP_FUTURE_ASSOC) || (av->assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); inp->sctp_ep.sctp_default_ss_module = av->assoc_value; SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((av->assoc_id == SCTP_CURRENT_ASSOC) || (av->assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, true); stcb->asoc.ss_functions = sctp_ss_functions[av->assoc_value]; stcb->asoc.stream_scheduling_module = av->assoc_value; stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc); SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } break; } case SCTP_STREAM_SCHEDULER_VALUE: { struct sctp_stream_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_stream_value, optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { if ((av->stream_id >= stcb->asoc.streamoutcnt) || (stcb->asoc.ss_functions.sctp_ss_set_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id], av->stream_value) < 0)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_CURRENT_ASSOC)) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); if (av->stream_id < stcb->asoc.streamoutcnt) { stcb->asoc.ss_functions.sctp_ss_set_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id], av->stream_value); } SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } else { /* * Can't set stream value without * association */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, 
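/*
 * Usage sketch (illustrative, in comment form since this point sits
 * mid-statement): SCTP_STREAM_SCHEDULER selects one of SCTP_SS_DEFAULT,
 * SCTP_SS_RR, SCTP_SS_RR_PKT, SCTP_SS_PRIO, SCTP_SS_FB or SCTP_SS_FCFS,
 * and SCTP_STREAM_SCHEDULER_VALUE then sets a per-stream value:
 *
 *	struct sctp_stream_value sv;
 *	memset(&sv, 0, sizeof(sv));
 *	sv.assoc_id = aid;		// an existing association
 *	sv.stream_id = 0;		// must be < streamoutcnt
 *	sv.stream_value = 10;		// e.g. a priority for SCTP_SS_PRIO
 *	(void)setsockopt(fd, IPPROTO_SCTP, SCTP_STREAM_SCHEDULER_VALUE,
 *	    &sv, sizeof(sv));
 */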
SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_CLR_STAT_LOG: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); error = EOPNOTSUPP; break; case SCTP_CONTEXT: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { stcb->asoc.context = av->assoc_value; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((av->assoc_id == SCTP_FUTURE_ASSOC) || (av->assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); inp->sctp_context = av->assoc_value; SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((av->assoc_id == SCTP_CURRENT_ASSOC) || (av->assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); stcb->asoc.context = av->assoc_value; SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } break; } case SCTP_VRF_ID: { uint32_t *default_vrfid; SCTP_CHECK_AND_CAST(default_vrfid, optval, uint32_t, optsize); if (*default_vrfid > SCTP_MAX_VRF_ID) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } inp->def_vrf_id = *default_vrfid; break; } case SCTP_DEL_VRF_ID: { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); error = EOPNOTSUPP; break; } case SCTP_ADD_VRF_ID: { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); error = EOPNOTSUPP; break; } case SCTP_DELAYED_SACK: { struct sctp_sack_info *sack; SCTP_CHECK_AND_CAST(sack, optval, struct sctp_sack_info, optsize); SCTP_FIND_STCB(inp, stcb, sack->sack_assoc_id); if (sack->sack_delay) { if (sack->sack_delay > SCTP_MAX_SACK_DELAY) { error = EINVAL; if (stcb != NULL) { SCTP_TCB_UNLOCK(stcb); } break; } } if (stcb) { if (sack->sack_delay) { stcb->asoc.delayed_ack = sack->sack_delay; } if (sack->sack_freq) { stcb->asoc.sack_freq = sack->sack_freq; } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((sack->sack_assoc_id == SCTP_FUTURE_ASSOC) || (sack->sack_assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); if (sack->sack_delay) { inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = sctp_msecs_to_ticks(sack->sack_delay); } if (sack->sack_freq) { inp->sctp_ep.sctp_sack_freq = sack->sack_freq; } SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((sack->sack_assoc_id == SCTP_CURRENT_ASSOC) || (sack->sack_assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); if (sack->sack_delay) { stcb->asoc.delayed_ack = sack->sack_delay; } if (sack->sack_freq) { stcb->asoc.sack_freq = sack->sack_freq; } SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } break; } case SCTP_AUTH_CHUNK: { struct sctp_authchunk *sauth; SCTP_CHECK_AND_CAST(sauth, optval, struct sctp_authchunk, optsize); SCTP_INP_WLOCK(inp); if (sctp_auth_add_chunk(sauth->sauth_chunk, inp->sctp_ep.local_auth_chunks)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } else { inp->auth_supported = 1; } SCTP_INP_WUNLOCK(inp); break; } case SCTP_AUTH_KEY: { struct sctp_authkey *sca; struct sctp_keyhead *shared_keys; sctp_sharedkey_t *shared_key; sctp_key_t *key = NULL; size_t size; SCTP_CHECK_AND_CAST(sca, optval, 
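/*
 * Usage sketch (illustrative, in comment form since this point sits
 * inside a macro invocation): SCTP_DELAYED_SACK, handled above, takes
 * a struct sctp_sack_info; sack_delay is in milliseconds (bounded by
 * SCTP_MAX_SACK_DELAY and converted with sctp_msecs_to_ticks()),
 * sack_freq is the packet count that forces a SACK, and a zero in
 * either field leaves the current setting untouched:
 *
 *	struct sctp_sack_info si;
 *	memset(&si, 0, sizeof(si));
 *	si.sack_assoc_id = SCTP_FUTURE_ASSOC;
 *	si.sack_delay = 100;	// ms
 *	si.sack_freq = 2;	// SACK every second packet
 *	(void)setsockopt(fd, IPPROTO_SCTP, SCTP_DELAYED_SACK,
 *	    &si, sizeof(si));
 */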
struct sctp_authkey, optsize); if (sca->sca_keylength == 0) { size = optsize - sizeof(struct sctp_authkey); } else { if (sca->sca_keylength + sizeof(struct sctp_authkey) <= optsize) { size = sca->sca_keylength; } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } } SCTP_FIND_STCB(inp, stcb, sca->sca_assoc_id); if (stcb) { shared_keys = &stcb->asoc.shared_keys; /* clear the cached keys for this key id */ sctp_clear_cachedkeys(stcb, sca->sca_keynumber); /* * create the new shared key and * insert/replace it */ if (size > 0) { key = sctp_set_key(sca->sca_key, (uint32_t)size); if (key == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM); error = ENOMEM; SCTP_TCB_UNLOCK(stcb); break; } } shared_key = sctp_alloc_sharedkey(); if (shared_key == NULL) { sctp_free_key(key); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM); error = ENOMEM; SCTP_TCB_UNLOCK(stcb); break; } shared_key->key = key; shared_key->keyid = sca->sca_keynumber; error = sctp_insert_sharedkey(shared_keys, shared_key); SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((sca->sca_assoc_id == SCTP_FUTURE_ASSOC) || (sca->sca_assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); shared_keys = &inp->sctp_ep.shared_keys; /* * clear the cached keys on all * assocs for this key id */ sctp_clear_cachedkeys_ep(inp, sca->sca_keynumber); /* * create the new shared key and * insert/replace it */ if (size > 0) { key = sctp_set_key(sca->sca_key, (uint32_t)size); if (key == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM); error = ENOMEM; SCTP_INP_WUNLOCK(inp); break; } } shared_key = sctp_alloc_sharedkey(); if (shared_key == NULL) { sctp_free_key(key); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM); error = ENOMEM; SCTP_INP_WUNLOCK(inp); break; } shared_key->key = key; shared_key->keyid = sca->sca_keynumber; error = sctp_insert_sharedkey(shared_keys, shared_key); SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((sca->sca_assoc_id == SCTP_CURRENT_ASSOC) || (sca->sca_assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); shared_keys = &stcb->asoc.shared_keys; /* * clear the cached keys for * this key id */ sctp_clear_cachedkeys(stcb, sca->sca_keynumber); /* * create the new shared key * and insert/replace it */ if (size > 0) { key = sctp_set_key(sca->sca_key, (uint32_t)size); if (key == NULL) { SCTP_TCB_UNLOCK(stcb); continue; } } shared_key = sctp_alloc_sharedkey(); if (shared_key == NULL) { sctp_free_key(key); SCTP_TCB_UNLOCK(stcb); continue; } shared_key->key = key; shared_key->keyid = sca->sca_keynumber; error = sctp_insert_sharedkey(shared_keys, shared_key); SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } break; } case SCTP_HMAC_IDENT: { struct sctp_hmacalgo *shmac; sctp_hmaclist_t *hmaclist; uint16_t hmacid; uint32_t i; SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, optsize); if ((optsize < sizeof(struct sctp_hmacalgo) + shmac->shmac_number_of_idents * sizeof(uint16_t)) || (shmac->shmac_number_of_idents > 0xffff)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } hmaclist = sctp_alloc_hmaclist((uint16_t)shmac->shmac_number_of_idents); if (hmaclist == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, 
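/*
 * Usage sketch (illustrative, in comment form since this point sits
 * inside a macro invocation): SCTP_AUTH_KEY is variable-length; the
 * option must be sizeof(struct sctp_authkey) plus the key bytes, and
 * a non-zero sca_keylength has to fit inside that, as checked above.
 * Hypothetical call installing 16 key bytes as key number 1
 * ("key_bytes" is caller-provided; needs <stdlib.h>):
 *
 *	size_t klen = 16;
 *	struct sctp_authkey *ak = calloc(1, sizeof(*ak) + klen);
 *	ak->sca_assoc_id = SCTP_ALL_ASSOC;
 *	ak->sca_keynumber = 1;
 *	ak->sca_keylength = klen;
 *	memcpy(ak->sca_key, key_bytes, klen);
 *	(void)setsockopt(fd, IPPROTO_SCTP, SCTP_AUTH_KEY, ak,
 *	    sizeof(*ak) + klen);
 *	free(ak);
 */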
ENOMEM); error = ENOMEM; break; } for (i = 0; i < shmac->shmac_number_of_idents; i++) { hmacid = shmac->shmac_idents[i]; if (sctp_auth_add_hmacid(hmaclist, hmacid)) { /* invalid HMACs were found */ ; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; sctp_free_hmaclist(hmaclist); goto sctp_set_hmac_done; } } for (i = 0; i < hmaclist->num_algo; i++) { if (hmaclist->hmac[i] == SCTP_AUTH_HMAC_ID_SHA1) { /* already in list */ break; } } if (i == hmaclist->num_algo) { /* not found in list */ sctp_free_hmaclist(hmaclist); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } /* set it on the endpoint */ SCTP_INP_WLOCK(inp); if (inp->sctp_ep.local_hmacs) sctp_free_hmaclist(inp->sctp_ep.local_hmacs); inp->sctp_ep.local_hmacs = hmaclist; SCTP_INP_WUNLOCK(inp); sctp_set_hmac_done: break; } case SCTP_AUTH_ACTIVE_KEY: { struct sctp_authkeyid *scact; SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid, optsize); SCTP_FIND_STCB(inp, stcb, scact->scact_assoc_id); /* set the active key on the right place */ if (stcb) { /* set the active key on the assoc */ if (sctp_auth_setactivekey(stcb, scact->scact_keynumber)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((scact->scact_assoc_id == SCTP_FUTURE_ASSOC) || (scact->scact_assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); if (sctp_auth_setactivekey_ep(inp, scact->scact_keynumber)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((scact->scact_assoc_id == SCTP_CURRENT_ASSOC) || (scact->scact_assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); sctp_auth_setactivekey(stcb, scact->scact_keynumber); SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } break; } case SCTP_AUTH_DELETE_KEY: { struct sctp_authkeyid *scdel; SCTP_CHECK_AND_CAST(scdel, optval, struct sctp_authkeyid, optsize); SCTP_FIND_STCB(inp, stcb, scdel->scact_assoc_id); /* delete the key from the right place */ if (stcb) { if (sctp_delete_sharedkey(stcb, scdel->scact_keynumber)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((scdel->scact_assoc_id == SCTP_FUTURE_ASSOC) || (scdel->scact_assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); if (sctp_delete_sharedkey_ep(inp, scdel->scact_keynumber)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((scdel->scact_assoc_id == SCTP_CURRENT_ASSOC) || (scdel->scact_assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); sctp_delete_sharedkey(stcb, scdel->scact_keynumber); SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } break; } case SCTP_AUTH_DEACTIVATE_KEY: { struct sctp_authkeyid *keyid; SCTP_CHECK_AND_CAST(keyid, optval, struct sctp_authkeyid, optsize); SCTP_FIND_STCB(inp, stcb, keyid->scact_assoc_id); /* deactivate the key from the right place */ if (stcb) { 
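/*
 * Usage sketch (illustrative; assumes <stdlib.h> in addition to the
 * headers above, and that an SCTP_AUTH_HMAC_ID_SHA256 identifier is
 * available): SCTP_HMAC_IDENT installs an ordered HMAC preference
 * list (the loop above rejects unknown identifiers and insists that
 * SHA-1 stays in the list), and SCTP_AUTH_ACTIVE_KEY then switches
 * the sending key.
 */
#if 0	/* illustrative only, not compiled */
static int
example_auth_prefs(int fd, uint16_t keyno)
{
	struct sctp_hmacalgo *shmac;
	struct sctp_authkeyid ak;
	size_t slen = sizeof(*shmac) + 2 * sizeof(uint16_t);
	int error;

	if ((shmac = calloc(1, slen)) == NULL)
		return (-1);
	shmac->shmac_number_of_idents = 2;
	shmac->shmac_idents[0] = SCTP_AUTH_HMAC_ID_SHA256;	/* preferred */
	shmac->shmac_idents[1] = SCTP_AUTH_HMAC_ID_SHA1;	/* mandatory */
	error = setsockopt(fd, IPPROTO_SCTP, SCTP_HMAC_IDENT, shmac, slen);
	free(shmac);
	if (error < 0)
		return (-1);
	memset(&ak, 0, sizeof(ak));
	ak.scact_assoc_id = SCTP_FUTURE_ASSOC;
	ak.scact_keynumber = keyno;
	return (setsockopt(fd, IPPROTO_SCTP, SCTP_AUTH_ACTIVE_KEY,
	    &ak, sizeof(ak)));
}
#endif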
if (sctp_deact_sharedkey(stcb, keyid->scact_keynumber)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((keyid->scact_assoc_id == SCTP_FUTURE_ASSOC) || (keyid->scact_assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); if (sctp_deact_sharedkey_ep(inp, keyid->scact_keynumber)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((keyid->scact_assoc_id == SCTP_CURRENT_ASSOC) || (keyid->scact_assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); sctp_deact_sharedkey(stcb, keyid->scact_keynumber); SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } break; } case SCTP_ENABLE_STREAM_RESET: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); if (av->assoc_value & (~SCTP_ENABLE_VALUE_MASK)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { stcb->asoc.local_strreset_support = (uint8_t)av->assoc_value; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((av->assoc_id == SCTP_FUTURE_ASSOC) || (av->assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); inp->local_strreset_support = (uint8_t)av->assoc_value; SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((av->assoc_id == SCTP_CURRENT_ASSOC) || (av->assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); stcb->asoc.local_strreset_support = (uint8_t)av->assoc_value; SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } break; } case SCTP_RESET_STREAMS: { struct sctp_reset_streams *strrst; int i, send_out = 0; int send_in = 0; SCTP_CHECK_AND_CAST(strrst, optval, struct sctp_reset_streams, optsize); SCTP_FIND_STCB(inp, stcb, strrst->srs_assoc_id); if (stcb == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); error = ENOENT; break; } if (stcb->asoc.reconfig_supported == 0) { /* * Peer does not support the chunk type. 
*/ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); error = EOPNOTSUPP; SCTP_TCB_UNLOCK(stcb); break; } if (SCTP_GET_STATE(stcb) != SCTP_STATE_OPEN) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_TCB_UNLOCK(stcb); break; } if (sizeof(struct sctp_reset_streams) + strrst->srs_number_streams * sizeof(uint16_t) > optsize) { error = EINVAL; SCTP_TCB_UNLOCK(stcb); break; } if (strrst->srs_flags & SCTP_STREAM_RESET_INCOMING) { send_in = 1; if (stcb->asoc.stream_reset_outstanding) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY); error = EALREADY; SCTP_TCB_UNLOCK(stcb); break; } } if (strrst->srs_flags & SCTP_STREAM_RESET_OUTGOING) { send_out = 1; } if ((strrst->srs_number_streams > SCTP_MAX_STREAMS_AT_ONCE_RESET) && send_in) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM); error = ENOMEM; SCTP_TCB_UNLOCK(stcb); break; } if ((send_in == 0) && (send_out == 0)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_TCB_UNLOCK(stcb); break; } for (i = 0; i < strrst->srs_number_streams; i++) { if ((send_in) && (strrst->srs_stream_list[i] >= stcb->asoc.streamincnt)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } if ((send_out) && (strrst->srs_stream_list[i] >= stcb->asoc.streamoutcnt)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } } if (error) { SCTP_TCB_UNLOCK(stcb); break; } if (send_out) { int cnt; uint16_t strm; if (strrst->srs_number_streams) { for (i = 0, cnt = 0; i < strrst->srs_number_streams; i++) { strm = strrst->srs_stream_list[i]; if (stcb->asoc.strmout[strm].state == SCTP_STREAM_OPEN) { stcb->asoc.strmout[strm].state = SCTP_STREAM_RESET_PENDING; cnt++; } } } else { /* Its all */ for (i = 0, cnt = 0; i < stcb->asoc.streamoutcnt; i++) { if (stcb->asoc.strmout[i].state == SCTP_STREAM_OPEN) { stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_PENDING; cnt++; } } } } if (send_in) { error = sctp_send_str_reset_req(stcb, strrst->srs_number_streams, strrst->srs_stream_list, send_in, 0, 0, 0, 0, 0); } else { error = sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_LOCKED); } if (error == 0) { sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED); } else { /* * For outgoing streams don't report any * problems in sending the request to the * application. XXX: Double check resetting * incoming streams. */ error = 0; } SCTP_TCB_UNLOCK(stcb); break; } case SCTP_ADD_STREAMS: { struct sctp_add_streams *stradd; uint8_t addstream = 0; uint16_t add_o_strmcnt = 0; uint16_t add_i_strmcnt = 0; SCTP_CHECK_AND_CAST(stradd, optval, struct sctp_add_streams, optsize); SCTP_FIND_STCB(inp, stcb, stradd->sas_assoc_id); if (stcb == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); error = ENOENT; break; } if (stcb->asoc.reconfig_supported == 0) { /* * Peer does not support the chunk type. 
*/ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); error = EOPNOTSUPP; SCTP_TCB_UNLOCK(stcb); break; } if (SCTP_GET_STATE(stcb) != SCTP_STATE_OPEN) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_TCB_UNLOCK(stcb); break; } if (stcb->asoc.stream_reset_outstanding) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY); error = EALREADY; SCTP_TCB_UNLOCK(stcb); break; } if ((stradd->sas_outstrms == 0) && (stradd->sas_instrms == 0)) { error = EINVAL; goto skip_stuff; } if (stradd->sas_outstrms) { addstream = 1; /* We allocate here */ add_o_strmcnt = stradd->sas_outstrms; if ((((int)add_o_strmcnt) + ((int)stcb->asoc.streamoutcnt)) > 0x0000ffff) { /* You can't have more than 64k */ error = EINVAL; goto skip_stuff; } } if (stradd->sas_instrms) { int cnt; addstream |= 2; /* * We allocate inside * sctp_send_str_reset_req() */ add_i_strmcnt = stradd->sas_instrms; cnt = add_i_strmcnt; cnt += stcb->asoc.streamincnt; if (cnt > 0x0000ffff) { /* You can't have more than 64k */ error = EINVAL; goto skip_stuff; } if (cnt > (int)stcb->asoc.max_inbound_streams) { /* More than you are allowed */ error = EINVAL; goto skip_stuff; } } error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, addstream, add_o_strmcnt, add_i_strmcnt, 0); sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED); skip_stuff: SCTP_TCB_UNLOCK(stcb); break; } case SCTP_RESET_ASSOC: { int i; uint32_t *value; SCTP_CHECK_AND_CAST(value, optval, uint32_t, optsize); SCTP_FIND_STCB(inp, stcb, (sctp_assoc_t)*value); if (stcb == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); error = ENOENT; break; } if (stcb->asoc.reconfig_supported == 0) { /* * Peer does not support the chunk type. */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); error = EOPNOTSUPP; SCTP_TCB_UNLOCK(stcb); break; } if (SCTP_GET_STATE(stcb) != SCTP_STATE_OPEN) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_TCB_UNLOCK(stcb); break; } if (stcb->asoc.stream_reset_outstanding) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY); error = EALREADY; SCTP_TCB_UNLOCK(stcb); break; } /* * Is there any data pending in the send or sent * queues? */ if (!TAILQ_EMPTY(&stcb->asoc.send_queue) || !TAILQ_EMPTY(&stcb->asoc.sent_queue)) { busy_out: error = EBUSY; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); SCTP_TCB_UNLOCK(stcb); break; } /* Do any streams have data queued? */ for (i = 0; i < stcb->asoc.streamoutcnt; i++) { if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) { goto busy_out; } } error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 1, 0, 0, 0, 0); sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED); SCTP_TCB_UNLOCK(stcb); break; } case SCTP_CONNECT_X: if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } error = sctp_do_connect_x(so, inp, optval, optsize, p, 0); break; case SCTP_CONNECT_X_DELAYED: if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } error = sctp_do_connect_x(so, inp, optval, optsize, p, 1); break; case SCTP_CONNECT_X_COMPLETE: { struct sockaddr *sa; /* FIXME MT: check correct? 
*/ SCTP_CHECK_AND_CAST(sa, optval, struct sockaddr, optsize); /* find tcb */ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) { SCTP_INP_RLOCK(inp); stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb) { SCTP_TCB_LOCK(stcb); } SCTP_INP_RUNLOCK(inp); } else { /* * We increment here since * sctp_findassociation_ep_addr() wil do a * decrement if it finds the stcb as long as * the locked tcb (last argument) is NOT a * TCB.. aka NULL. */ SCTP_INP_INCR_REF(inp); stcb = sctp_findassociation_ep_addr(&inp, sa, NULL, NULL, NULL); if (stcb == NULL) { SCTP_INP_DECR_REF(inp); } } if (stcb == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); error = ENOENT; break; } if (stcb->asoc.delayed_connection == 1) { stcb->asoc.delayed_connection = 0; (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered); sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, stcb->asoc.primary_destination, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8); sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED); } else { /* * already expired or did not use delayed * connectx */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY); error = EALREADY; } SCTP_TCB_UNLOCK(stcb); break; } case SCTP_MAX_BURST: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { stcb->asoc.max_burst = av->assoc_value; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((av->assoc_id == SCTP_FUTURE_ASSOC) || (av->assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); inp->sctp_ep.max_burst = av->assoc_value; SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((av->assoc_id == SCTP_CURRENT_ASSOC) || (av->assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); stcb->asoc.max_burst = av->assoc_value; SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } break; } case SCTP_MAXSEG: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { stcb->asoc.sctp_frag_point = av->assoc_value; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_WLOCK(inp); inp->sctp_frag_point = av->assoc_value; SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_EVENTS: { struct sctp_event_subscribe *events; SCTP_CHECK_AND_CAST(events, optval, struct sctp_event_subscribe, optsize); SCTP_INP_WLOCK(inp); if (events->sctp_data_io_event) { sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT); } else { sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT); } if (events->sctp_association_event) { sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT); } else { sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT); } if (events->sctp_address_event) { sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVPADDREVNT); } else { sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVPADDREVNT); } if (events->sctp_send_failure_event) { sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT); } else { sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT); } if (events->sctp_peer_error_event) { sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVPEERERR); } else { 
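/*
 * Usage sketch (illustrative; assumes <stdlib.h> plus the headers
 * above): the SCTP_RESET_STREAMS case above requires RE-CONFIG
 * support from the peer, an OPEN association, and no reset already
 * outstanding (EALREADY); srs_number_streams == 0 means "all
 * outgoing streams".  Resetting both directions of streams 0 and 1:
 */
#if 0	/* illustrative only, not compiled */
static int
example_reset_streams(int fd, sctp_assoc_t aid)
{
	struct sctp_reset_streams *srs;
	size_t len = sizeof(*srs) + 2 * sizeof(uint16_t);
	int error;

	if ((srs = calloc(1, len)) == NULL)
		return (-1);
	srs->srs_assoc_id = aid;
	srs->srs_flags = SCTP_STREAM_RESET_INCOMING |
	    SCTP_STREAM_RESET_OUTGOING;
	srs->srs_number_streams = 2;
	srs->srs_stream_list[0] = 0;
	srs->srs_stream_list[1] = 1;
	error = setsockopt(fd, IPPROTO_SCTP, SCTP_RESET_STREAMS, srs, len);
	free(srs);
	return (error);
}
#endif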
sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVPEERERR); } if (events->sctp_shutdown_event) { sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT); } else { sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT); } if (events->sctp_partial_delivery_event) { sctp_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT); } else { sctp_feature_off(inp, SCTP_PCB_FLAGS_PDAPIEVNT); } if (events->sctp_adaptation_layer_event) { sctp_feature_on(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT); } else { sctp_feature_off(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT); } if (events->sctp_authentication_event) { sctp_feature_on(inp, SCTP_PCB_FLAGS_AUTHEVNT); } else { sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTHEVNT); } if (events->sctp_sender_dry_event) { sctp_feature_on(inp, SCTP_PCB_FLAGS_DRYEVNT); } else { sctp_feature_off(inp, SCTP_PCB_FLAGS_DRYEVNT); } if (events->sctp_stream_reset_event) { sctp_feature_on(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT); } else { sctp_feature_off(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT); } LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); if (events->sctp_association_event) { sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT); } else { sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT); } if (events->sctp_address_event) { sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVPADDREVNT); } else { sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVPADDREVNT); } if (events->sctp_send_failure_event) { sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT); } else { sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT); } if (events->sctp_peer_error_event) { sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVPEERERR); } else { sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVPEERERR); } if (events->sctp_shutdown_event) { sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT); } else { sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT); } if (events->sctp_partial_delivery_event) { sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_PDAPIEVNT); } else { sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_PDAPIEVNT); } if (events->sctp_adaptation_layer_event) { sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_ADAPTATIONEVNT); } else { sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_ADAPTATIONEVNT); } if (events->sctp_authentication_event) { sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_AUTHEVNT); } else { sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_AUTHEVNT); } if (events->sctp_sender_dry_event) { sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_DRYEVNT); } else { sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_DRYEVNT); } if (events->sctp_stream_reset_event) { sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT); } else { sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT); } SCTP_TCB_UNLOCK(stcb); } /* * Send up the sender dry event only for 1-to-1 * style sockets. 
*/ if (events->sctp_sender_dry_event) { if (((inp->sctp_flags & (SCTP_PCB_FLAGS_TCPTYPE | SCTP_PCB_FLAGS_IN_TCPPOOL)) != 0) && !SCTP_IS_LISTENING(inp)) { stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb != NULL) { SCTP_TCB_LOCK(stcb); if (TAILQ_EMPTY(&stcb->asoc.send_queue) && TAILQ_EMPTY(&stcb->asoc.sent_queue) && (stcb->asoc.stream_queue_cnt == 0)) { sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_LOCKED); } SCTP_TCB_UNLOCK(stcb); } } } SCTP_INP_WUNLOCK(inp); break; } case SCTP_ADAPTATION_LAYER: { struct sctp_setadaptation *adap_bits; SCTP_CHECK_AND_CAST(adap_bits, optval, struct sctp_setadaptation, optsize); SCTP_INP_WLOCK(inp); inp->sctp_ep.adaptation_layer_indicator = adap_bits->ssb_adaptation_ind; inp->sctp_ep.adaptation_layer_indicator_provided = 1; SCTP_INP_WUNLOCK(inp); break; } #ifdef SCTP_DEBUG case SCTP_SET_INITIAL_DBG_SEQ: { uint32_t *vvv; SCTP_CHECK_AND_CAST(vvv, optval, uint32_t, optsize); SCTP_INP_WLOCK(inp); inp->sctp_ep.initial_sequence_debug = *vvv; SCTP_INP_WUNLOCK(inp); break; } #endif case SCTP_DEFAULT_SEND_PARAM: { struct sctp_sndrcvinfo *s_info; SCTP_CHECK_AND_CAST(s_info, optval, struct sctp_sndrcvinfo, optsize); SCTP_FIND_STCB(inp, stcb, s_info->sinfo_assoc_id); if (stcb) { if (s_info->sinfo_stream < stcb->asoc.streamoutcnt) { memcpy(&stcb->asoc.def_send, s_info, min(optsize, sizeof(stcb->asoc.def_send))); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((s_info->sinfo_assoc_id == SCTP_FUTURE_ASSOC) || (s_info->sinfo_assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); memcpy(&inp->def_send, s_info, min(optsize, sizeof(inp->def_send))); SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((s_info->sinfo_assoc_id == SCTP_CURRENT_ASSOC) || (s_info->sinfo_assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); if (s_info->sinfo_stream < stcb->asoc.streamoutcnt) { memcpy(&stcb->asoc.def_send, s_info, min(optsize, sizeof(stcb->asoc.def_send))); } SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } break; } case SCTP_PEER_ADDR_PARAMS: { struct sctp_paddrparams *paddrp; struct sctp_nets *net; struct sockaddr *addr; #if defined(INET) && defined(INET6) struct sockaddr_in sin_store; #endif SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, optsize); SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id); #if defined(INET) && defined(INET6) if (paddrp->spp_address.ss_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&paddrp->spp_address; if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { in6_sin6_2_sin(&sin_store, sin6); addr = (struct sockaddr *)&sin_store; } else { addr = (struct sockaddr *)&paddrp->spp_address; } } else { addr = (struct sockaddr *)&paddrp->spp_address; } #else addr = (struct sockaddr *)&paddrp->spp_address; #endif if (stcb != NULL) { net = sctp_findnet(stcb, addr); } else { /* * We increment here since * sctp_findassociation_ep_addr() wil do a * decrement if it finds the stcb as long as * the locked tcb (last argument) is NOT a * TCB.. aka NULL. 
*/ net = NULL; SCTP_INP_INCR_REF(inp); stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL); if (stcb == NULL) { SCTP_INP_DECR_REF(inp); } } if ((stcb != NULL) && (net == NULL)) { #ifdef INET if (addr->sa_family == AF_INET) { struct sockaddr_in *sin; sin = (struct sockaddr_in *)addr; if (sin->sin_addr.s_addr != INADDR_ANY) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); SCTP_TCB_UNLOCK(stcb); error = EINVAL; break; } } else #endif #ifdef INET6 if (addr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)addr; if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); SCTP_TCB_UNLOCK(stcb); error = EINVAL; break; } } else #endif { error = EAFNOSUPPORT; SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } /* sanity checks */ if ((paddrp->spp_flags & SPP_HB_ENABLE) && (paddrp->spp_flags & SPP_HB_DISABLE)) { if (stcb) SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } if ((paddrp->spp_flags & SPP_PMTUD_ENABLE) && (paddrp->spp_flags & SPP_PMTUD_DISABLE)) { if (stcb) SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu > 0) && ((paddrp->spp_pathmtu < SCTP_SMALLEST_PMTU) || (paddrp->spp_pathmtu > SCTP_LARGEST_PMTU))) { if (stcb) SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } if (stcb != NULL) { /************************TCB SPECIFIC SET ******************/ if (net != NULL) { /************************NET SPECIFIC SET ******************/ if (paddrp->spp_flags & SPP_HB_DISABLE) { if (((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0) && ((net->dest_state & SCTP_ADDR_NOHB) == 0)) { sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_9); } net->dest_state |= SCTP_ADDR_NOHB; } if (paddrp->spp_flags & SPP_HB_ENABLE) { if (paddrp->spp_hbinterval) { net->heart_beat_delay = paddrp->spp_hbinterval; } else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) { net->heart_beat_delay = 0; } sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10); sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net); net->dest_state &= ~SCTP_ADDR_NOHB; } if (paddrp->spp_flags & SPP_HB_DEMAND) { if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) { sctp_send_hb(stcb, net, SCTP_SO_LOCKED); sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SOCKOPT, SCTP_SO_LOCKED); sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net); } } if (paddrp->spp_flags & SPP_PMTUD_DISABLE) { if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) { sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_11); } net->dest_state |= SCTP_ADDR_NO_PMTUD; if (paddrp->spp_pathmtu > 0) { net->mtu = paddrp->spp_pathmtu; switch (net->ro._l_addr.sa.sa_family) { #ifdef INET case AF_INET: net->mtu += SCTP_MIN_V4_OVERHEAD; break; #endif #ifdef INET6 case AF_INET6: net->mtu += SCTP_MIN_OVERHEAD; break; #endif default: break; } if (net->mtu < stcb->asoc.smallest_mtu) { sctp_pathmtu_adjustment(stcb, net->mtu, true); } } } if (paddrp->spp_flags & SPP_PMTUD_ENABLE) { if (!SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) { sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net); } net->dest_state &= ~SCTP_ADDR_NO_PMTUD; } if (paddrp->spp_pathmaxrxt > 
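/*
 * Usage sketch (illustrative; header assumptions as above): per the
 * sanity checks just above, SPP_HB_ENABLE/SPP_HB_DISABLE and
 * SPP_PMTUD_ENABLE/SPP_PMTUD_DISABLE are mutually exclusive pairs,
 * and a non-zero spp_pathmtu must lie between SCTP_SMALLEST_PMTU and
 * SCTP_LARGEST_PMTU.  Enabling a 5 second heartbeat on one peer
 * address ("sa"/"salen" are caller-provided):
 */
#if 0	/* illustrative only, not compiled */
static int
example_enable_hb(int fd, sctp_assoc_t aid, const struct sockaddr *sa,
    socklen_t salen)
{
	struct sctp_paddrparams spp;

	memset(&spp, 0, sizeof(spp));
	spp.spp_assoc_id = aid;
	memcpy(&spp.spp_address, sa, salen);	/* left zeroed: whole assoc */
	spp.spp_flags = SPP_HB_ENABLE;
	spp.spp_hbinterval = 5000;	/* ms; use SPP_HB_TIME_IS_ZERO for 0 */
	return (setsockopt(fd, IPPROTO_SCTP, SCTP_PEER_ADDR_PARAMS,
	    &spp, sizeof(spp)));
}
#endif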
0) { if (net->dest_state & SCTP_ADDR_PF) { if (net->error_count > paddrp->spp_pathmaxrxt) { net->dest_state &= ~SCTP_ADDR_PF; } } else { if ((net->error_count <= paddrp->spp_pathmaxrxt) && (net->error_count > net->pf_threshold)) { net->dest_state |= SCTP_ADDR_PF; sctp_send_hb(stcb, net, SCTP_SO_LOCKED); sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_12); sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); } } if (net->dest_state & SCTP_ADDR_REACHABLE) { if (net->error_count > paddrp->spp_pathmaxrxt) { net->dest_state &= ~SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED); } } else { if (net->error_count <= paddrp->spp_pathmaxrxt) { net->dest_state |= SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED); } } net->failure_threshold = paddrp->spp_pathmaxrxt; } if (paddrp->spp_flags & SPP_DSCP) { net->dscp = paddrp->spp_dscp & 0xfc; net->dscp |= 0x01; } #ifdef INET6 if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) { if (net->ro._l_addr.sa.sa_family == AF_INET6) { net->flowlabel = paddrp->spp_ipv6_flowlabel & 0x000fffff; net->flowlabel |= 0x80000000; } } #endif } else { /************************ASSOC ONLY -- NO NET SPECIFIC SET ******************/ if (paddrp->spp_pathmaxrxt > 0) { stcb->asoc.def_net_failure = paddrp->spp_pathmaxrxt; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if (net->dest_state & SCTP_ADDR_PF) { if (net->error_count > paddrp->spp_pathmaxrxt) { net->dest_state &= ~SCTP_ADDR_PF; } } else { if ((net->error_count <= paddrp->spp_pathmaxrxt) && (net->error_count > net->pf_threshold)) { net->dest_state |= SCTP_ADDR_PF; sctp_send_hb(stcb, net, SCTP_SO_LOCKED); sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_13); sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); } } if (net->dest_state & SCTP_ADDR_REACHABLE) { if (net->error_count > paddrp->spp_pathmaxrxt) { net->dest_state &= ~SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED); } } else { if (net->error_count <= paddrp->spp_pathmaxrxt) { net->dest_state |= SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED); } } net->failure_threshold = paddrp->spp_pathmaxrxt; } } if (paddrp->spp_flags & SPP_HB_ENABLE) { if (paddrp->spp_hbinterval != 0) { stcb->asoc.heart_beat_delay = paddrp->spp_hbinterval; } else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) { stcb->asoc.heart_beat_delay = 0; } /* Turn back on the timer */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if (paddrp->spp_hbinterval != 0) { net->heart_beat_delay = paddrp->spp_hbinterval; } else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) { net->heart_beat_delay = 0; } if (net->dest_state & SCTP_ADDR_NOHB) { net->dest_state &= ~SCTP_ADDR_NOHB; } sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_14); sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net); } sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); } if (paddrp->spp_flags & SPP_HB_DISABLE) { TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if ((net->dest_state & SCTP_ADDR_NOHB) == 0) { net->dest_state |= SCTP_ADDR_NOHB; if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0) { sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_15); } } } sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); } if 
(paddrp->spp_flags & SPP_PMTUD_DISABLE) { TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) { sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_16); } net->dest_state |= SCTP_ADDR_NO_PMTUD; if (paddrp->spp_pathmtu > 0) { net->mtu = paddrp->spp_pathmtu; switch (net->ro._l_addr.sa.sa_family) { #ifdef INET case AF_INET: net->mtu += SCTP_MIN_V4_OVERHEAD; break; #endif #ifdef INET6 case AF_INET6: net->mtu += SCTP_MIN_OVERHEAD; break; #endif default: break; } if (net->mtu < stcb->asoc.smallest_mtu) { sctp_pathmtu_adjustment(stcb, net->mtu, true); } } } if (paddrp->spp_pathmtu > 0) { stcb->asoc.default_mtu = paddrp->spp_pathmtu; } sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_DO_NOT_PMTUD); } if (paddrp->spp_flags & SPP_PMTUD_ENABLE) { TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if (!SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) { sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net); } net->dest_state &= ~SCTP_ADDR_NO_PMTUD; } stcb->asoc.default_mtu = 0; sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_DO_NOT_PMTUD); } if (paddrp->spp_flags & SPP_DSCP) { TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { net->dscp = paddrp->spp_dscp & 0xfc; net->dscp |= 0x01; } stcb->asoc.default_dscp = paddrp->spp_dscp & 0xfc; stcb->asoc.default_dscp |= 0x01; } #ifdef INET6 if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) { TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if (net->ro._l_addr.sa.sa_family == AF_INET6) { net->flowlabel = paddrp->spp_ipv6_flowlabel & 0x000fffff; net->flowlabel |= 0x80000000; } } stcb->asoc.default_flowlabel = paddrp->spp_ipv6_flowlabel & 0x000fffff; stcb->asoc.default_flowlabel |= 0x80000000; } #endif } SCTP_TCB_UNLOCK(stcb); } else { /************************NO TCB, SET TO default stuff ******************/ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (paddrp->spp_assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_WLOCK(inp); /* * For the TOS/FLOWLABEL stuff you * set it with the options on the * socket */ if (paddrp->spp_pathmaxrxt > 0) { inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt; } if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0; else if (paddrp->spp_hbinterval != 0) { if (paddrp->spp_hbinterval > SCTP_MAX_HB_INTERVAL) paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL; inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = sctp_msecs_to_ticks(paddrp->spp_hbinterval); } if (paddrp->spp_flags & SPP_HB_ENABLE) { if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) { inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0; } else if (paddrp->spp_hbinterval) { inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = sctp_msecs_to_ticks(paddrp->spp_hbinterval); } sctp_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); } else if (paddrp->spp_flags & SPP_HB_DISABLE) { sctp_feature_on(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); } if (paddrp->spp_flags & SPP_PMTUD_ENABLE) { inp->sctp_ep.default_mtu = 0; sctp_feature_off(inp, SCTP_PCB_FLAGS_DO_NOT_PMTUD); } else if (paddrp->spp_flags & SPP_PMTUD_DISABLE) { if (paddrp->spp_pathmtu > 0) { inp->sctp_ep.default_mtu = paddrp->spp_pathmtu; } sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_NOT_PMTUD); } if (paddrp->spp_flags & SPP_DSCP) { inp->sctp_ep.default_dscp = paddrp->spp_dscp & 0xfc; inp->sctp_ep.default_dscp |= 0x01; } #ifdef INET6 if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) { if (inp->sctp_flags & 
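/*
 * Usage sketch (illustrative; header assumptions as above): with
 * SPP_PMTUD_DISABLE and a non-zero spp_pathmtu, the code above stops
 * the PATHMTURAISE timer, marks the nets SCTP_ADDR_NO_PMTUD, and adds
 * the address-family overhead (SCTP_MIN_V4_OVERHEAD or
 * SCTP_MIN_OVERHEAD) to the supplied value.  Pinning the path MTU for
 * a whole association:
 */
#if 0	/* illustrative only, not compiled */
static int
example_pin_pmtu(int fd, sctp_assoc_t aid, uint32_t mtu)
{
	struct sctp_paddrparams spp;

	memset(&spp, 0, sizeof(spp));
	spp.spp_assoc_id = aid;		/* no address: assoc-wide */
	spp.spp_flags = SPP_PMTUD_DISABLE;
	spp.spp_pathmtu = mtu;	/* SCTP_SMALLEST_PMTU..SCTP_LARGEST_PMTU */
	return (setsockopt(fd, IPPROTO_SCTP, SCTP_PEER_ADDR_PARAMS,
	    &spp, sizeof(spp)));
}
#endif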
SCTP_PCB_FLAGS_BOUND_V6) { inp->sctp_ep.default_flowlabel = paddrp->spp_ipv6_flowlabel & 0x000fffff; inp->sctp_ep.default_flowlabel |= 0x80000000; } } #endif SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_RTOINFO: { struct sctp_rtoinfo *srto; uint32_t new_init, new_min, new_max; SCTP_CHECK_AND_CAST(srto, optval, struct sctp_rtoinfo, optsize); SCTP_FIND_STCB(inp, stcb, srto->srto_assoc_id); if (stcb) { if (srto->srto_initial) new_init = srto->srto_initial; else new_init = stcb->asoc.initial_rto; if (srto->srto_max) new_max = srto->srto_max; else new_max = stcb->asoc.maxrto; if (srto->srto_min) new_min = srto->srto_min; else new_min = stcb->asoc.minrto; if ((new_min <= new_init) && (new_init <= new_max)) { stcb->asoc.initial_rto = new_init; stcb->asoc.maxrto = new_max; stcb->asoc.minrto = new_min; } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (srto->srto_assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_WLOCK(inp); if (srto->srto_initial) new_init = srto->srto_initial; else new_init = inp->sctp_ep.initial_rto; if (srto->srto_max) new_max = srto->srto_max; else new_max = inp->sctp_ep.sctp_maxrto; if (srto->srto_min) new_min = srto->srto_min; else new_min = inp->sctp_ep.sctp_minrto; if ((new_min <= new_init) && (new_init <= new_max)) { inp->sctp_ep.initial_rto = new_init; inp->sctp_ep.sctp_maxrto = new_max; inp->sctp_ep.sctp_minrto = new_min; } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_ASSOCINFO: { struct sctp_assocparams *sasoc; SCTP_CHECK_AND_CAST(sasoc, optval, struct sctp_assocparams, optsize); SCTP_FIND_STCB(inp, stcb, sasoc->sasoc_assoc_id); if (sasoc->sasoc_cookie_life > 0) { /* boundary check the cookie life */ if (sasoc->sasoc_cookie_life < SCTP_MIN_COOKIE_LIFE) { sasoc->sasoc_cookie_life = SCTP_MIN_COOKIE_LIFE; } if (sasoc->sasoc_cookie_life > SCTP_MAX_COOKIE_LIFE) { sasoc->sasoc_cookie_life = SCTP_MAX_COOKIE_LIFE; } } if (stcb) { if (sasoc->sasoc_asocmaxrxt > 0) { stcb->asoc.max_send_times = sasoc->sasoc_asocmaxrxt; } if (sasoc->sasoc_cookie_life > 0) { stcb->asoc.cookie_life = sctp_msecs_to_ticks(sasoc->sasoc_cookie_life); } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (sasoc->sasoc_assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_WLOCK(inp); if (sasoc->sasoc_asocmaxrxt > 0) { inp->sctp_ep.max_send_times = sasoc->sasoc_asocmaxrxt; } if (sasoc->sasoc_cookie_life > 0) { inp->sctp_ep.def_cookie_life = sctp_msecs_to_ticks(sasoc->sasoc_cookie_life); } SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_INITMSG: { struct sctp_initmsg *sinit; SCTP_CHECK_AND_CAST(sinit, optval, struct sctp_initmsg, optsize); SCTP_INP_WLOCK(inp); if (sinit->sinit_num_ostreams) inp->sctp_ep.pre_open_stream_count = sinit->sinit_num_ostreams; if (sinit->sinit_max_instreams) inp->sctp_ep.max_open_streams_intome = sinit->sinit_max_instreams; if (sinit->sinit_max_attempts) inp->sctp_ep.max_init_times = 
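/*
 * Usage sketch (illustrative; header assumptions as above): the
 * SCTP_RTOINFO case above keeps any field that is zero and only
 * applies the new values when srto_min <= srto_initial <= srto_max
 * holds (values in milliseconds), both per-association and for the
 * endpoint defaults.
 */
#if 0	/* illustrative only, not compiled */
static int
example_set_rto(int fd, sctp_assoc_t aid)
{
	struct sctp_rtoinfo rto;

	memset(&rto, 0, sizeof(rto));
	rto.srto_assoc_id = aid;
	rto.srto_min = 500;		/* ms */
	rto.srto_initial = 1000;	/* ms */
	rto.srto_max = 10000;		/* ms; min <= initial <= max */
	return (setsockopt(fd, IPPROTO_SCTP, SCTP_RTOINFO,
	    &rto, sizeof(rto)));
}
#endif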
sinit->sinit_max_attempts; if (sinit->sinit_max_init_timeo) inp->sctp_ep.initial_init_rto_max = sinit->sinit_max_init_timeo; SCTP_INP_WUNLOCK(inp); break; } case SCTP_PRIMARY_ADDR: { struct sctp_setprim *spa; struct sctp_nets *net; struct sockaddr *addr; #if defined(INET) && defined(INET6) struct sockaddr_in sin_store; #endif SCTP_CHECK_AND_CAST(spa, optval, struct sctp_setprim, optsize); SCTP_FIND_STCB(inp, stcb, spa->ssp_assoc_id); #if defined(INET) && defined(INET6) if (spa->ssp_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&spa->ssp_addr; if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { in6_sin6_2_sin(&sin_store, sin6); addr = (struct sockaddr *)&sin_store; } else { addr = (struct sockaddr *)&spa->ssp_addr; } } else { addr = (struct sockaddr *)&spa->ssp_addr; } #else addr = (struct sockaddr *)&spa->ssp_addr; #endif if (stcb != NULL) { net = sctp_findnet(stcb, addr); } else { /* * We increment here since * sctp_findassociation_ep_addr() will do a * decrement if it finds the stcb as long as * the locked tcb (last argument) is NOT a * TCB.. aka NULL. */ net = NULL; SCTP_INP_INCR_REF(inp); stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL); if (stcb == NULL) { SCTP_INP_DECR_REF(inp); } } if ((stcb != NULL) && (net != NULL)) { if (net != stcb->asoc.primary_destination) { if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0) { /* Ok we need to set it */ if (sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net) == 0) { if ((stcb->asoc.alternate) && ((net->dest_state & SCTP_ADDR_PF) == 0) && (net->dest_state & SCTP_ADDR_REACHABLE)) { sctp_free_remote_addr(stcb->asoc.alternate); stcb->asoc.alternate = NULL; } } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } if (stcb != NULL) { SCTP_TCB_UNLOCK(stcb); } break; } case SCTP_SET_DYNAMIC_PRIMARY: { union sctp_sockstore *ss; error = priv_check(curthread, PRIV_NETINET_RESERVEDPORT); if (error) break; SCTP_CHECK_AND_CAST(ss, optval, union sctp_sockstore, optsize); /* SUPER USER CHECK?
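/*
 * Illustrative sketch for the SCTP_PRIMARY_ADDR case above.  `peer` is
 * assumed to be one of the association's confirmed transport addresses; the
 * handler rejects unconfirmed ones (SCTP_ADDR_UNCONFIRMED) with EINVAL, and
 * may clear any alternate destination once the switch succeeds.
 */
static int
set_primary_path(int fd, sctp_assoc_t assoc_id, const struct sockaddr *peer)
{
    struct sctp_setprim prim;

    memset(&prim, 0, sizeof(prim));
    prim.ssp_assoc_id = assoc_id;
    memcpy(&prim.ssp_addr, peer, peer->sa_len);
    return (setsockopt(fd, IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
        &prim, sizeof(prim)));
}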
*/ error = sctp_dynamic_set_primary(&ss->sa, vrf_id); break; } case SCTP_SET_PEER_PRIMARY_ADDR: { struct sctp_setpeerprim *sspp; struct sockaddr *addr; #if defined(INET) && defined(INET6) struct sockaddr_in sin_store; #endif SCTP_CHECK_AND_CAST(sspp, optval, struct sctp_setpeerprim, optsize); SCTP_FIND_STCB(inp, stcb, sspp->sspp_assoc_id); if (stcb != NULL) { struct sctp_ifa *ifa; #if defined(INET) && defined(INET6) if (sspp->sspp_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&sspp->sspp_addr; if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { in6_sin6_2_sin(&sin_store, sin6); addr = (struct sockaddr *)&sin_store; } else { addr = (struct sockaddr *)&sspp->sspp_addr; } } else { addr = (struct sockaddr *)&sspp->sspp_addr; } #else addr = (struct sockaddr *)&sspp->sspp_addr; #endif ifa = sctp_find_ifa_by_addr(addr, stcb->asoc.vrf_id, SCTP_ADDR_NOT_LOCKED); if (ifa == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; goto out_of_it; } if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) { /* * Must validate the ifa found is in * our ep */ struct sctp_laddr *laddr; int found = 0; LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == NULL) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n", __func__); continue; } if ((sctp_is_addr_restricted(stcb, laddr->ifa)) && (!sctp_is_addr_pending(stcb, laddr->ifa))) { continue; } if (laddr->ifa == ifa) { found = 1; break; } } if (!found) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; goto out_of_it; } } else { switch (addr->sa_family) { #ifdef INET case AF_INET: { struct sockaddr_in *sin; sin = (struct sockaddr_in *)addr; if (prison_check_ip4(inp->ip_inp.inp.inp_cred, &sin->sin_addr) != 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; goto out_of_it; } break; } #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)addr; if (prison_check_ip6(inp->ip_inp.inp.inp_cred, &sin6->sin6_addr) != 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; goto out_of_it; } break; } #endif default: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; goto out_of_it; } } if (sctp_set_primary_ip_address_sa(stcb, addr) != 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SOCKOPT, SCTP_SO_LOCKED); out_of_it: SCTP_TCB_UNLOCK(stcb); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } break; } case SCTP_BINDX_ADD_ADDR: { struct sockaddr *sa; struct thread *td; td = (struct thread *)p; SCTP_CHECK_AND_CAST(sa, optval, struct sockaddr, optsize); #ifdef INET if (sa->sa_family == AF_INET) { if (optsize < sizeof(struct sockaddr_in)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } if (td != NULL && (error = prison_local_ip4(td->td_ucred, &(((struct sockaddr_in *)sa)->sin_addr)))) { SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } else #endif #ifdef INET6 if (sa->sa_family == AF_INET6) { if (optsize < sizeof(struct sockaddr_in6)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } if (td != NULL && (error = prison_local_ip6(td->td_ucred, &(((struct sockaddr_in6 *)sa)->sin6_addr), (SCTP_IPV6_V6ONLY(inp) != 0))) != 0) { SCTP_LTRACE_ERR_RET(inp, stcb, NULL, 
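/*
 * Illustrative sketch: SCTP_BINDX_ADD_ADDR above is normally reached through
 * the sctp_bindx(3) library routine rather than a raw setsockopt().  `extra`
 * is a hypothetical additional local address; the handler prison- and
 * size-checks it before calling sctp_bindx_add_address().
 */
static int
add_local_address(int fd, struct sockaddr_in *extra)
{
    return (sctp_bindx(fd, (struct sockaddr *)extra, 1,
        SCTP_BINDX_ADD_ADDR));
}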
SCTP_FROM_SCTP_USRREQ, error); break; } } else #endif { error = EAFNOSUPPORT; break; } sctp_bindx_add_address(so, inp, sa, vrf_id, &error, p); break; } case SCTP_BINDX_REM_ADDR: { struct sockaddr *sa; struct thread *td; td = (struct thread *)p; SCTP_CHECK_AND_CAST(sa, optval, struct sockaddr, optsize); #ifdef INET if (sa->sa_family == AF_INET) { if (optsize < sizeof(struct sockaddr_in)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } if (td != NULL && (error = prison_local_ip4(td->td_ucred, &(((struct sockaddr_in *)sa)->sin_addr)))) { SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } else #endif #ifdef INET6 if (sa->sa_family == AF_INET6) { if (optsize < sizeof(struct sockaddr_in6)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } if (td != NULL && (error = prison_local_ip6(td->td_ucred, &(((struct sockaddr_in6 *)sa)->sin6_addr), (SCTP_IPV6_V6ONLY(inp) != 0))) != 0) { SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } else #endif { error = EAFNOSUPPORT; break; } sctp_bindx_delete_address(inp, sa, vrf_id, &error); break; } case SCTP_EVENT: { struct sctp_event *event; uint32_t event_type; SCTP_CHECK_AND_CAST(event, optval, struct sctp_event, optsize); SCTP_FIND_STCB(inp, stcb, event->se_assoc_id); switch (event->se_type) { case SCTP_ASSOC_CHANGE: event_type = SCTP_PCB_FLAGS_RECVASSOCEVNT; break; case SCTP_PEER_ADDR_CHANGE: event_type = SCTP_PCB_FLAGS_RECVPADDREVNT; break; case SCTP_REMOTE_ERROR: event_type = SCTP_PCB_FLAGS_RECVPEERERR; break; case SCTP_SEND_FAILED: event_type = SCTP_PCB_FLAGS_RECVSENDFAILEVNT; break; case SCTP_SHUTDOWN_EVENT: event_type = SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT; break; case SCTP_ADAPTATION_INDICATION: event_type = SCTP_PCB_FLAGS_ADAPTATIONEVNT; break; case SCTP_PARTIAL_DELIVERY_EVENT: event_type = SCTP_PCB_FLAGS_PDAPIEVNT; break; case SCTP_AUTHENTICATION_EVENT: event_type = SCTP_PCB_FLAGS_AUTHEVNT; break; case SCTP_STREAM_RESET_EVENT: event_type = SCTP_PCB_FLAGS_STREAM_RESETEVNT; break; case SCTP_SENDER_DRY_EVENT: event_type = SCTP_PCB_FLAGS_DRYEVNT; break; case SCTP_NOTIFICATIONS_STOPPED_EVENT: event_type = 0; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP); error = ENOTSUP; break; case SCTP_ASSOC_RESET_EVENT: event_type = SCTP_PCB_FLAGS_ASSOC_RESETEVNT; break; case SCTP_STREAM_CHANGE_EVENT: event_type = SCTP_PCB_FLAGS_STREAM_CHANGEEVNT; break; case SCTP_SEND_FAILED_EVENT: event_type = SCTP_PCB_FLAGS_RECVNSENDFAILEVNT; break; default: event_type = 0; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } if (event_type > 0) { if (stcb) { if (event->se_on) { sctp_stcb_feature_on(inp, stcb, event_type); if (event_type == SCTP_PCB_FLAGS_DRYEVNT) { if (TAILQ_EMPTY(&stcb->asoc.send_queue) && TAILQ_EMPTY(&stcb->asoc.sent_queue) && (stcb->asoc.stream_queue_cnt == 0)) { sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_LOCKED); } } } else { sctp_stcb_feature_off(inp, stcb, event_type); } SCTP_TCB_UNLOCK(stcb); } else { /* * We don't want to send up a storm * of events, so return an error for * sender dry events */ if ((event_type == SCTP_PCB_FLAGS_DRYEVNT) && (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((event->se_assoc_id == SCTP_ALL_ASSOC) || (event->se_assoc_id == SCTP_CURRENT_ASSOC))) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP); error = ENOTSUP; break; } if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags 
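/*
 * Illustrative sketch for the SCTP_EVENT mapping above (RFC 6458 style): one
 * struct sctp_event per notification type.  As the handler's comment notes,
 * SCTP_SENDER_DRY_EVENT is refused (ENOTSUP) for SCTP_ALL_ASSOC and
 * SCTP_CURRENT_ASSOC on one-to-many sockets to avoid an event storm.
 */
static int
enable_assoc_change_events(int fd)
{
    struct sctp_event ev;

    memset(&ev, 0, sizeof(ev));
    ev.se_assoc_id = SCTP_FUTURE_ASSOC;
    ev.se_type = SCTP_ASSOC_CHANGE;
    ev.se_on = 1;
    return (setsockopt(fd, IPPROTO_SCTP, SCTP_EVENT, &ev, sizeof(ev)));
}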
& SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((event->se_assoc_id == SCTP_FUTURE_ASSOC) || (event->se_assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); if (event->se_on) { sctp_feature_on(inp, event_type); } else { sctp_feature_off(inp, event_type); } SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((event->se_assoc_id == SCTP_CURRENT_ASSOC) || (event->se_assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); if (event->se_on) { sctp_stcb_feature_on(inp, stcb, event_type); } else { sctp_stcb_feature_off(inp, stcb, event_type); } SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } } else { if (stcb) { SCTP_TCB_UNLOCK(stcb); } } break; } case SCTP_RECVRCVINFO: { int *onoff; SCTP_CHECK_AND_CAST(onoff, optval, int, optsize); SCTP_INP_WLOCK(inp); if (*onoff != 0) { sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO); } else { sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVRCVINFO); } SCTP_INP_WUNLOCK(inp); break; } case SCTP_RECVNXTINFO: { int *onoff; SCTP_CHECK_AND_CAST(onoff, optval, int, optsize); SCTP_INP_WLOCK(inp); if (*onoff != 0) { sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO); } else { sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVNXTINFO); } SCTP_INP_WUNLOCK(inp); break; } case SCTP_DEFAULT_SNDINFO: { struct sctp_sndinfo *info; uint16_t policy; SCTP_CHECK_AND_CAST(info, optval, struct sctp_sndinfo, optsize); SCTP_FIND_STCB(inp, stcb, info->snd_assoc_id); if (stcb) { if (info->snd_sid < stcb->asoc.streamoutcnt) { stcb->asoc.def_send.sinfo_stream = info->snd_sid; policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags); stcb->asoc.def_send.sinfo_flags = info->snd_flags; stcb->asoc.def_send.sinfo_flags |= policy; stcb->asoc.def_send.sinfo_ppid = info->snd_ppid; stcb->asoc.def_send.sinfo_context = info->snd_context; } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((info->snd_assoc_id == SCTP_FUTURE_ASSOC) || (info->snd_assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); inp->def_send.sinfo_stream = info->snd_sid; policy = PR_SCTP_POLICY(inp->def_send.sinfo_flags); inp->def_send.sinfo_flags = info->snd_flags; inp->def_send.sinfo_flags |= policy; inp->def_send.sinfo_ppid = info->snd_ppid; inp->def_send.sinfo_context = info->snd_context; SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((info->snd_assoc_id == SCTP_CURRENT_ASSOC) || (info->snd_assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); if (info->snd_sid < stcb->asoc.streamoutcnt) { stcb->asoc.def_send.sinfo_stream = info->snd_sid; policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags); stcb->asoc.def_send.sinfo_flags = info->snd_flags; stcb->asoc.def_send.sinfo_flags |= policy; stcb->asoc.def_send.sinfo_ppid = info->snd_ppid; stcb->asoc.def_send.sinfo_context = info->snd_context; } SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } break; } case SCTP_DEFAULT_PRINFO: { struct sctp_default_prinfo *info; SCTP_CHECK_AND_CAST(info, optval, struct sctp_default_prinfo, optsize); SCTP_FIND_STCB(inp, stcb, info->pr_assoc_id); if (info->pr_policy > SCTP_PR_SCTP_MAX) { if (stcb) { SCTP_TCB_UNLOCK(stcb); } SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = 
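/*
 * Illustrative sketch pairing SCTP_RECVRCVINFO with SCTP_DEFAULT_SNDINFO,
 * matching the handlers above.  On an existing association snd_sid must name
 * a valid outgoing stream (snd_sid < streamoutcnt) or the handler answers
 * EINVAL; the PR-SCTP policy bits of sinfo_flags are preserved.  The ppid
 * value used here is purely hypothetical.
 */
static int
set_recv_and_send_defaults(int fd)
{
    struct sctp_sndinfo snd;
    int on = 1;

    if (setsockopt(fd, IPPROTO_SCTP, SCTP_RECVRCVINFO,
        &on, sizeof(on)) != 0)
        return (-1);
    memset(&snd, 0, sizeof(snd));
    snd.snd_assoc_id = SCTP_FUTURE_ASSOC;
    snd.snd_sid = 0;            /* default to stream 0 */
    snd.snd_ppid = htonl(42);   /* hypothetical payload protocol id */
    return (setsockopt(fd, IPPROTO_SCTP, SCTP_DEFAULT_SNDINFO,
        &snd, sizeof(snd)));
}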
EINVAL; break; } if (stcb) { stcb->asoc.def_send.sinfo_flags &= 0xfff0; stcb->asoc.def_send.sinfo_flags |= info->pr_policy; stcb->asoc.def_send.sinfo_timetolive = info->pr_value; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((info->pr_assoc_id == SCTP_FUTURE_ASSOC) || (info->pr_assoc_id == SCTP_ALL_ASSOC)))) { SCTP_INP_WLOCK(inp); inp->def_send.sinfo_flags &= 0xfff0; inp->def_send.sinfo_flags |= info->pr_policy; inp->def_send.sinfo_timetolive = info->pr_value; SCTP_INP_WUNLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && ((info->pr_assoc_id == SCTP_CURRENT_ASSOC) || (info->pr_assoc_id == SCTP_ALL_ASSOC))) { SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); stcb->asoc.def_send.sinfo_flags &= 0xfff0; stcb->asoc.def_send.sinfo_flags |= info->pr_policy; stcb->asoc.def_send.sinfo_timetolive = info->pr_value; SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } } break; } case SCTP_PEER_ADDR_THLDS: /* Applies to the specific association */ { struct sctp_paddrthlds *thlds; struct sctp_nets *net; struct sockaddr *addr; #if defined(INET) && defined(INET6) struct sockaddr_in sin_store; #endif SCTP_CHECK_AND_CAST(thlds, optval, struct sctp_paddrthlds, optsize); SCTP_FIND_STCB(inp, stcb, thlds->spt_assoc_id); #if defined(INET) && defined(INET6) if (thlds->spt_address.ss_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&thlds->spt_address; if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { in6_sin6_2_sin(&sin_store, sin6); addr = (struct sockaddr *)&sin_store; } else { addr = (struct sockaddr *)&thlds->spt_address; } } else { addr = (struct sockaddr *)&thlds->spt_address; } #else addr = (struct sockaddr *)&thlds->spt_address; #endif if (stcb != NULL) { net = sctp_findnet(stcb, addr); } else { /* * We increment here since * sctp_findassociation_ep_addr() will do a * decrement if it finds the stcb as long as * the locked tcb (last argument) is NOT a * TCB.. aka NULL.
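/*
 * Illustrative sketch for the SCTP_DEFAULT_PRINFO case above.  Policies
 * above SCTP_PR_SCTP_MAX are rejected; the chosen policy lands in the low
 * bits of sinfo_flags (the 0xfff0 mask above keeps the rest) and pr_value
 * becomes the default sinfo_timetolive.
 */
static int
set_default_prinfo(int fd)
{
    struct sctp_default_prinfo pri;

    memset(&pri, 0, sizeof(pri));
    pri.pr_assoc_id = SCTP_FUTURE_ASSOC;
    pri.pr_policy = SCTP_PR_SCTP_TTL;   /* time-based abandonment */
    pri.pr_value = 500;                 /* lifetime, milliseconds */
    return (setsockopt(fd, IPPROTO_SCTP, SCTP_DEFAULT_PRINFO,
        &pri, sizeof(pri)));
}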
*/ net = NULL; SCTP_INP_INCR_REF(inp); stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL); if (stcb == NULL) { SCTP_INP_DECR_REF(inp); } } if ((stcb != NULL) && (net == NULL)) { #ifdef INET if (addr->sa_family == AF_INET) { struct sockaddr_in *sin; sin = (struct sockaddr_in *)addr; if (sin->sin_addr.s_addr != INADDR_ANY) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); SCTP_TCB_UNLOCK(stcb); error = EINVAL; break; } } else #endif #ifdef INET6 if (addr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)addr; if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); SCTP_TCB_UNLOCK(stcb); error = EINVAL; break; } } else #endif { error = EAFNOSUPPORT; SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } if (thlds->spt_pathcpthld != 0xffff) { if (stcb != NULL) { SCTP_TCB_UNLOCK(stcb); } error = EINVAL; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } if (stcb != NULL) { if (net != NULL) { net->failure_threshold = thlds->spt_pathmaxrxt; net->pf_threshold = thlds->spt_pathpfthld; if (net->dest_state & SCTP_ADDR_PF) { if ((net->error_count > net->failure_threshold) || (net->error_count <= net->pf_threshold)) { net->dest_state &= ~SCTP_ADDR_PF; } } else { if ((net->error_count > net->pf_threshold) && (net->error_count <= net->failure_threshold)) { net->dest_state |= SCTP_ADDR_PF; sctp_send_hb(stcb, net, SCTP_SO_LOCKED); sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_17); sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); } } if (net->dest_state & SCTP_ADDR_REACHABLE) { if (net->error_count > net->failure_threshold) { net->dest_state &= ~SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED); } } else { if (net->error_count <= net->failure_threshold) { net->dest_state |= SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED); } } } else { TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { net->failure_threshold = thlds->spt_pathmaxrxt; net->pf_threshold = thlds->spt_pathpfthld; if (net->dest_state & SCTP_ADDR_PF) { if ((net->error_count > net->failure_threshold) || (net->error_count <= net->pf_threshold)) { net->dest_state &= ~SCTP_ADDR_PF; } } else { if ((net->error_count > net->pf_threshold) && (net->error_count <= net->failure_threshold)) { net->dest_state |= SCTP_ADDR_PF; sctp_send_hb(stcb, net, SCTP_SO_LOCKED); sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_18); sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); } } if (net->dest_state & SCTP_ADDR_REACHABLE) { if (net->error_count > net->failure_threshold) { net->dest_state &= ~SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED); } } else { if (net->error_count <= net->failure_threshold) { net->dest_state |= SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED); } } } stcb->asoc.def_net_failure = thlds->spt_pathmaxrxt; stcb->asoc.def_net_pf_threshold = thlds->spt_pathpfthld; } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (thlds->spt_assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_WLOCK(inp); 
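/*
 * Illustrative sketch for the SCTP_PEER_ADDR_THLDS logic above: a path turns
 * potentially-failed (SCTP_ADDR_PF) once its error count exceeds
 * spt_pathpfthld and is considered failed past spt_pathmaxrxt.  The handler
 * insists on the 0xffff sentinel in spt_pathcpthld and answers EINVAL for
 * anything else.
 */
static int
set_path_thresholds(int fd, sctp_assoc_t assoc_id)
{
    struct sctp_paddrthlds th;

    memset(&th, 0, sizeof(th));
    th.spt_assoc_id = assoc_id; /* all paths: leave spt_address zeroed */
    th.spt_pathmaxrxt = 5;      /* failed after 5 consecutive errors */
    th.spt_pathpfthld = 2;      /* potentially failed after 2 */
    th.spt_pathcpthld = 0xffff; /* mandatory sentinel */
    return (setsockopt(fd, IPPROTO_SCTP, SCTP_PEER_ADDR_THLDS,
        &th, sizeof(th)));
}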
inp->sctp_ep.def_net_failure = thlds->spt_pathmaxrxt; inp->sctp_ep.def_net_pf_threshold = thlds->spt_pathpfthld; SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_REMOTE_UDP_ENCAPS_PORT: { struct sctp_udpencaps *encaps; struct sctp_nets *net; struct sockaddr *addr; #if defined(INET) && defined(INET6) struct sockaddr_in sin_store; #endif SCTP_CHECK_AND_CAST(encaps, optval, struct sctp_udpencaps, optsize); SCTP_FIND_STCB(inp, stcb, encaps->sue_assoc_id); #if defined(INET) && defined(INET6) if (encaps->sue_address.ss_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&encaps->sue_address; if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { in6_sin6_2_sin(&sin_store, sin6); addr = (struct sockaddr *)&sin_store; } else { addr = (struct sockaddr *)&encaps->sue_address; } } else { addr = (struct sockaddr *)&encaps->sue_address; } #else addr = (struct sockaddr *)&encaps->sue_address; #endif if (stcb != NULL) { net = sctp_findnet(stcb, addr); } else { /* * We increment here since * sctp_findassociation_ep_addr() will do a * decrement if it finds the stcb as long as * the locked tcb (last argument) is NOT a * TCB.. aka NULL. */ net = NULL; SCTP_INP_INCR_REF(inp); stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL); if (stcb == NULL) { SCTP_INP_DECR_REF(inp); } } if ((stcb != NULL) && (net == NULL)) { #ifdef INET if (addr->sa_family == AF_INET) { struct sockaddr_in *sin; sin = (struct sockaddr_in *)addr; if (sin->sin_addr.s_addr != INADDR_ANY) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); SCTP_TCB_UNLOCK(stcb); error = EINVAL; break; } } else #endif #ifdef INET6 if (addr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)addr; if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); SCTP_TCB_UNLOCK(stcb); error = EINVAL; break; } } else #endif { error = EAFNOSUPPORT; SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; } } if (stcb != NULL) { if (net != NULL) { net->port = encaps->sue_port; } else { stcb->asoc.port = encaps->sue_port; } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (encaps->sue_assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_WLOCK(inp); inp->sctp_ep.port = encaps->sue_port; SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_ECN_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_WLOCK(inp); if (av->assoc_value == 0) { inp->ecn_supported = 0; } else { inp->ecn_supported = 1; } SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_PR_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb)
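/*
 * Illustrative sketch for SCTP_REMOTE_UDP_ENCAPS_PORT above (UDP
 * encapsulation, RFC 6951).  Assumption: sue_port is in network byte order,
 * as for other port fields in this API; a port of 0 is taken here to mean
 * "no encapsulation".
 */
static int
set_udp_encaps_port(int fd)
{
    struct sctp_udpencaps enc;

    memset(&enc, 0, sizeof(enc));
    enc.sue_assoc_id = SCTP_FUTURE_ASSOC;
    enc.sue_port = htons(9899);     /* IANA sctp-tunneling port */
    return (setsockopt(fd, IPPROTO_SCTP, SCTP_REMOTE_UDP_ENCAPS_PORT,
        &enc, sizeof(enc)));
}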
{ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_WLOCK(inp); if (av->assoc_value == 0) { inp->prsctp_supported = 0; } else { inp->prsctp_supported = 1; } SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_AUTH_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { if ((av->assoc_value == 0) && (inp->asconf_supported == 1)) { /* * AUTH is required for * ASCONF */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } else { SCTP_INP_WLOCK(inp); if (av->assoc_value == 0) { inp->auth_supported = 0; } else { inp->auth_supported = 1; } SCTP_INP_WUNLOCK(inp); } } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_ASCONF_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { if ((av->assoc_value != 0) && (inp->auth_supported == 0)) { /* * AUTH is required for * ASCONF */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } else { SCTP_INP_WLOCK(inp); if (av->assoc_value == 0) { inp->asconf_supported = 0; sctp_auth_delete_chunk(SCTP_ASCONF, inp->sctp_ep.local_auth_chunks); sctp_auth_delete_chunk(SCTP_ASCONF_ACK, inp->sctp_ep.local_auth_chunks); } else { inp->asconf_supported = 1; sctp_auth_add_chunk(SCTP_ASCONF, inp->sctp_ep.local_auth_chunks); sctp_auth_add_chunk(SCTP_ASCONF_ACK, inp->sctp_ep.local_auth_chunks); } SCTP_INP_WUNLOCK(inp); } } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_RECONFIG_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_WLOCK(inp); if (av->assoc_value == 0) { inp->reconfig_supported = 0; } else { inp->reconfig_supported = 1; } SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_NRSACK_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); SCTP_FIND_STCB(inp, stcb, 
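/*
 * Illustrative sketch: the *_SUPPORTED cases above (ECN, PR-SCTP, AUTH,
 * ASCONF, RECONFIG, NR-SACK, packet drop) all share one shape and can only
 * be flipped while no association exists -- a found TCB means EINVAL.  Note
 * the coupling enforced above: ASCONF cannot be enabled without AUTH, nor
 * AUTH disabled while ASCONF is on.
 */
static int
enable_prsctp(int fd)
{
    struct sctp_assoc_value av;

    memset(&av, 0, sizeof(av));
    av.assoc_id = SCTP_FUTURE_ASSOC;
    av.assoc_value = 1;     /* 0 disables, nonzero enables */
    return (setsockopt(fd, IPPROTO_SCTP, SCTP_PR_SUPPORTED,
        &av, sizeof(av)));
}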
av->assoc_id); if (stcb) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_WLOCK(inp); if (av->assoc_value == 0) { inp->nrsack_supported = 0; } else { inp->nrsack_supported = 1; } SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_PKTDROP_SUPPORTED: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_WLOCK(inp); if (av->assoc_value == 0) { inp->pktdrop_supported = 0; } else { inp->pktdrop_supported = 1; } SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_MAX_CWND: { struct sctp_assoc_value *av; struct sctp_nets *net; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { stcb->asoc.max_cwnd = av->assoc_value; if (stcb->asoc.max_cwnd > 0) { TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if ((net->cwnd > stcb->asoc.max_cwnd) && (net->cwnd > (net->mtu - sizeof(struct sctphdr)))) { net->cwnd = stcb->asoc.max_cwnd; if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) { net->cwnd = net->mtu - sizeof(struct sctphdr); } } } } SCTP_TCB_UNLOCK(stcb); } else { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) && (av->assoc_id == SCTP_FUTURE_ASSOC))) { SCTP_INP_WLOCK(inp); inp->max_cwnd = av->assoc_value; SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } } break; } case SCTP_ACCEPT_ZERO_CHECKSUM: { uint32_t *value; SCTP_CHECK_AND_CAST(value, optval, uint32_t, optsize); if ((*value == SCTP_EDMID_NONE) || (*value == SCTP_EDMID_LOWER_LAYER_DTLS)) { SCTP_INP_WLOCK(inp); inp->rcv_edmid = *value; SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } break; } default: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT); error = ENOPROTOOPT; break; } /* end switch (opt) */ return (error); } int sctp_ctloutput(struct socket *so, struct sockopt *sopt) { struct epoch_tracker et; struct sctp_inpcb *inp; void *optval = NULL; void *p; size_t optsize = 0; int error = 0; if ((sopt->sopt_level == SOL_SOCKET) && (sopt->sopt_name == SO_SETFIB)) { inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS); return (EINVAL); } SCTP_INP_WLOCK(inp); inp->fibnum = so->so_fibnum; SCTP_INP_WUNLOCK(inp); return (0); } if (sopt->sopt_level != IPPROTO_SCTP) { /* wrong proto level... 
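/*
 * Illustrative sketch for the SCTP_MAX_CWND case above.  On a live
 * association each net->cwnd is clamped immediately, though never below
 * net->mtu minus the SCTP common header; an assoc_value of 0 leaves the
 * congestion window uncapped.
 */
static int
cap_cwnd(int fd, uint32_t max_cwnd)
{
    struct sctp_assoc_value av;

    memset(&av, 0, sizeof(av));
    av.assoc_id = SCTP_FUTURE_ASSOC;
    av.assoc_value = max_cwnd;      /* bytes */
    return (setsockopt(fd, IPPROTO_SCTP, SCTP_MAX_CWND,
        &av, sizeof(av)));
}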
send back up to IP */ #ifdef INET6 if (INP_CHECK_SOCKAF(so, AF_INET6)) error = ip6_ctloutput(so, sopt); #endif /* INET6 */ #if defined(INET) && defined(INET6) else #endif #ifdef INET error = ip_ctloutput(so, sopt); #endif return (error); } optsize = sopt->sopt_valsize; if (optsize > SCTP_SOCKET_OPTION_LIMIT) { SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS); return (ENOBUFS); } if (optsize) { SCTP_MALLOC(optval, void *, optsize, SCTP_M_SOCKOPT); if (optval == NULL) { SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS); return (ENOBUFS); } error = sooptcopyin(sopt, optval, optsize, optsize); if (error) { SCTP_FREE(optval, SCTP_M_SOCKOPT); goto out; } } p = (void *)sopt->sopt_td; if (sopt->sopt_dir == SOPT_SET) { NET_EPOCH_ENTER(et); error = sctp_setopt(so, sopt->sopt_name, optval, optsize, p); NET_EPOCH_EXIT(et); } else if (sopt->sopt_dir == SOPT_GET) { error = sctp_getopt(so, sopt->sopt_name, optval, &optsize, p); } else { SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } if ((error == 0) && (optval != NULL)) { error = sooptcopyout(sopt, optval, optsize); SCTP_FREE(optval, SCTP_M_SOCKOPT); } else if (optval != NULL) { SCTP_FREE(optval, SCTP_M_SOCKOPT); } out: return (error); } #ifdef INET static int sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p) { struct epoch_tracker et; int error = 0; int create_lock_on = 0; uint32_t vrf_id; struct sctp_inpcb *inp; struct sctp_tcb *stcb = NULL; inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { /* I made this the same as TCP since we are not set up. */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (ECONNRESET); } if (addr == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return EINVAL; } switch (addr->sa_family) { #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6; if (addr->sa_len != sizeof(struct sockaddr_in6)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } sin6 = (struct sockaddr_in6 *)addr; if (p != NULL && (error = prison_remote_ip6(p->td_ucred, &sin6->sin6_addr)) != 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); return (error); } break; } #endif #ifdef INET case AF_INET: { struct sockaddr_in *sin; if (addr->sa_len != sizeof(struct sockaddr_in)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (EINVAL); } sin = (struct sockaddr_in *)addr; if (p != NULL && (error = prison_remote_ip4(p->td_ucred, &sin->sin_addr)) != 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); return (error); } break; } #endif default: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EAFNOSUPPORT); return (EAFNOSUPPORT); } SCTP_INP_INCR_REF(inp); SCTP_ASOC_CREATE_LOCK(inp); create_lock_on = 1; NET_EPOCH_ENTER(et); if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) { /* Should I really unlock? */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EFAULT); error = EFAULT; goto out_now; } #ifdef INET6 if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) && (addr->sa_family == AF_INET6)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; goto out_now; } #endif if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) { /* Bind an ephemeral port */ error = sctp_inpcb_bind(so, NULL, NULL, p); if (error) { goto out_now; } } /* Now do we connect?
*/ if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) && (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE))) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; goto out_now; } if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) { /* We are already connected AND the TCP model */ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE); error = EADDRINUSE; goto out_now; } if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) { SCTP_INP_RLOCK(inp); stcb = LIST_FIRST(&inp->sctp_asoc_list); SCTP_INP_RUNLOCK(inp); } else { /* * We increment here since sctp_findassociation_ep_addr() * will do a decrement if it finds the stcb as long as the * locked tcb (last argument) is NOT a TCB.. aka NULL. */ SCTP_INP_INCR_REF(inp); stcb = sctp_findassociation_ep_addr(&inp, addr, NULL, NULL, NULL); if (stcb == NULL) { SCTP_INP_DECR_REF(inp); } else { SCTP_TCB_UNLOCK(stcb); } } if (stcb != NULL) { /* Already have or am bringing up an association */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY); error = EALREADY; goto out_now; } vrf_id = inp->def_vrf_id; /* We are GOOD to go */ stcb = sctp_aloc_assoc_connected(inp, addr, &error, 0, 0, vrf_id, inp->sctp_ep.pre_open_stream_count, inp->sctp_ep.port, p, SCTP_INITIALIZE_AUTH_PARAMS); if (stcb == NULL) { /* Gak! no memory */ goto out_now; } SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT); (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered); sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED); SCTP_TCB_UNLOCK(stcb); out_now: NET_EPOCH_EXIT(et); if (create_lock_on) { SCTP_ASOC_CREATE_UNLOCK(inp); } SCTP_INP_DECR_REF(inp); return (error); } #endif int sctp_listen(struct socket *so, int backlog, struct thread *p) { /* * Note this module depends on the protocol processing being called * AFTER any socket level flags and backlog are applied to the * socket. The traditional way that the socket flags are applied is * AFTER protocol processing. We have made a change to the * sys/kern/uipc_socket.c module to reverse this but this MUST be in * place if the socket API for SCTP is to work properly. */ int error = 0; struct sctp_inpcb *inp; inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { /* I made this the same as TCP since we are not set up. */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (ECONNRESET); } if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) { /* See if we have a listener */ struct sctp_inpcb *tinp; union sctp_sockstore store; if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) { /* not bound all */ struct sctp_laddr *laddr; LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { memcpy(&store, &laddr->ifa->address, sizeof(store)); switch (store.sa.sa_family) { #ifdef INET case AF_INET: store.sin.sin_port = inp->sctp_lport; break; #endif #ifdef INET6 case AF_INET6: store.sin6.sin6_port = inp->sctp_lport; break; #endif default: break; } tinp = sctp_pcb_findep(&store.sa, 0, 0, inp->def_vrf_id); if (tinp && (tinp != inp) && ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) && ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) && (SCTP_IS_LISTENING(tinp))) { /* * we have a listener already and * it's not this inp.
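/*
 * Illustrative sketch: a minimal one-to-one (TCP-model) client driving
 * sctp_connect() above.  SOCK_STREAM selects sctp_stream_protosw, so a
 * second connect() on a connected socket fails with EADDRINUSE and a
 * connect() racing an in-progress setup fails with EALREADY.  `peer` is a
 * hypothetical prepared peer address.
 */
static int
sctp_client(const struct sockaddr_in *peer)
{
    int fd;

    fd = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);
    if (fd < 0)
        return (-1);
    if (connect(fd, (const struct sockaddr *)peer, sizeof(*peer)) != 0) {
        close(fd);
        return (-1);
    }
    return (fd);
}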
*/ SCTP_INP_DECR_REF(tinp); return (EADDRINUSE); } else if (tinp) { SCTP_INP_DECR_REF(tinp); } } } else { /* Set up a local addr bound all */ memset(&store, 0, sizeof(store)); #ifdef INET6 if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { store.sa.sa_family = AF_INET6; store.sa.sa_len = sizeof(struct sockaddr_in6); } #endif #ifdef INET if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) { store.sa.sa_family = AF_INET; store.sa.sa_len = sizeof(struct sockaddr_in); } #endif switch (store.sa.sa_family) { #ifdef INET case AF_INET: store.sin.sin_port = inp->sctp_lport; break; #endif #ifdef INET6 case AF_INET6: store.sin6.sin6_port = inp->sctp_lport; break; #endif default: break; } tinp = sctp_pcb_findep(&store.sa, 0, 0, inp->def_vrf_id); if (tinp && (tinp != inp) && ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) && ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) && (SCTP_IS_LISTENING(tinp))) { /* * we have a listener already and it's not * this inp. */ SCTP_INP_DECR_REF(tinp); return (EADDRINUSE); } else if (tinp) { SCTP_INP_DECR_REF(tinp); } } } SCTP_INP_INFO_WLOCK(); SCTP_INP_WLOCK(inp); #ifdef SCTP_LOCK_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) { sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK); } #endif if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) && (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { /* * The unlucky case - We are in the tcp pool with this guy. * - Someone else is in the main inp slot. - We must move * this guy (the listener) to the main slot - We must then * move the guy that was listener to the TCP Pool. */ if (sctp_swap_inpcb_for_listen(inp)) { error = EADDRINUSE; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); goto out; } } SOCK_LOCK(so); error = solisten_proto_check(so); if (error) { SOCK_UNLOCK(so); goto out; } if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) { SOCK_UNLOCK(so); solisten_proto_abort(so); error = EADDRINUSE; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); goto out; } if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && ((inp->sctp_flags & SCTP_PCB_FLAGS_WAS_CONNECTED) || (inp->sctp_flags & SCTP_PCB_FLAGS_WAS_ABORTED))) { SOCK_UNLOCK(so); solisten_proto_abort(so); error = EINVAL; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); goto out; } if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) { if ((error = sctp_inpcb_bind_locked(inp, NULL, NULL, p))) { SOCK_UNLOCK(so); solisten_proto_abort(so); /* bind error, probably perm */ goto out; } } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) == 0) { solisten_proto(so, backlog); SOCK_UNLOCK(so); inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING; } else { solisten_proto_abort(so); SOCK_UNLOCK(so); if (backlog > 0) { inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING; } else { inp->sctp_flags &= ~SCTP_PCB_FLAGS_ACCEPTING; } } out: SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); return (error); } static int sctp_defered_wakeup_cnt = 0; int -sctp_accept(struct socket *so, struct sockaddr **addr) +sctp_accept(struct socket *so, struct sockaddr *sa) { struct sctp_tcb *stcb; struct sctp_inpcb *inp; union sctp_sockstore store; #ifdef INET6 int error; #endif inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (ECONNRESET); } SCTP_INP_WLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) { SCTP_INP_WUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ,
EOPNOTSUPP); return (EOPNOTSUPP); } if (so->so_state & SS_ISDISCONNECTED) { SCTP_INP_WUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ECONNABORTED); return (ECONNABORTED); } stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb == NULL) { SCTP_INP_WUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (ECONNRESET); } SCTP_TCB_LOCK(stcb); store = stcb->asoc.primary_destination->ro._l_addr; SCTP_CLEAR_SUBSTATE(stcb, SCTP_STATE_IN_ACCEPT_QUEUE); /* Wake any delayed sleep action */ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) { inp->sctp_flags &= ~SCTP_PCB_FLAGS_DONT_WAKE; if (inp->sctp_flags & SCTP_PCB_FLAGS_WAKEOUTPUT) { inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT; SOCKBUF_LOCK(&inp->sctp_socket->so_snd); if (sowriteable(inp->sctp_socket)) { sowwakeup_locked(inp->sctp_socket); } else { SOCKBUF_UNLOCK(&inp->sctp_socket->so_snd); } } if (inp->sctp_flags & SCTP_PCB_FLAGS_WAKEINPUT) { inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT; SOCKBUF_LOCK(&inp->sctp_socket->so_rcv); if (soreadable(inp->sctp_socket)) { sctp_defered_wakeup_cnt++; sorwakeup_locked(inp->sctp_socket); } else { SOCKBUF_UNLOCK(&inp->sctp_socket->so_rcv); } } } SCTP_INP_WUNLOCK(inp); if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_19); } else { SCTP_TCB_UNLOCK(stcb); } switch (store.sa.sa_family) { #ifdef INET case AF_INET: - { - struct sockaddr_in *sin; - - SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin); - if (sin == NULL) - return (ENOMEM); - sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); - sin->sin_port = store.sin.sin_port; - sin->sin_addr = store.sin.sin_addr; - *addr = (struct sockaddr *)sin; - break; - } + *(struct sockaddr_in *)sa = (struct sockaddr_in ){ + .sin_family = AF_INET, + .sin_len = sizeof(struct sockaddr_in), + .sin_port = store.sin.sin_port, + .sin_addr = store.sin.sin_addr, + }; + break; #endif #ifdef INET6 case AF_INET6: - { - struct sockaddr_in6 *sin6; - - SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6); - if (sin6 == NULL) - return (ENOMEM); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(*sin6); - sin6->sin6_port = store.sin6.sin6_port; - sin6->sin6_addr = store.sin6.sin6_addr; - if ((error = sa6_recoverscope(sin6)) != 0) { - SCTP_FREE_SONAME(sin6); - return (error); - } - *addr = (struct sockaddr *)sin6; - break; - } + *(struct sockaddr_in6 *)sa = (struct sockaddr_in6 ){ + .sin6_family = AF_INET6, + .sin6_len = sizeof(struct sockaddr_in6), + .sin6_port = store.sin6.sin6_port, + .sin6_addr = store.sin6.sin6_addr, + }; + if ((error = sa6_recoverscope((struct sockaddr_in6 *)sa)) != 0) + return (error); + break; #endif default: /* TSNH */ break; } return (0); } #ifdef INET int sctp_ingetaddr(struct socket *so, struct sockaddr **addr) { struct sockaddr_in *sin; uint32_t vrf_id; struct sctp_inpcb *inp; struct sctp_ifa *sctp_ifa; /* * Do the malloc first in case it blocks. 
*/ SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin); if (sin == NULL) return (ENOMEM); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); inp = (struct sctp_inpcb *)so->so_pcb; if (!inp) { SCTP_FREE_SONAME(sin); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (ECONNRESET); } SCTP_INP_RLOCK(inp); sin->sin_port = inp->sctp_lport; if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) { struct sctp_tcb *stcb; struct sockaddr_in *sin_a; struct sctp_nets *net; int fnd; stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb == NULL) { goto notConn; } fnd = 0; sin_a = NULL; SCTP_TCB_LOCK(stcb); TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { sin_a = (struct sockaddr_in *)&net->ro._l_addr; if (sin_a == NULL) /* this will make coverity happy */ continue; if (sin_a->sin_family == AF_INET) { fnd = 1; break; } } if ((!fnd) || (sin_a == NULL)) { /* punt */ SCTP_TCB_UNLOCK(stcb); goto notConn; } vrf_id = inp->def_vrf_id; sctp_ifa = sctp_source_address_selection(inp, stcb, (sctp_route_t *)&net->ro, net, 0, vrf_id); if (sctp_ifa) { sin->sin_addr = sctp_ifa->address.sin.sin_addr; sctp_free_ifa(sctp_ifa); } SCTP_TCB_UNLOCK(stcb); } else { /* For the bound all case you get back 0 */ notConn: sin->sin_addr.s_addr = 0; } } else { /* Take the first IPv4 address in the list */ struct sctp_laddr *laddr; int fnd = 0; LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa->address.sa.sa_family == AF_INET) { struct sockaddr_in *sin_a; sin_a = &laddr->ifa->address.sin; sin->sin_addr = sin_a->sin_addr; fnd = 1; break; } } if (!fnd) { SCTP_FREE_SONAME(sin); SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); return (ENOENT); } } SCTP_INP_RUNLOCK(inp); (*addr) = (struct sockaddr *)sin; return (0); } int sctp_peeraddr(struct socket *so, struct sockaddr **addr) { struct sockaddr_in *sin; int fnd; struct sockaddr_in *sin_a; struct sctp_inpcb *inp; struct sctp_tcb *stcb; struct sctp_nets *net; /* Do the malloc first in case it blocks. 
*/ SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin); if (sin == NULL) return (ENOMEM); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); inp = (struct sctp_inpcb *)so->so_pcb; if ((inp == NULL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0)) { /* UDP type and listeners will drop out here */ SCTP_FREE_SONAME(sin); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN); return (ENOTCONN); } SCTP_INP_RLOCK(inp); stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb) { SCTP_TCB_LOCK(stcb); } SCTP_INP_RUNLOCK(inp); if (stcb == NULL) { SCTP_FREE_SONAME(sin); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); return (ECONNRESET); } fnd = 0; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { sin_a = (struct sockaddr_in *)&net->ro._l_addr; if (sin_a->sin_family == AF_INET) { fnd = 1; sin->sin_port = stcb->rport; sin->sin_addr = sin_a->sin_addr; break; } } SCTP_TCB_UNLOCK(stcb); if (!fnd) { /* No IPv4 address */ SCTP_FREE_SONAME(sin); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); return (ENOENT); } (*addr) = (struct sockaddr *)sin; return (0); } #define SCTP_PROTOSW \ .pr_protocol = IPPROTO_SCTP, \ .pr_ctloutput = sctp_ctloutput, \ .pr_abort = sctp_abort, \ .pr_accept = sctp_accept, \ .pr_attach = sctp_attach, \ .pr_bind = sctp_bind, \ .pr_connect = sctp_connect, \ .pr_control = in_control, \ .pr_close = sctp_close, \ .pr_detach = sctp_close, \ .pr_sopoll = sopoll_generic, \ .pr_flush = sctp_flush, \ .pr_disconnect = sctp_disconnect, \ .pr_listen = sctp_listen, \ .pr_peeraddr = sctp_peeraddr, \ .pr_send = sctp_sendm, \ .pr_shutdown = sctp_shutdown, \ .pr_sockaddr = sctp_ingetaddr, \ .pr_sosend = sctp_sosend, \ .pr_soreceive = sctp_soreceive \ struct protosw sctp_seqpacket_protosw = { .pr_type = SOCK_SEQPACKET, .pr_flags = PR_WANTRCVD, SCTP_PROTOSW }; struct protosw sctp_stream_protosw = { .pr_type = SOCK_STREAM, .pr_flags = PR_CONNREQUIRED | PR_WANTRCVD, SCTP_PROTOSW }; #endif diff --git a/sys/netinet/sctp_var.h b/sys/netinet/sctp_var.h index 3a649a1860e2..ff6672bd0248 100644 --- a/sys/netinet/sctp_var.h +++ b/sys/netinet/sctp_var.h @@ -1,348 +1,348 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _NETINET_SCTP_VAR_H_ #define _NETINET_SCTP_VAR_H_ #include #if defined(_KERNEL) || defined(__Userspace__) extern struct protosw sctp_seqpacket_protosw, sctp_stream_protosw; #define sctp_feature_on(inp, feature) (inp->sctp_features |= feature) #define sctp_feature_off(inp, feature) (inp->sctp_features &= ~feature) #define sctp_is_feature_on(inp, feature) ((inp->sctp_features & feature) == feature) #define sctp_is_feature_off(inp, feature) ((inp->sctp_features & feature) == 0) #define sctp_stcb_feature_on(inp, stcb, feature) {\ if (stcb) { \ stcb->asoc.sctp_features |= feature; \ } else if (inp) { \ inp->sctp_features |= feature; \ } \ } #define sctp_stcb_feature_off(inp, stcb, feature) {\ if (stcb) { \ stcb->asoc.sctp_features &= ~feature; \ } else if (inp) { \ inp->sctp_features &= ~feature; \ } \ } #define sctp_stcb_is_feature_on(inp, stcb, feature) \ (((stcb != NULL) && \ ((stcb->asoc.sctp_features & feature) == feature)) || \ ((stcb == NULL) && (inp != NULL) && \ ((inp->sctp_features & feature) == feature))) #define sctp_stcb_is_feature_off(inp, stcb, feature) \ (((stcb != NULL) && \ ((stcb->asoc.sctp_features & feature) == 0)) || \ ((stcb == NULL) && (inp != NULL) && \ ((inp->sctp_features & feature) == 0)) || \ ((stcb == NULL) && (inp == NULL))) /* managing mobility_feature in inpcb (by micchie) */ #define sctp_mobility_feature_on(inp, feature) (inp->sctp_mobility_features |= feature) #define sctp_mobility_feature_off(inp, feature) (inp->sctp_mobility_features &= ~feature) #define sctp_is_mobility_feature_on(inp, feature) (inp->sctp_mobility_features & feature) #define sctp_is_mobility_feature_off(inp, feature) ((inp->sctp_mobility_features & feature) == 0) #define sctp_maxspace(sb) (max((sb)->sb_hiwat,SCTP_MINIMAL_RWND)) #define sctp_sbspace(asoc, sb) ((long) ((sctp_maxspace(sb) > (asoc)->sb_cc) ? (sctp_maxspace(sb) - (asoc)->sb_cc) : 0)) #define sctp_sbspace_failedmsgs(sb) ((long) ((sctp_maxspace(sb) > SCTP_SBAVAIL(sb)) ? (sctp_maxspace(sb) - SCTP_SBAVAIL(sb)) : 0)) #define sctp_sbspace_sub(a,b) (((a) > (b)) ? ((a) - (b)) : 0) /* * I tried to cache the readq entries at one point. But the reality * is that it did not add any performance since this meant we had to * lock the STCB on read. And at that point once you have to do an * extra lock, it really does not matter if the lock is in the ZONE * stuff or in our code. 
Note that this same problem would occur with * an mbuf cache as well so it is not really worth doing, at least * right now :-D */ #ifdef INVARIANTS #define sctp_free_a_readq(_stcb, _readq) { \ if ((_readq)->on_strm_q) \ panic("On strm q stcb:%p readq:%p", (_stcb), (_readq)); \ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), (_readq)); \ SCTP_DECR_READQ_COUNT(); \ } #else #define sctp_free_a_readq(_stcb, _readq) { \ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), (_readq)); \ SCTP_DECR_READQ_COUNT(); \ } #endif #define sctp_alloc_a_readq(_stcb, _readq) { \ (_readq) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_readq), struct sctp_queued_to_read); \ if ((_readq)) { \ SCTP_INCR_READQ_COUNT(); \ } \ } #define sctp_free_a_strmoq(_stcb, _strmoq, _so_locked) { \ if ((_strmoq)->holds_key_ref) { \ sctp_auth_key_release(stcb, sp->auth_keyid, _so_locked); \ (_strmoq)->holds_key_ref = 0; \ } \ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_strmoq), (_strmoq)); \ SCTP_DECR_STRMOQ_COUNT(); \ } #define sctp_alloc_a_strmoq(_stcb, _strmoq) { \ (_strmoq) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_strmoq), struct sctp_stream_queue_pending); \ if ((_strmoq)) { \ memset(_strmoq, 0, sizeof(struct sctp_stream_queue_pending)); \ SCTP_INCR_STRMOQ_COUNT(); \ (_strmoq)->holds_key_ref = 0; \ } \ } #define sctp_free_a_chunk(_stcb, _chk, _so_locked) { \ if ((_chk)->holds_key_ref) {\ sctp_auth_key_release((_stcb), (_chk)->auth_keyid, _so_locked); \ (_chk)->holds_key_ref = 0; \ } \ if (_stcb) { \ SCTP_TCB_LOCK_ASSERT((_stcb)); \ if ((_chk)->whoTo) { \ sctp_free_remote_addr((_chk)->whoTo); \ (_chk)->whoTo = NULL; \ } \ if (((_stcb)->asoc.free_chunk_cnt > SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit)) || \ (SCTP_BASE_INFO(ipi_free_chunks) > SCTP_BASE_SYSCTL(sctp_system_free_resc_limit))) { \ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), (_chk)); \ SCTP_DECR_CHK_COUNT(); \ } else { \ TAILQ_INSERT_TAIL(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \ (_stcb)->asoc.free_chunk_cnt++; \ atomic_add_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); \ } \ } else { \ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), (_chk)); \ SCTP_DECR_CHK_COUNT(); \ } \ } #define sctp_alloc_a_chunk(_stcb, _chk) { \ if (TAILQ_EMPTY(&(_stcb)->asoc.free_chunks)) { \ (_chk) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_chunk), struct sctp_tmit_chunk); \ if ((_chk)) { \ SCTP_INCR_CHK_COUNT(); \ (_chk)->whoTo = NULL; \ (_chk)->holds_key_ref = 0; \ } \ } else { \ (_chk) = TAILQ_FIRST(&(_stcb)->asoc.free_chunks); \ TAILQ_REMOVE(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \ atomic_subtract_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); \ (_chk)->holds_key_ref = 0; \ SCTP_STAT_INCR(sctps_cached_chk); \ (_stcb)->asoc.free_chunk_cnt--; \ } \ } #define sctp_free_remote_addr(__net) { \ if ((__net)) { \ if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&(__net)->ref_count)) { \ RO_NHFREE(&(__net)->ro); \ if ((__net)->src_addr_selected) { \ sctp_free_ifa((__net)->ro._s_addr); \ (__net)->ro._s_addr = NULL; \ } \ (__net)->src_addr_selected = 0; \ (__net)->dest_state &= ~SCTP_ADDR_REACHABLE; \ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_net), (__net)); \ SCTP_DECR_RADDR_COUNT(); \ } \ } \ } #define sctp_sbfree(ctl, stcb, sb, m) { \ SCTP_SB_DECR(sb, SCTP_BUF_LEN((m))); \ SCTP_SAVE_ATOMIC_DECREMENT(&(sb)->sb_mbcnt, MSIZE); \ if (((ctl)->do_not_ref_stcb == 0) && stcb) {\ SCTP_SAVE_ATOMIC_DECREMENT(&(stcb)->asoc.sb_cc, SCTP_BUF_LEN((m))); \ SCTP_SAVE_ATOMIC_DECREMENT(&(stcb)->asoc.my_rwnd_control_len, MSIZE); \ } \ if (SCTP_BUF_TYPE(m) != MT_DATA && SCTP_BUF_TYPE(m) != MT_HEADER && \ SCTP_BUF_TYPE(m) != MT_OOBDATA) \ 
atomic_subtract_int(&(sb)->sb_ctl,SCTP_BUF_LEN((m))); \ } #define sctp_sballoc(stcb, sb, m) { \ SCTP_SB_INCR(sb, SCTP_BUF_LEN((m))); \ atomic_add_int(&(sb)->sb_mbcnt, MSIZE); \ if (stcb) { \ atomic_add_int(&(stcb)->asoc.sb_cc, SCTP_BUF_LEN((m))); \ atomic_add_int(&(stcb)->asoc.my_rwnd_control_len, MSIZE); \ } \ if (SCTP_BUF_TYPE(m) != MT_DATA && SCTP_BUF_TYPE(m) != MT_HEADER && \ SCTP_BUF_TYPE(m) != MT_OOBDATA) \ atomic_add_int(&(sb)->sb_ctl,SCTP_BUF_LEN((m))); \ } #define sctp_ucount_incr(val) { \ val++; \ } #define sctp_ucount_decr(val) { \ if (val > 0) { \ val--; \ } else { \ val = 0; \ } \ } #define sctp_mbuf_crush(data) do { \ struct mbuf *_m; \ _m = (data); \ while (_m && (SCTP_BUF_LEN(_m) == 0)) { \ (data) = SCTP_BUF_NEXT(_m); \ SCTP_BUF_NEXT(_m) = NULL; \ sctp_m_free(_m); \ _m = (data); \ } \ } while (0) #define sctp_flight_size_decrease(tp1) do { \ if (tp1->whoTo->flight_size >= tp1->book_size) \ tp1->whoTo->flight_size -= tp1->book_size; \ else \ tp1->whoTo->flight_size = 0; \ } while (0) #define sctp_flight_size_increase(tp1) do { \ (tp1)->whoTo->flight_size += (tp1)->book_size; \ } while (0) #ifdef SCTP_FS_SPEC_LOG #define sctp_total_flight_decrease(stcb, tp1) do { \ if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \ stcb->asoc.fs_index = 0;\ stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \ stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.tsn; \ stcb->asoc.fslog[stcb->asoc.fs_index].book = tp1->book_size; \ stcb->asoc.fslog[stcb->asoc.fs_index].sent = tp1->sent; \ stcb->asoc.fslog[stcb->asoc.fs_index].incr = 0; \ stcb->asoc.fslog[stcb->asoc.fs_index].decr = 1; \ stcb->asoc.fs_index++; \ tp1->window_probe = 0; \ if (stcb->asoc.total_flight >= tp1->book_size) { \ stcb->asoc.total_flight -= tp1->book_size; \ if (stcb->asoc.total_flight_count > 0) \ stcb->asoc.total_flight_count--; \ } else { \ stcb->asoc.total_flight = 0; \ stcb->asoc.total_flight_count = 0; \ } \ } while (0) #define sctp_total_flight_increase(stcb, tp1) do { \ if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \ stcb->asoc.fs_index = 0;\ stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \ stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.tsn; \ stcb->asoc.fslog[stcb->asoc.fs_index].book = tp1->book_size; \ stcb->asoc.fslog[stcb->asoc.fs_index].sent = tp1->sent; \ stcb->asoc.fslog[stcb->asoc.fs_index].incr = 1; \ stcb->asoc.fslog[stcb->asoc.fs_index].decr = 0; \ stcb->asoc.fs_index++; \ (stcb)->asoc.total_flight_count++; \ (stcb)->asoc.total_flight += (tp1)->book_size; \ } while (0) #else #define sctp_total_flight_decrease(stcb, tp1) do { \ tp1->window_probe = 0; \ if (stcb->asoc.total_flight >= tp1->book_size) { \ stcb->asoc.total_flight -= tp1->book_size; \ if (stcb->asoc.total_flight_count > 0) \ stcb->asoc.total_flight_count--; \ } else { \ stcb->asoc.total_flight = 0; \ stcb->asoc.total_flight_count = 0; \ } \ } while (0) #define sctp_total_flight_increase(stcb, tp1) do { \ (stcb)->asoc.total_flight_count++; \ (stcb)->asoc.total_flight += (tp1)->book_size; \ } while (0) #endif #define SCTP_PF_ENABLED(_net) (_net->pf_threshold < _net->failure_threshold) #define SCTP_NET_IS_PF(_net) (_net->pf_threshold < _net->error_count) struct sctp_nets; struct sctp_inpcb; struct sctp_tcb; struct sctphdr; void sctp_close(struct socket *so); void sctp_abort(struct socket *so); int sctp_disconnect(struct socket *so); ipproto_ctlinput_t sctp_ctlinput; int sctp_ctloutput(struct socket *, struct sockopt *); #ifdef INET void sctp_input_with_port(struct mbuf 
*, int, uint16_t); int sctp_input(struct mbuf **, int *, int); #endif void sctp_pathmtu_adjustment(struct sctp_tcb *, uint32_t, bool); void sctp_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *, uint8_t, uint8_t, uint16_t, uint32_t); int sctp_flush(struct socket *, int); int sctp_shutdown(struct socket *); int sctp_bindx(struct socket *, int, struct sockaddr_storage *, int, int, struct proc *); /* can't use sctp_assoc_t here */ int sctp_peeloff(struct socket *, struct socket *, int, caddr_t, int *); int sctp_ingetaddr(struct socket *, struct sockaddr **); int sctp_peeraddr(struct socket *, struct sockaddr **); int sctp_listen(struct socket *, int, struct thread *); -int sctp_accept(struct socket *, struct sockaddr **); +int sctp_accept(struct socket *, struct sockaddr *); #endif /* _KERNEL */ #endif /* !_NETINET_SCTP_VAR_H_ */ diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index f89e60cce8cd..14e0b814dec9 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1,3157 +1,3145 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * Copyright (c) 2006-2007 Robert N. M. Watson * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_kern_tls.h" #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #include #endif #include #include #include #include #include #include #include #include #include #include #ifdef TCPPCAP #include #endif #ifdef TCP_OFFLOAD #include #endif #include #include #include #include #include #include #include /* * TCP protocol interface to socket abstraction. */ #ifdef INET static int tcp_connect(struct tcpcb *, struct sockaddr_in *, struct thread *td); #endif /* INET */ #ifdef INET6 static int tcp6_connect(struct tcpcb *, struct sockaddr_in6 *, struct thread *td); #endif /* INET6 */ static void tcp_disconnect(struct tcpcb *); static void tcp_usrclosed(struct tcpcb *); static void tcp_fill_info(const struct tcpcb *, struct tcp_info *); static int tcp_pru_options_support(struct tcpcb *tp, int flags); static void tcp_bblog_pru(struct tcpcb *tp, uint32_t pru, int error) { struct tcp_log_buffer *lgb; KASSERT(tp != NULL, ("tcp_bblog_pru: tp == NULL")); INP_WLOCK_ASSERT(tptoinpcb(tp)); if (tcp_bblogging_on(tp)) { lgb = tcp_log_event(tp, NULL, NULL, NULL, TCP_LOG_PRU, error, 0, NULL, false, NULL, NULL, 0, NULL); } else { lgb = NULL; } if (lgb != NULL) { if (error >= 0) { lgb->tlb_errno = (uint32_t)error; } lgb->tlb_flex1 = pru; } } /* * TCP attaches to socket via pru_attach(), reserving space, * and an internet control block. */ static int tcp_usr_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct tcpcb *tp = NULL; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL")); error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace); if (error) goto out; so->so_rcv.sb_flags |= SB_AUTOSIZE; so->so_snd.sb_flags |= SB_AUTOSIZE; error = in_pcballoc(so, &V_tcbinfo); if (error) goto out; inp = sotoinpcb(so); tp = tcp_newtcpcb(inp); if (tp == NULL) { error = ENOBUFS; in_pcbdetach(inp); in_pcbfree(inp); goto out; } tp->t_state = TCPS_CLOSED; tcp_bblog_pru(tp, PRU_ATTACH, error); INP_WUNLOCK(inp); TCPSTATES_INC(TCPS_CLOSED); out: TCP_PROBE2(debug__user, tp, PRU_ATTACH); return (error); } /* * tcp_usr_detach is called when the socket layer loses its final reference * to the socket, be it a file descriptor reference, a reference from TCP, * etc. At this point, there is only one case in which we will keep around * inpcb state: time wait. */ static void tcp_usr_detach(struct socket *so) { struct inpcb *inp; struct tcpcb *tp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); INP_WLOCK(inp); KASSERT(so->so_pcb == inp && inp->inp_socket == so, ("%s: socket %p inp %p mismatch", __func__, so, inp)); tp = intotcpcb(inp); KASSERT(inp->inp_flags & INP_DROPPED || tp->t_state < TCPS_SYN_SENT, ("%s: inp %p not dropped or embryonic", __func__, inp)); tcp_discardcb(tp); in_pcbdetach(inp); in_pcbfree(inp); } #ifdef INET /* * Give the socket an address. 
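 *
 * Illustrative only (a hedged userland sketch, not part of this change,
 * assuming <sys/socket.h>, <netinet/in.h> and <err.h>; the port number
 * is arbitrary): the usual path into tcp_usr_bind() looks like
 *
 *	struct sockaddr_in sin = {
 *		.sin_len = sizeof(sin),
 *		.sin_family = AF_INET,
 *		.sin_port = htons(8080),
 *		.sin_addr = { .s_addr = htonl(INADDR_ANY) },
 *	};
 *	int s = socket(AF_INET, SOCK_STREAM, 0);
 *	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) == -1)
 *		err(1, "bind");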
*/ static int tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in *sinp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (EINVAL); } tp = intotcpcb(inp); sinp = (struct sockaddr_in *)nam; if (nam->sa_family != AF_INET) { /* * Preserve compatibility with old programs. */ if (nam->sa_family != AF_UNSPEC || nam->sa_len < offsetof(struct sockaddr_in, sin_zero) || sinp->sin_addr.s_addr != INADDR_ANY) { error = EAFNOSUPPORT; goto out; } nam->sa_family = AF_INET; } if (nam->sa_len != sizeof(*sinp)) { error = EINVAL; goto out; } /* * Must check for multicast addresses and disallow binding * to them. */ if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { error = EAFNOSUPPORT; goto out; } INP_HASH_WLOCK(&V_tcbinfo); error = in_pcbbind(inp, sinp, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); out: tcp_bblog_pru(tp, PRU_BIND, error); TCP_PROBE2(debug__user, tp, PRU_BIND); INP_WUNLOCK(inp); return (error); } #endif /* INET */ #ifdef INET6 static int tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in6 *sin6; u_char vflagsav; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (EINVAL); } tp = intotcpcb(inp); vflagsav = inp->inp_vflag; sin6 = (struct sockaddr_in6 *)nam; if (nam->sa_family != AF_INET6) { error = EAFNOSUPPORT; goto out; } if (nam->sa_len != sizeof(*sin6)) { error = EINVAL; goto out; } /* * Must check for multicast addresses and disallow binding * to them. */ if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { error = EAFNOSUPPORT; goto out; } INP_HASH_WLOCK(&V_tcbinfo); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) inp->inp_vflag |= INP_IPV4; else if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { error = EAFNOSUPPORT; INP_HASH_WUNLOCK(&V_tcbinfo); goto out; } inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = in_pcbbind(inp, &sin, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); goto out; } } #endif error = in6_pcbbind(inp, sin6, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); out: if (error != 0) inp->inp_vflag = vflagsav; tcp_bblog_pru(tp, PRU_BIND, error); TCP_PROBE2(debug__user, tp, PRU_BIND); INP_WUNLOCK(inp); return (error); } #endif /* INET6 */ #ifdef INET /* * Prepare to accept connections. 
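 *
 * Note that listen(2) on a socket that was never bound implicitly binds
 * it to an ephemeral port: inp_lport is still 0 below, so in_pcbbind()
 * is called with a NULL sockaddr. A hedged userland sketch:
 *
 *	int s = socket(AF_INET, SOCK_STREAM, 0);
 *	if (listen(s, SOMAXCONN) == -1)
 *		err(1, "listen");
 *
 * The chosen port can then be read back with getsockname(2).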
*/ static int tcp_usr_listen(struct socket *so, int backlog, struct thread *td) { int error = 0; struct inpcb *inp; struct tcpcb *tp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (EINVAL); } tp = intotcpcb(inp); SOCK_LOCK(so); error = solisten_proto_check(so); if (error != 0) { SOCK_UNLOCK(so); goto out; } if (inp->inp_lport == 0) { INP_HASH_WLOCK(&V_tcbinfo); error = in_pcbbind(inp, NULL, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); } if (error == 0) { tcp_state_change(tp, TCPS_LISTEN); solisten_proto(so, backlog); #ifdef TCP_OFFLOAD if ((so->so_options & SO_NO_OFFLOAD) == 0) tcp_offload_listen_start(tp); #endif } else { solisten_proto_abort(so); } SOCK_UNLOCK(so); if (IS_FASTOPEN(tp->t_flags)) tp->t_tfo_pending = tcp_fastopen_alloc_counter(); out: tcp_bblog_pru(tp, PRU_LISTEN, error); TCP_PROBE2(debug__user, tp, PRU_LISTEN); INP_WUNLOCK(inp); return (error); } #endif /* INET */ #ifdef INET6 static int tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) { int error = 0; struct inpcb *inp; struct tcpcb *tp; u_char vflagsav; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (EINVAL); } tp = intotcpcb(inp); vflagsav = inp->inp_vflag; SOCK_LOCK(so); error = solisten_proto_check(so); if (error != 0) { SOCK_UNLOCK(so); goto out; } INP_HASH_WLOCK(&V_tcbinfo); if (inp->inp_lport == 0) { inp->inp_vflag &= ~INP_IPV4; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; error = in6_pcbbind(inp, NULL, td->td_ucred); } INP_HASH_WUNLOCK(&V_tcbinfo); if (error == 0) { tcp_state_change(tp, TCPS_LISTEN); solisten_proto(so, backlog); #ifdef TCP_OFFLOAD if ((so->so_options & SO_NO_OFFLOAD) == 0) tcp_offload_listen_start(tp); #endif } else { solisten_proto_abort(so); } SOCK_UNLOCK(so); if (IS_FASTOPEN(tp->t_flags)) tp->t_tfo_pending = tcp_fastopen_alloc_counter(); if (error != 0) inp->inp_vflag = vflagsav; out: tcp_bblog_pru(tp, PRU_LISTEN, error); TCP_PROBE2(debug__user, tp, PRU_LISTEN); INP_WUNLOCK(inp); return (error); } #endif /* INET6 */ #ifdef INET /* * Initiate connection to peer. * Create a template for use in transmissions on this connection. * Enter SYN_SENT state, and mark socket as connecting. * Start keep-alive timer, and seed output sequence space. * Send initial segment on connection. */ static int tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct epoch_tracker et; int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in *sinp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNREFUSED); } tp = intotcpcb(inp); sinp = (struct sockaddr_in *)nam; if (nam->sa_family != AF_INET) { error = EAFNOSUPPORT; goto out; } if (nam->sa_len != sizeof (*sinp)) { error = EINVAL; goto out; } /* * Must disallow TCP ``connections'' to multicast addresses. 
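 *
 * For example (illustrative), connect(2) to 224.0.0.1 fails with
 * EAFNOSUPPORT and connect(2) to 255.255.255.255 fails with EACCES,
 * per the two checks that follow.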
*/ if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { error = EAFNOSUPPORT; goto out; } if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) { error = EACCES; goto out; } if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0) goto out; if (SOLISTENING(so)) { error = EOPNOTSUPP; goto out; } NET_EPOCH_ENTER(et); if ((error = tcp_connect(tp, sinp, td)) != 0) goto out_in_epoch; #ifdef TCP_OFFLOAD if (registered_toedevs > 0 && (so->so_options & SO_NO_OFFLOAD) == 0 && (error = tcp_offload_connect(so, nam)) == 0) goto out_in_epoch; #endif tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); error = tcp_output(tp); KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()" ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error)); out_in_epoch: NET_EPOCH_EXIT(et); out: tcp_bblog_pru(tp, PRU_CONNECT, error); TCP_PROBE2(debug__user, tp, PRU_CONNECT); INP_WUNLOCK(inp); return (error); } #endif /* INET */ #ifdef INET6 static int tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct epoch_tracker et; int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in6 *sin6; u_int8_t incflagsav; u_char vflagsav; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNREFUSED); } tp = intotcpcb(inp); vflagsav = inp->inp_vflag; incflagsav = inp->inp_inc.inc_flags; sin6 = (struct sockaddr_in6 *)nam; if (nam->sa_family != AF_INET6) { error = EAFNOSUPPORT; goto out; } if (nam->sa_len != sizeof (*sin6)) { error = EINVAL; goto out; } /* * Must disallow TCP ``connections'' to multicast addresses. */ if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { error = EAFNOSUPPORT; goto out; } if (SOLISTENING(so)) { error = EINVAL; goto out; } #ifdef INET /* * XXXRW: Some confusion: V4/V6 flags relate to binding, and * therefore probably require the hash lock, which isn't held here. * Is this a significant problem? 
*/ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { struct sockaddr_in sin; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { error = EINVAL; goto out; } if ((inp->inp_vflag & INP_IPV4) == 0) { error = EAFNOSUPPORT; goto out; } in6_sin6_2_sin(&sin, sin6); if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { error = EAFNOSUPPORT; goto out; } if (ntohl(sin.sin_addr.s_addr) == INADDR_BROADCAST) { error = EACCES; goto out; } if ((error = prison_remote_ip4(td->td_ucred, &sin.sin_addr)) != 0) goto out; inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; NET_EPOCH_ENTER(et); if ((error = tcp_connect(tp, &sin, td)) != 0) goto out_in_epoch; #ifdef TCP_OFFLOAD if (registered_toedevs > 0 && (so->so_options & SO_NO_OFFLOAD) == 0 && (error = tcp_offload_connect(so, nam)) == 0) goto out_in_epoch; #endif error = tcp_output(tp); goto out_in_epoch; } else { if ((inp->inp_vflag & INP_IPV6) == 0) { error = EAFNOSUPPORT; goto out; } } #endif if ((error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr)) != 0) goto out; inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; inp->inp_inc.inc_flags |= INC_ISIPV6; NET_EPOCH_ENTER(et); if ((error = tcp6_connect(tp, sin6, td)) != 0) goto out_in_epoch; #ifdef TCP_OFFLOAD if (registered_toedevs > 0 && (so->so_options & SO_NO_OFFLOAD) == 0 && (error = tcp_offload_connect(so, nam)) == 0) goto out_in_epoch; #endif tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); error = tcp_output(tp); out_in_epoch: NET_EPOCH_EXIT(et); out: KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()" ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error)); /* * If the implicit bind in the connect call fails, restore * the flags we modified. */ if (error != 0 && inp->inp_lport == 0) { inp->inp_vflag = vflagsav; inp->inp_inc.inc_flags = incflagsav; } tcp_bblog_pru(tp, PRU_CONNECT, error); TCP_PROBE2(debug__user, tp, PRU_CONNECT); INP_WUNLOCK(inp); return (error); } #endif /* INET6 */ /* * Initiate disconnect from peer. * If connection never passed embryonic stage, just drop; * else if don't need to let data drain, then can just drop anyways, * else have to begin TCP shutdown process: mark socket disconnecting, * drain unread data, state switch to reflect user close, and * send segment (e.g. FIN) to peer. Socket will be really disconnected * when peer sends FIN and acks ours. * * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. */ static int tcp_usr_disconnect(struct socket *so) { struct inpcb *inp; struct tcpcb *tp = NULL; struct epoch_tracker et; int error = 0; NET_EPOCH_ENTER(et); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); return (ECONNRESET); } tp = intotcpcb(inp); if (tp->t_state == TCPS_TIME_WAIT) goto out; tcp_disconnect(tp); out: tcp_bblog_pru(tp, PRU_DISCONNECT, error); TCP_PROBE2(debug__user, tp, PRU_DISCONNECT); INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); return (error); } #ifdef INET /* * Accept a connection. Essentially all the work is done at higher levels; * just return the address of the peer, storing through addr. 
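 *
 * Note the KPI change in the hunks below: pr_accept() no longer
 * allocates a sockaddr and returns it through a pointer-to-pointer;
 * the caller now passes in preallocated storage that the protocol
 * fills while the inpcb lock is held. A hedged sketch of the new
 * calling shape (the caller-side spelling here is an assumption, not
 * taken from this diff):
 *
 *	struct sockaddr_storage ss = { .ss_len = sizeof(ss) };
 *	int error;
 *
 *	error = so->so_proto->pr_accept(so, (struct sockaddr *)&ss);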
*/ static int -tcp_usr_accept(struct socket *so, struct sockaddr **nam) +tcp_usr_accept(struct socket *so, struct sockaddr *sa) { - int error = 0; struct inpcb *inp; struct tcpcb *tp; - struct in_addr addr; - in_port_t port = 0; + int error = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNABORTED); } tp = intotcpcb(inp); - if (so->so_state & SS_ISDISCONNECTED) { + if (so->so_state & SS_ISDISCONNECTED) error = ECONNABORTED; - goto out; - } - /* - * We inline in_getpeeraddr and COMMON_END here, so that we can - * copy the data of interest and defer the malloc until after we - * release the lock. - */ - port = inp->inp_fport; - addr = inp->inp_faddr; - -out: + else + *(struct sockaddr_in *)sa = (struct sockaddr_in ){ + .sin_family = AF_INET, + .sin_len = sizeof(struct sockaddr_in), + .sin_port = inp->inp_fport, + .sin_addr = inp->inp_faddr, + }; tcp_bblog_pru(tp, PRU_ACCEPT, error); TCP_PROBE2(debug__user, tp, PRU_ACCEPT); INP_WUNLOCK(inp); - if (error == 0) - *nam = in_sockaddr(port, &addr); - return error; + + return (error); } #endif /* INET */ #ifdef INET6 static int -tcp6_usr_accept(struct socket *so, struct sockaddr **nam) +tcp6_usr_accept(struct socket *so, struct sockaddr *sa) { struct inpcb *inp; - int error = 0; struct tcpcb *tp; - struct in_addr addr; - struct in6_addr addr6; - in_port_t port = 0; - int v4 = 0; + int error = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNABORTED); } tp = intotcpcb(inp); if (so->so_state & SS_ISDISCONNECTED) { error = ECONNABORTED; - goto out; - } - /* - * We inline in6_mapped_peeraddr and COMMON_END here, so that we can - * copy the data of interest and defer the malloc until after we - * release the lock. - */ - if (inp->inp_vflag & INP_IPV4) { - v4 = 1; - port = inp->inp_fport; - addr = inp->inp_faddr; } else { - port = inp->inp_fport; - addr6 = inp->in6p_faddr; + if (inp->inp_vflag & INP_IPV4) { + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_len = sizeof(struct sockaddr_in), + .sin_port = inp->inp_fport, + .sin_addr = inp->inp_faddr, + }; + in6_sin_2_v4mapsin6(&sin, (struct sockaddr_in6 *)sa); + } else { + *(struct sockaddr_in6 *)sa = (struct sockaddr_in6 ){ + .sin6_family = AF_INET6, + .sin6_len = sizeof(struct sockaddr_in6), + .sin6_port = inp->inp_fport, + .sin6_addr = inp->in6p_faddr, + }; + /* XXX: should catch errors */ + (void)sa6_recoverscope((struct sockaddr_in6 *)sa); + } } -out: tcp_bblog_pru(tp, PRU_ACCEPT, error); TCP_PROBE2(debug__user, tp, PRU_ACCEPT); INP_WUNLOCK(inp); - if (error == 0) { - if (v4) - *nam = in6_v4mapsin6_sockaddr(port, &addr); - else - *nam = in6_sockaddr(port, &addr6); - } - return error; + + return (error); } #endif /* INET6 */ /* * Mark the connection as being incapable of further output. 
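 *
 * This is the protocol half of shutdown(2) with SHUT_WR: after
 *
 *	shutdown(s, SHUT_WR);
 *
 * socantsendmore() marks the send side closed and tcp_usrclosed()
 * queues the FIN, while the receive side of the socket stays usable.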
*/ static int tcp_usr_shutdown(struct socket *so) { int error = 0; struct inpcb *inp; struct tcpcb *tp; struct epoch_tracker et; inp = sotoinpcb(so); KASSERT(inp != NULL, ("inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); NET_EPOCH_ENTER(et); socantsendmore(so); tcp_usrclosed(tp); if (!(inp->inp_flags & INP_DROPPED)) error = tcp_output_nodrop(tp); tcp_bblog_pru(tp, PRU_SHUTDOWN, error); TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN); error = tcp_unlock_or_drop(tp, error); NET_EPOCH_EXIT(et); return (error); } /* * After a receive, possibly send window update to peer. */ static int tcp_usr_rcvd(struct socket *so, int flags) { struct epoch_tracker et; struct inpcb *inp; struct tcpcb *tp; int outrv = 0, error = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); NET_EPOCH_ENTER(et); /* * For passively-created TFO connections, don't attempt a window * update while still in SYN_RECEIVED as this may trigger an early * SYN|ACK. It is preferable to have the SYN|ACK be sent along with * application response data, or failing that, when the DELACK timer * expires. */ if (IS_FASTOPEN(tp->t_flags) && (tp->t_state == TCPS_SYN_RECEIVED)) goto out; #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) tcp_offload_rcvd(tp); else #endif outrv = tcp_output_nodrop(tp); out: tcp_bblog_pru(tp, PRU_RCVD, error); TCP_PROBE2(debug__user, tp, PRU_RCVD); (void) tcp_unlock_or_drop(tp, outrv); NET_EPOCH_EXIT(et); return (error); } /* * Do a send by putting data in output queue and updating urgent * marker if URG set. Possibly send more data. Unlike the other * pru_*() routines, the mbuf chains are our responsibility. We * must either enqueue them or free them. The other pru_* routines * generally are caller-frees. 
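 *
 * In other words, every exit path below must uphold the ownership rule
 * sketched here, which is what the out: label implements:
 *
 *	if (m != NULL && (flags & PRUS_NOTREADY) == 0)
 *		m_freem(m);
 *
 * Under PRUS_NOTREADY the chain instead stays in the socket buffer,
 * and the caller or tcp_usr_ready() is responsible for freeing it.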
*/ static int tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct epoch_tracker et; int error = 0; struct inpcb *inp; struct tcpcb *tp; #ifdef INET #ifdef INET6 struct sockaddr_in sin; #endif struct sockaddr_in *sinp; #endif #ifdef INET6 struct sockaddr_in6 *sin6; int isipv6; #endif u_int8_t incflagsav; u_char vflagsav; bool restoreflags; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { if (m != NULL && (flags & PRUS_NOTREADY) == 0) m_freem(m); INP_WUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); vflagsav = inp->inp_vflag; incflagsav = inp->inp_inc.inc_flags; restoreflags = false; NET_EPOCH_ENTER(et); if (control != NULL) { /* TCP doesn't do control messages (rights, creds, etc) */ if (control->m_len > 0) { m_freem(control); error = EINVAL; goto out; } m_freem(control); /* empty control, just free it */ } if ((flags & PRUS_OOB) != 0 && (error = tcp_pru_options_support(tp, PRUS_OOB)) != 0) goto out; if (nam != NULL && tp->t_state < TCPS_SYN_SENT) { if (tp->t_state == TCPS_LISTEN) { error = EINVAL; goto out; } switch (nam->sa_family) { #ifdef INET case AF_INET: sinp = (struct sockaddr_in *)nam; if (sinp->sin_len != sizeof(struct sockaddr_in)) { error = EINVAL; goto out; } if ((inp->inp_vflag & INP_IPV6) != 0) { error = EAFNOSUPPORT; goto out; } if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { error = EAFNOSUPPORT; goto out; } if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) { error = EACCES; goto out; } if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr))) goto out; #ifdef INET6 isipv6 = 0; #endif break; #endif /* INET */ #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)nam; if (sin6->sin6_len != sizeof(*sin6)) { error = EINVAL; goto out; } if ((inp->inp_vflag & INP_IPV6PROTO) == 0) { error = EAFNOSUPPORT; goto out; } if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { error = EAFNOSUPPORT; goto out; } if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { error = EINVAL; goto out; } if ((inp->inp_vflag & INP_IPV4) == 0) { error = EAFNOSUPPORT; goto out; } restoreflags = true; inp->inp_vflag &= ~INP_IPV6; sinp = &sin; in6_sin6_2_sin(sinp, sin6); if (IN_MULTICAST( ntohl(sinp->sin_addr.s_addr))) { error = EAFNOSUPPORT; goto out; } if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr))) goto out; isipv6 = 0; #else /* !INET */ error = EAFNOSUPPORT; goto out; #endif /* INET */ } else { if ((inp->inp_vflag & INP_IPV6) == 0) { error = EAFNOSUPPORT; goto out; } restoreflags = true; inp->inp_vflag &= ~INP_IPV4; inp->inp_inc.inc_flags |= INC_ISIPV6; if ((error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr))) goto out; isipv6 = 1; } break; #endif /* INET6 */ default: error = EAFNOSUPPORT; goto out; } } if (!(flags & PRUS_OOB)) { if (tp->t_acktime == 0) tp->t_acktime = ticks; sbappendstream(&so->so_snd, m, flags); m = NULL; if (nam && tp->t_state < TCPS_SYN_SENT) { KASSERT(tp->t_state == TCPS_CLOSED, ("%s: tp %p is listening", __func__, tp)); /* * Do implied connect if not yet connected, * initialize window to default value, and * initialize maxseg using peer's cached MSS. */ #ifdef INET6 if (isipv6) error = tcp6_connect(tp, sin6, td); #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET error = tcp_connect(tp, sinp, td); #endif /* * The bind operation in tcp_connect succeeded. 
We * no longer want to restore the flags if later * operations fail. */ if (error == 0 || inp->inp_lport != 0) restoreflags = false; if (error) { /* m is freed if PRUS_NOTREADY is unset. */ sbflush(&so->so_snd); goto out; } if (IS_FASTOPEN(tp->t_flags)) tcp_fastopen_connect(tp); else { tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); } } if (flags & PRUS_EOF) { /* * Close the send side of the connection after * the data is sent. */ socantsendmore(so); tcp_usrclosed(tp); } if (TCPS_HAVEESTABLISHED(tp->t_state) && ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) && (tp->t_fbyte_out == 0) && (so->so_snd.sb_ccc > 0)) { tp->t_fbyte_out = ticks; if (tp->t_fbyte_out == 0) tp->t_fbyte_out = 1; if (tp->t_fbyte_out && tp->t_fbyte_in) tp->t_flags2 |= TF2_FBYTES_COMPLETE; } if (!(inp->inp_flags & INP_DROPPED) && !(flags & PRUS_NOTREADY)) { if (flags & PRUS_MORETOCOME) tp->t_flags |= TF_MORETOCOME; error = tcp_output_nodrop(tp); if (flags & PRUS_MORETOCOME) tp->t_flags &= ~TF_MORETOCOME; } } else { /* * XXXRW: PRUS_EOF not implemented with PRUS_OOB? */ SOCKBUF_LOCK(&so->so_snd); if (sbspace(&so->so_snd) < -512) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOBUFS; goto out; } /* * According to RFC961 (Assigned Protocols), * the urgent pointer points to the last octet * of urgent data. We continue, however, * to consider it to indicate the first octet * of data past the urgent section. * Otherwise, snd_up should be one lower. */ if (tp->t_acktime == 0) tp->t_acktime = ticks; sbappendstream_locked(&so->so_snd, m, flags); SOCKBUF_UNLOCK(&so->so_snd); m = NULL; if (nam && tp->t_state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected, * initialize window to default value, and * initialize maxseg using peer's cached MSS. */ /* * Not going to contemplate SYN|URG */ if (IS_FASTOPEN(tp->t_flags)) tp->t_flags &= ~TF_FASTOPEN; #ifdef INET6 if (isipv6) error = tcp6_connect(tp, sin6, td); #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET error = tcp_connect(tp, sinp, td); #endif /* * The bind operation in tcp_connect succeeded. We * no longer want to restore the flags if later * operations fail. */ if (error == 0 || inp->inp_lport != 0) restoreflags = false; if (error != 0) { /* m is freed if PRUS_NOTREADY is unset. */ sbflush(&so->so_snd); goto out; } tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); } tp->snd_up = tp->snd_una + sbavail(&so->so_snd); if ((flags & PRUS_NOTREADY) == 0) { tp->t_flags |= TF_FORCEDATA; error = tcp_output_nodrop(tp); tp->t_flags &= ~TF_FORCEDATA; } } TCP_LOG_EVENT(tp, NULL, &inp->inp_socket->so_rcv, &inp->inp_socket->so_snd, TCP_LOG_USERSEND, error, 0, NULL, false); out: /* * In case of PRUS_NOTREADY, the caller or tcp_usr_ready() is * responsible for freeing memory. */ if (m != NULL && (flags & PRUS_NOTREADY) == 0) m_freem(m); /* * If the request was unsuccessful and we changed flags, * restore the original flags. */ if (error != 0 && restoreflags) { inp->inp_vflag = vflagsav; inp->inp_inc.inc_flags = incflagsav; } tcp_bblog_pru(tp, (flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND), error); TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? 
PRU_SEND_EOF : PRU_SEND)); error = tcp_unlock_or_drop(tp, error); NET_EPOCH_EXIT(et); return (error); } static int tcp_usr_ready(struct socket *so, struct mbuf *m, int count) { struct epoch_tracker et; struct inpcb *inp; struct tcpcb *tp; int error; inp = sotoinpcb(so); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); mb_free_notready(m, count); return (ECONNRESET); } tp = intotcpcb(inp); SOCKBUF_LOCK(&so->so_snd); error = sbready(&so->so_snd, m, count); SOCKBUF_UNLOCK(&so->so_snd); if (error) { INP_WUNLOCK(inp); return (error); } NET_EPOCH_ENTER(et); error = tcp_output_unlock(tp); NET_EPOCH_EXIT(et); return (error); } /* * Abort the TCP. Drop the connection abruptly. */ static void tcp_usr_abort(struct socket *so) { struct inpcb *inp; struct tcpcb *tp; struct epoch_tracker et; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL")); NET_EPOCH_ENTER(et); INP_WLOCK(inp); KASSERT(inp->inp_socket != NULL, ("tcp_usr_abort: inp_socket == NULL")); /* * If we still have full TCP state, and we're not dropped, drop. */ if (!(inp->inp_flags & INP_DROPPED)) { tp = intotcpcb(inp); tp = tcp_drop(tp, ECONNABORTED); if (tp == NULL) goto dropped; tcp_bblog_pru(tp, PRU_ABORT, 0); TCP_PROBE2(debug__user, tp, PRU_ABORT); } if (!(inp->inp_flags & INP_DROPPED)) { soref(so); inp->inp_flags |= INP_SOCKREF; } INP_WUNLOCK(inp); dropped: NET_EPOCH_EXIT(et); } /* * TCP socket is closed. Start friendly disconnect. */ static void tcp_usr_close(struct socket *so) { struct inpcb *inp; struct tcpcb *tp; struct epoch_tracker et; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL")); NET_EPOCH_ENTER(et); INP_WLOCK(inp); KASSERT(inp->inp_socket != NULL, ("tcp_usr_close: inp_socket == NULL")); /* * If we are still connected and we're not dropped, initiate * a disconnect. */ if (!(inp->inp_flags & INP_DROPPED)) { tp = intotcpcb(inp); if (tp->t_state != TCPS_TIME_WAIT) { tp->t_flags |= TF_CLOSED; tcp_disconnect(tp); tcp_bblog_pru(tp, PRU_CLOSE, 0); TCP_PROBE2(debug__user, tp, PRU_CLOSE); } } if (!(inp->inp_flags & INP_DROPPED)) { soref(so); inp->inp_flags |= INP_SOCKREF; } INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); } static int tcp_pru_options_support(struct tcpcb *tp, int flags) { /* * If the specific TCP stack has a pru_options * specified then it does not always support * all the PRU_XX options and we must ask it. * If the function is not specified then all * of the PRU_XX options are supported. */ int ret = 0; if (tp->t_fb->tfb_pru_options) { ret = (*tp->t_fb->tfb_pru_options)(tp, flags); } return (ret); } /* * Receive out-of-band data. 
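 *
 * Reached via recv(2) with MSG_OOB while SO_OOBINLINE is unset; a
 * hedged userland sketch:
 *
 *	char c;
 *	ssize_t n = recv(s, &c, 1, MSG_OOB);
 *
 * A -1/EWOULDBLOCK result corresponds to the TCPOOB_HAVEDATA check
 * below: the urgent byte has not arrived yet.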
*/ static int tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) { int error = 0; struct inpcb *inp; struct tcpcb *tp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); error = tcp_pru_options_support(tp, PRUS_OOB); if (error) { goto out; } if ((so->so_oobmark == 0 && (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || so->so_options & SO_OOBINLINE || tp->t_oobflags & TCPOOB_HADDATA) { error = EINVAL; goto out; } if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { error = EWOULDBLOCK; goto out; } m->m_len = 1; *mtod(m, caddr_t) = tp->t_iobc; if ((flags & MSG_PEEK) == 0) tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); out: tcp_bblog_pru(tp, PRU_RCVOOB, error); TCP_PROBE2(debug__user, tp, PRU_RCVOOB); INP_WUNLOCK(inp); return (error); } #ifdef INET struct protosw tcp_protosw = { .pr_type = SOCK_STREAM, .pr_protocol = IPPROTO_TCP, .pr_flags = PR_CONNREQUIRED | PR_IMPLOPCL | PR_WANTRCVD | PR_CAPATTACH, .pr_ctloutput = tcp_ctloutput, .pr_abort = tcp_usr_abort, .pr_accept = tcp_usr_accept, .pr_attach = tcp_usr_attach, .pr_bind = tcp_usr_bind, .pr_connect = tcp_usr_connect, .pr_control = in_control, .pr_detach = tcp_usr_detach, .pr_disconnect = tcp_usr_disconnect, .pr_listen = tcp_usr_listen, .pr_peeraddr = in_getpeeraddr, .pr_rcvd = tcp_usr_rcvd, .pr_rcvoob = tcp_usr_rcvoob, .pr_send = tcp_usr_send, .pr_ready = tcp_usr_ready, .pr_shutdown = tcp_usr_shutdown, .pr_sockaddr = in_getsockaddr, .pr_sosetlabel = in_pcbsosetlabel, .pr_close = tcp_usr_close, }; #endif /* INET */ #ifdef INET6 struct protosw tcp6_protosw = { .pr_type = SOCK_STREAM, .pr_protocol = IPPROTO_TCP, .pr_flags = PR_CONNREQUIRED | PR_IMPLOPCL |PR_WANTRCVD | PR_CAPATTACH, .pr_ctloutput = tcp_ctloutput, .pr_abort = tcp_usr_abort, .pr_accept = tcp6_usr_accept, .pr_attach = tcp_usr_attach, .pr_bind = tcp6_usr_bind, .pr_connect = tcp6_usr_connect, .pr_control = in6_control, .pr_detach = tcp_usr_detach, .pr_disconnect = tcp_usr_disconnect, .pr_listen = tcp6_usr_listen, .pr_peeraddr = in6_mapped_peeraddr, .pr_rcvd = tcp_usr_rcvd, .pr_rcvoob = tcp_usr_rcvoob, .pr_send = tcp_usr_send, .pr_ready = tcp_usr_ready, .pr_shutdown = tcp_usr_shutdown, .pr_sockaddr = in6_mapped_sockaddr, .pr_sosetlabel = in_pcbsosetlabel, .pr_close = tcp_usr_close, }; #endif /* INET6 */ #ifdef INET /* * Common subroutine to open a TCP connection to remote host specified * by struct sockaddr_in. Call in_pcbconnect() to choose local host address * and assign a local port number and install the inpcb into the hash. * Initialize connection parameters and enter SYN-SENT state. */ static int tcp_connect(struct tcpcb *tp, struct sockaddr_in *sin, struct thread *td) { struct inpcb *inp = tptoinpcb(tp); struct socket *so = tptosocket(tp); int error; NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(inp); if (__predict_false((so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING | SS_ISDISCONNECTED)) != 0)) return (EISCONN); INP_HASH_WLOCK(&V_tcbinfo); error = in_pcbconnect(inp, sin, td->td_ucred, true); INP_HASH_WUNLOCK(&V_tcbinfo); if (error != 0) return (error); /* * Compute window scaling to request: * Scale to fit into sweet spot. See tcp_syncache.c. * XXX: This should move to tcp_output(). 
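 *
 * Worked example (assuming the default kern.ipc.maxsockbuf of 2MB,
 * i.e. sb_max = 2097152): the loop stops at the first shift whose
 * scaled window covers sb_max. 65535 << 5 = 2097120 still falls
 * short, while 65535 << 6 does not, so request_r_scale ends up as 6.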
*/ while (tp->request_r_scale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << tp->request_r_scale) < sb_max) tp->request_r_scale++; soisconnecting(so); TCPSTAT_INC(tcps_connattempt); tcp_state_change(tp, TCPS_SYN_SENT); tp->iss = tcp_new_isn(&inp->inp_inc); if (tp->t_flags & TF_REQ_TSTMP) tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc); tcp_sendseqinit(tp); return (0); } #endif /* INET */ #ifdef INET6 static int tcp6_connect(struct tcpcb *tp, struct sockaddr_in6 *sin6, struct thread *td) { struct inpcb *inp = tptoinpcb(tp); struct socket *so = tptosocket(tp); int error; NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(inp); if (__predict_false((so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED)) != 0)) return (EISCONN); INP_HASH_WLOCK(&V_tcbinfo); error = in6_pcbconnect(inp, sin6, td->td_ucred, true); INP_HASH_WUNLOCK(&V_tcbinfo); if (error != 0) return (error); /* Compute window scaling to request. */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << tp->request_r_scale) < sb_max) tp->request_r_scale++; soisconnecting(so); TCPSTAT_INC(tcps_connattempt); tcp_state_change(tp, TCPS_SYN_SENT); tp->iss = tcp_new_isn(&inp->inp_inc); if (tp->t_flags & TF_REQ_TSTMP) tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc); tcp_sendseqinit(tp); return (0); } #endif /* INET6 */ /* * Export TCP internal state information via a struct tcp_info, based on the * Linux 2.6 API. Not ABI compatible as our constants are mapped differently * (TCP state machine, etc). We export all information using FreeBSD-native * constants -- for example, the numeric values for tcpi_state will differ * from Linux. */ void tcp_fill_info(const struct tcpcb *tp, struct tcp_info *ti) { INP_LOCK_ASSERT(tptoinpcb(tp)); bzero(ti, sizeof(*ti)); ti->tcpi_state = tp->t_state; if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; if (tp->t_flags & TF_SACK_PERMIT) ti->tcpi_options |= TCPI_OPT_SACK; if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { ti->tcpi_options |= TCPI_OPT_WSCALE; ti->tcpi_snd_wscale = tp->snd_scale; ti->tcpi_rcv_wscale = tp->rcv_scale; } switch (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) { case TF2_ECN_PERMIT: ti->tcpi_options |= TCPI_OPT_ECN; break; case TF2_ACE_PERMIT: /* FALLTHROUGH */ case TF2_ECN_PERMIT | TF2_ACE_PERMIT: ti->tcpi_options |= TCPI_OPT_ACE; break; default: break; } if (IS_FASTOPEN(tp->t_flags)) ti->tcpi_options |= TCPI_OPT_TFO; ti->tcpi_rto = tp->t_rxtcur * tick; ti->tcpi_last_data_recv = ((uint32_t)ticks - tp->t_rcvtime) * tick; ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT; ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT; ti->tcpi_snd_ssthresh = tp->snd_ssthresh; ti->tcpi_snd_cwnd = tp->snd_cwnd; /* * FreeBSD-specific extension fields for tcp_info. */ ti->tcpi_rcv_space = tp->rcv_wnd; ti->tcpi_rcv_nxt = tp->rcv_nxt; ti->tcpi_snd_wnd = tp->snd_wnd; ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */ ti->tcpi_snd_nxt = tp->snd_nxt; ti->tcpi_snd_mss = tp->t_maxseg; ti->tcpi_rcv_mss = tp->t_maxseg; ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack; ti->tcpi_rcv_ooopack = tp->t_rcvoopack; ti->tcpi_snd_zerowin = tp->t_sndzerowin; ti->tcpi_snd_una = tp->snd_una; ti->tcpi_snd_max = tp->snd_max; ti->tcpi_rcv_numsacks = tp->rcv_numsacks; ti->tcpi_rcv_adv = tp->rcv_adv; ti->tcpi_dupacks = tp->t_dupacks; #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) { ti->tcpi_options |= TCPI_OPT_TOE; tcp_offload_tcp_info(tp, ti); } #endif /* * AccECN related counters. 
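 * E.g. when AccECN was negotiated the internal CE counter is seeded
 * with 5, so the user-visible tcpi_delivered_ce below is exported as
 * t_scep - 5; with classic RFC3168 ECN the counter starts at 0 and is
 * exported as is.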
*/ if ((tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) == (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) /* * Internal counter starts at 5 for AccECN * but 0 for RFC3168 ECN. */ ti->tcpi_delivered_ce = tp->t_scep - 5; else ti->tcpi_delivered_ce = tp->t_scep; ti->tcpi_received_ce = tp->t_rcep; } /* * tcp_ctloutput() must drop the inpcb lock before performing copyin on * socket option arguments. When it re-acquires the lock after the copy, it * has to revalidate that the connection is still valid for the socket * option. */ #define INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) do { \ INP_WLOCK(inp); \ if (inp->inp_flags & INP_DROPPED) { \ INP_WUNLOCK(inp); \ cleanup; \ return (ECONNRESET); \ } \ tp = intotcpcb(inp); \ } while(0) #define INP_WLOCK_RECHECK(inp) INP_WLOCK_RECHECK_CLEANUP((inp), /* noop */) int tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt) { struct socket *so = inp->inp_socket; struct tcpcb *tp = intotcpcb(inp); int error = 0; MPASS(sopt->sopt_dir == SOPT_SET); INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("inp_flags == %x", inp->inp_flags)); KASSERT(so != NULL, ("inp_socket == NULL")); if (sopt->sopt_level != IPPROTO_TCP) { INP_WUNLOCK(inp); #ifdef INET6 if (inp->inp_vflag & INP_IPV6PROTO) error = ip6_ctloutput(so, sopt); #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET error = ip_ctloutput(so, sopt); #endif /* * When an IP-level socket option affects TCP, pass control * down to stack tfb_tcp_ctloutput, otherwise return what * IP level returned. */ switch (sopt->sopt_level) { #ifdef INET6 case IPPROTO_IPV6: if ((inp->inp_vflag & INP_IPV6PROTO) == 0) return (error); switch (sopt->sopt_name) { case IPV6_TCLASS: /* Notify tcp stacks that care (e.g. RACK). */ break; case IPV6_USE_MIN_MTU: /* Update t_maxseg accordingly. */ break; default: return (error); } break; #endif #ifdef INET case IPPROTO_IP: switch (sopt->sopt_name) { case IP_TOS: inp->inp_ip_tos &= ~IPTOS_ECN_MASK; break; case IP_TTL: /* Notify tcp stacks that care (e.g. RACK). */ break; default: return (error); } break; #endif default: return (error); } INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNRESET); } } else if (sopt->sopt_name == TCP_FUNCTION_BLK) { /* * Protect the TCP option TCP_FUNCTION_BLK so * that a sub-function can *never* overwrite this. */ struct tcp_function_set fsn; struct tcp_function_block *blk; void *ptr = NULL; INP_WUNLOCK(inp); error = sooptcopyin(sopt, &fsn, sizeof fsn, sizeof fsn); if (error) return (error); INP_WLOCK(inp); tp = intotcpcb(inp); blk = find_and_ref_tcp_functions(&fsn); if (blk == NULL) { INP_WUNLOCK(inp); return (ENOENT); } if (tp->t_fb == blk) { /* You already have this */ refcount_release(&blk->tfb_refcnt); INP_WUNLOCK(inp); return (0); } if (tp->t_state != TCPS_CLOSED) { /* * The user has advanced the state * past the initial point, we may not * be able to switch. */ if (blk->tfb_tcp_handoff_ok != NULL) { /* * Does the stack provide a * query mechanism, if so it may * still be possible? */ error = (*blk->tfb_tcp_handoff_ok)(tp); } else error = EINVAL; if (error) { refcount_release(&blk->tfb_refcnt); INP_WUNLOCK(inp); return(error); } } if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) { refcount_release(&blk->tfb_refcnt); INP_WUNLOCK(inp); return (ENOENT); } /* * Ensure the new stack takes ownership with a * clean slate on peak rate threshold. 
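 * The handoff below is, in order: detach from hpts, run the new
 * stack's tfb_tcp_fb_init() (unwinding via tfb_switch_failed() on
 * error), run the old stack's tfb_tcp_fb_fini(tp, 0), swap the
 * refcounts, and only then install blk and ptr as t_fb and t_fb_ptr.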
*/ #ifdef TCPHPTS /* Assure that we are not on any hpts */ tcp_hpts_remove(tp); #endif
if (blk->tfb_tcp_fb_init) { error = (*blk->tfb_tcp_fb_init)(tp, &ptr); if (error) {
/* * Release the ref count the lookup * acquired. */ refcount_release(&blk->tfb_refcnt);
/* * Now there is a chance that the * init() function mucked with some * things before it failed, such as * hpts or inp_flags2 or timer granularity. * It should not have, but let's give the old * stack a chance to reset to a known good state. */
if (tp->t_fb->tfb_switch_failed) { (*tp->t_fb->tfb_switch_failed)(tp); } goto err_out; } }
if (tp->t_fb->tfb_tcp_fb_fini) { struct epoch_tracker et;
/* * Tell the stack to clean up with 0, i.e. * the tcb is not going away. */
NET_EPOCH_ENTER(et); (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0); NET_EPOCH_EXIT(et); }
/* * Release the old refcnt; the * lookup acquired a ref on the * new one already. */
refcount_release(&tp->t_fb->tfb_refcnt);
/* * Set in the new stack. */
tp->t_fb = blk; tp->t_fb_ptr = ptr;
#ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) { tcp_offload_ctloutput(tp, sopt->sopt_dir, sopt->sopt_name); } #endif
err_out: INP_WUNLOCK(inp); return (error); }
/* Pass in the INP locked, callee must unlock it. */
return (tp->t_fb->tfb_tcp_ctloutput(tp, sopt)); }
static int tcp_ctloutput_get(struct inpcb *inp, struct sockopt *sopt) { struct socket *so = inp->inp_socket; struct tcpcb *tp = intotcpcb(inp); int error = 0;
MPASS(sopt->sopt_dir == SOPT_GET); INP_WLOCK_ASSERT(inp);
KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("inp_flags == %x", inp->inp_flags));
KASSERT(so != NULL, ("inp_socket == NULL"));
if (sopt->sopt_level != IPPROTO_TCP) { INP_WUNLOCK(inp);
#ifdef INET6 if (inp->inp_vflag & INP_IPV6PROTO) error = ip6_ctloutput(so, sopt); #endif /* INET6 */
#if defined(INET6) && defined(INET) else #endif
#ifdef INET error = ip_ctloutput(so, sopt); #endif
return (error); }
if (((sopt->sopt_name == TCP_FUNCTION_BLK) || (sopt->sopt_name == TCP_FUNCTION_ALIAS))) { struct tcp_function_set fsn;
if (sopt->sopt_name == TCP_FUNCTION_ALIAS) { memset(&fsn, 0, sizeof(fsn)); find_tcp_function_alias(tp->t_fb, &fsn); } else { strncpy(fsn.function_set_name, tp->t_fb->tfb_tcp_block_name, TCP_FUNCTION_NAME_LEN_MAX); fsn.function_set_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0'; }
fsn.pcbcnt = tp->t_fb->tfb_refcnt; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &fsn, sizeof fsn); return (error); }
/* Pass in the INP locked, callee must unlock it. */
return (tp->t_fb->tfb_tcp_ctloutput(tp, sopt)); }
int tcp_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp;
inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL")); INP_WLOCK(inp);
if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNRESET); }
if (sopt->sopt_dir == SOPT_SET) return (tcp_ctloutput_set(inp, sopt)); else if (sopt->sopt_dir == SOPT_GET) return (tcp_ctloutput_get(inp, sopt)); else panic("%s: sopt_dir %d", __func__, sopt->sopt_dir); }
/* * If this assert becomes untrue, we need to change the size of the buf * variable in tcp_default_ctloutput().
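 * That is, buf[TCP_LOG_ID_LEN] below doubles as scratch space for CC
 * algorithm names and log-dump reasons, so the CTASSERTs pin both
 * TCP_CA_NAME_MAX <= TCP_LOG_ID_LEN and
 * TCP_LOG_REASON_LEN <= TCP_LOG_ID_LEN.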
*/ #ifdef CTASSERT CTASSERT(TCP_CA_NAME_MAX <= TCP_LOG_ID_LEN); CTASSERT(TCP_LOG_REASON_LEN <= TCP_LOG_ID_LEN); #endif
#ifdef KERN_TLS static int copyin_tls_enable(struct sockopt *sopt, struct tls_enable *tls) { struct tls_enable_v0 tls_v0; int error;
if (sopt->sopt_valsize == sizeof(tls_v0)) { error = sooptcopyin(sopt, &tls_v0, sizeof(tls_v0), sizeof(tls_v0)); if (error) return (error);
memset(tls, 0, sizeof(*tls)); tls->cipher_key = tls_v0.cipher_key; tls->iv = tls_v0.iv; tls->auth_key = tls_v0.auth_key; tls->cipher_algorithm = tls_v0.cipher_algorithm; tls->cipher_key_len = tls_v0.cipher_key_len; tls->iv_len = tls_v0.iv_len; tls->auth_algorithm = tls_v0.auth_algorithm; tls->auth_key_len = tls_v0.auth_key_len; tls->flags = tls_v0.flags; tls->tls_vmajor = tls_v0.tls_vmajor; tls->tls_vminor = tls_v0.tls_vminor; return (0); }
return (sooptcopyin(sopt, tls, sizeof(*tls), sizeof(*tls))); } #endif
extern struct cc_algo newreno_cc_algo;
static int tcp_set_cc_mod(struct inpcb *inp, struct sockopt *sopt) { struct cc_algo *algo; void *ptr = NULL; struct tcpcb *tp; struct cc_var cc_mem; char buf[TCP_CA_NAME_MAX]; size_t mem_sz; int error;
INP_WUNLOCK(inp); error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1); if (error) return (error);
buf[sopt->sopt_valsize] = '\0';
CC_LIST_RLOCK(); STAILQ_FOREACH(algo, &cc_list, entries) { if (strncmp(buf, algo->name, TCP_CA_NAME_MAX) == 0) { if (algo->flags & CC_MODULE_BEING_REMOVED) { /* We can't "see" modules being unloaded */ continue; } break; } }
if (algo == NULL) { CC_LIST_RUNLOCK(); return (ESRCH); }
/* * With a reference the algorithm cannot be removed, * so we hold a reference through the change process. */
cc_refer(algo); CC_LIST_RUNLOCK();
if (algo->cb_init != NULL) { /* We can now pre-get the memory for the CC */ mem_sz = (*algo->cc_data_sz)(); if (mem_sz == 0) { goto no_mem_needed; } ptr = malloc(mem_sz, M_CC_MEM, M_WAITOK); } else { no_mem_needed: mem_sz = 0; ptr = NULL; }
/* * Make sure it's all clean and zero and also get * back the inp lock. */
memset(&cc_mem, 0, sizeof(cc_mem)); INP_WLOCK(inp);
if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); if (ptr) free(ptr, M_CC_MEM);
/* Release our temp reference */
CC_LIST_RLOCK(); cc_release(algo); CC_LIST_RUNLOCK(); return (ECONNRESET); }
tp = intotcpcb(inp); if (ptr != NULL) memset(ptr, 0, mem_sz); cc_mem.ccvc.tcp = tp;
/* * We once again hold a write lock over the tcb so it's * safe to do these things without ordering concerns. * Note here we init into stack memory. */
if (algo->cb_init != NULL) error = algo->cb_init(&cc_mem, ptr); else error = 0;
/* * The CC algorithms, when given their memory, * should not fail; we could in theory have a * KASSERT here. */
if (error == 0) {
/* * Touchdown; let's go ahead and move the * connection to the new CC module by * copying in the cc_mem after we call * the old one's cleanup (if any). */
if (CC_ALGO(tp)->cb_destroy != NULL) CC_ALGO(tp)->cb_destroy(&tp->t_ccv);
/* Detach the old CC from the tcpcb */
cc_detach(tp);
/* Copy in our temp memory that was inited */
memcpy(&tp->t_ccv, &cc_mem, sizeof(struct cc_var));
/* Now attach the new, which takes a reference */
cc_attach(tp, algo);
/* OK, now: have we gotten past any conn_init?
*/ if (TCPS_HAVEESTABLISHED(tp->t_state) && (CC_ALGO(tp)->conn_init != NULL)) { /* Yep run the connection init for the new CC */ CC_ALGO(tp)->conn_init(&tp->t_ccv); } } else if (ptr) free(ptr, M_CC_MEM); INP_WUNLOCK(inp); /* Now lets release our temp reference */ CC_LIST_RLOCK(); cc_release(algo); CC_LIST_RUNLOCK(); return (error); } int tcp_default_ctloutput(struct tcpcb *tp, struct sockopt *sopt) { struct inpcb *inp = tptoinpcb(tp); int error, opt, optval; u_int ui; struct tcp_info ti; #ifdef KERN_TLS struct tls_enable tls; struct socket *so = inp->inp_socket; #endif char *pbuf, buf[TCP_LOG_ID_LEN]; #ifdef STATS struct statsblob *sbp; #endif size_t len; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("inp_flags == %x", inp->inp_flags)); KASSERT(inp->inp_socket != NULL, ("inp_socket == NULL")); switch (sopt->sopt_level) { #ifdef INET6 case IPPROTO_IPV6: MPASS(inp->inp_vflag & INP_IPV6PROTO); switch (sopt->sopt_name) { case IPV6_USE_MIN_MTU: tcp6_use_min_mtu(tp); /* FALLTHROUGH */ } INP_WUNLOCK(inp); return (0); #endif #ifdef INET case IPPROTO_IP: INP_WUNLOCK(inp); return (0); #endif } /* * For TCP_CCALGOOPT forward the control to CC module, for both * SOPT_SET and SOPT_GET. */ switch (sopt->sopt_name) { case TCP_CCALGOOPT: INP_WUNLOCK(inp); if (sopt->sopt_valsize > CC_ALGOOPT_LIMIT) return (EINVAL); pbuf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO); error = sooptcopyin(sopt, pbuf, sopt->sopt_valsize, sopt->sopt_valsize); if (error) { free(pbuf, M_TEMP); return (error); } INP_WLOCK_RECHECK_CLEANUP(inp, free(pbuf, M_TEMP)); if (CC_ALGO(tp)->ctl_output != NULL) error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, sopt, pbuf); else error = ENOENT; INP_WUNLOCK(inp); if (error == 0 && sopt->sopt_dir == SOPT_GET) error = sooptcopyout(sopt, pbuf, sopt->sopt_valsize); free(pbuf, M_TEMP); return (error); } switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) case TCP_MD5SIG: INP_WUNLOCK(inp); if (!TCPMD5_ENABLED()) return (ENOPROTOOPT); error = TCPMD5_PCBCTL(inp, sopt); if (error) return (error); INP_WLOCK_RECHECK(inp); goto unlock_and_done; #endif /* IPSEC */ case TCP_NODELAY: case TCP_NOOPT: case TCP_LRD: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); INP_WLOCK_RECHECK(inp); switch (sopt->sopt_name) { case TCP_NODELAY: opt = TF_NODELAY; break; case TCP_NOOPT: opt = TF_NOOPT; break; case TCP_LRD: opt = TF_LRD; break; default: opt = 0; /* dead code to fool gcc */ break; } if (optval) tp->t_flags |= opt; else tp->t_flags &= ~opt; unlock_and_done: #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) { tcp_offload_ctloutput(tp, sopt->sopt_dir, sopt->sopt_name); } #endif INP_WUNLOCK(inp); break; case TCP_NOPUSH: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); INP_WLOCK_RECHECK(inp); if (optval) tp->t_flags |= TF_NOPUSH; else if (tp->t_flags & TF_NOPUSH) { tp->t_flags &= ~TF_NOPUSH; if (TCPS_HAVEESTABLISHED(tp->t_state)) { struct epoch_tracker et; NET_EPOCH_ENTER(et); error = tcp_output_nodrop(tp); NET_EPOCH_EXIT(et); } } goto unlock_and_done; case TCP_REMOTE_UDP_ENCAPS_PORT: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); if ((optval < TCP_TUNNELING_PORT_MIN) || (optval > TCP_TUNNELING_PORT_MAX)) { /* Its got to be in range */ return (EINVAL); } if ((V_tcp_udp_tunneling_port == 0) && (optval != 0)) { /* You have to have enabled a UDP 
tunneling port first */ return (EINVAL); } INP_WLOCK_RECHECK(inp); if (tp->t_state != TCPS_CLOSED) { /* You can't change after you are connected */ error = EINVAL; } else { /* Ok we are all good set the port */ tp->t_port = htons(optval); } goto unlock_and_done; case TCP_MAXSEG: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); INP_WLOCK_RECHECK(inp); if (optval > 0 && optval <= tp->t_maxseg && optval + 40 >= V_tcp_minmss) tp->t_maxseg = optval; else error = EINVAL; goto unlock_and_done; case TCP_INFO: INP_WUNLOCK(inp); error = EINVAL; break; case TCP_STATS: INP_WUNLOCK(inp); #ifdef STATS error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); if (optval > 0) sbp = stats_blob_alloc( V_tcp_perconn_stats_dflt_tpl, 0); else sbp = NULL; INP_WLOCK_RECHECK(inp); if ((tp->t_stats != NULL && sbp == NULL) || (tp->t_stats == NULL && sbp != NULL)) { struct statsblob *t = tp->t_stats; tp->t_stats = sbp; sbp = t; } INP_WUNLOCK(inp); stats_blob_destroy(sbp); #else return (EOPNOTSUPP); #endif /* !STATS */ break; case TCP_CONGESTION: error = tcp_set_cc_mod(inp, sopt); break; case TCP_REUSPORT_LB_NUMA: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); INP_WLOCK_RECHECK(inp); if (!error) error = in_pcblbgroup_numa(inp, optval); INP_WUNLOCK(inp); break; #ifdef KERN_TLS case TCP_TXTLS_ENABLE: INP_WUNLOCK(inp); error = copyin_tls_enable(sopt, &tls); if (error) break; error = ktls_enable_tx(so, &tls); break; case TCP_TXTLS_MODE: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); if (error) return (error); INP_WLOCK_RECHECK(inp); error = ktls_set_tx_mode(so, ui); INP_WUNLOCK(inp); break; case TCP_RXTLS_ENABLE: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &tls, sizeof(tls), sizeof(tls)); if (error) break; error = ktls_enable_rx(so, &tls); break; #endif case TCP_MAXUNACKTIME: case TCP_KEEPIDLE: case TCP_KEEPINTVL: case TCP_KEEPINIT: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); if (error) return (error); if (ui > (UINT_MAX / hz)) { error = EINVAL; break; } ui *= hz; INP_WLOCK_RECHECK(inp); switch (sopt->sopt_name) { case TCP_MAXUNACKTIME: tp->t_maxunacktime = ui; break; case TCP_KEEPIDLE: tp->t_keepidle = ui; /* * XXX: better check current remaining * timeout and "merge" it with new value. */ if ((tp->t_state > TCPS_LISTEN) && (tp->t_state <= TCPS_CLOSING)) tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); break; case TCP_KEEPINTVL: tp->t_keepintvl = ui; if ((tp->t_state == TCPS_FIN_WAIT_2) && (TP_MAXIDLE(tp) > 0)) tcp_timer_activate(tp, TT_2MSL, TP_MAXIDLE(tp)); break; case TCP_KEEPINIT: tp->t_keepinit = ui; if (tp->t_state == TCPS_SYN_RECEIVED || tp->t_state == TCPS_SYN_SENT) tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); break; } goto unlock_and_done; case TCP_KEEPCNT: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); if (error) return (error); INP_WLOCK_RECHECK(inp); tp->t_keepcnt = ui; if ((tp->t_state == TCPS_FIN_WAIT_2) && (TP_MAXIDLE(tp) > 0)) tcp_timer_activate(tp, TT_2MSL, TP_MAXIDLE(tp)); goto unlock_and_done; #ifdef TCPPCAP case TCP_PCAP_OUT: case TCP_PCAP_IN: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); INP_WLOCK_RECHECK(inp); if (optval >= 0) tcp_pcap_set_sock_max( (sopt->sopt_name == TCP_PCAP_OUT) ? 
&(tp->t_outpkts) : &(tp->t_inpkts), optval); else error = EINVAL; goto unlock_and_done; #endif case TCP_FASTOPEN: { struct tcp_fastopen tfo_optval; INP_WUNLOCK(inp); if (!V_tcp_fastopen_client_enable && !V_tcp_fastopen_server_enable) return (EPERM); error = sooptcopyin(sopt, &tfo_optval, sizeof(tfo_optval), sizeof(int)); if (error) return (error); INP_WLOCK_RECHECK(inp); if ((tp->t_state != TCPS_CLOSED) && (tp->t_state != TCPS_LISTEN)) { error = EINVAL; goto unlock_and_done; } if (tfo_optval.enable) { if (tp->t_state == TCPS_LISTEN) { if (!V_tcp_fastopen_server_enable) { error = EPERM; goto unlock_and_done; } if (tp->t_tfo_pending == NULL) tp->t_tfo_pending = tcp_fastopen_alloc_counter(); } else { /* * If a pre-shared key was provided, * stash it in the client cookie * field of the tcpcb for use during * connect. */ if (sopt->sopt_valsize == sizeof(tfo_optval)) { memcpy(tp->t_tfo_cookie.client, tfo_optval.psk, TCP_FASTOPEN_PSK_LEN); tp->t_tfo_client_cookie_len = TCP_FASTOPEN_PSK_LEN; } } tp->t_flags |= TF_FASTOPEN; } else tp->t_flags &= ~TF_FASTOPEN; goto unlock_and_done; } #ifdef TCP_BLACKBOX case TCP_LOG: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); INP_WLOCK_RECHECK(inp); error = tcp_log_state_change(tp, optval); goto unlock_and_done; case TCP_LOGBUF: INP_WUNLOCK(inp); error = EINVAL; break; case TCP_LOGID: INP_WUNLOCK(inp); error = sooptcopyin(sopt, buf, TCP_LOG_ID_LEN - 1, 0); if (error) break; buf[sopt->sopt_valsize] = '\0'; INP_WLOCK_RECHECK(inp); error = tcp_log_set_id(tp, buf); /* tcp_log_set_id() unlocks the INP. */ break; case TCP_LOGDUMP: case TCP_LOGDUMPID: INP_WUNLOCK(inp); error = sooptcopyin(sopt, buf, TCP_LOG_REASON_LEN - 1, 0); if (error) break; buf[sopt->sopt_valsize] = '\0'; INP_WLOCK_RECHECK(inp); if (sopt->sopt_name == TCP_LOGDUMP) { error = tcp_log_dump_tp_logbuf(tp, buf, M_WAITOK, true); INP_WUNLOCK(inp); } else { tcp_log_dump_tp_bucket_logbufs(tp, buf); /* * tcp_log_dump_tp_bucket_logbufs() drops the * INP lock. 
*/ } break; #endif default: INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } break; case SOPT_GET: tp = intotcpcb(inp); switch (sopt->sopt_name) { #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) case TCP_MD5SIG: INP_WUNLOCK(inp); if (!TCPMD5_ENABLED()) return (ENOPROTOOPT); error = TCPMD5_PCBCTL(inp, sopt); break; #endif case TCP_NODELAY: optval = tp->t_flags & TF_NODELAY; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_MAXSEG: optval = tp->t_maxseg; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_REMOTE_UDP_ENCAPS_PORT: optval = ntohs(tp->t_port); INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_NOOPT: optval = tp->t_flags & TF_NOOPT; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_NOPUSH: optval = tp->t_flags & TF_NOPUSH; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_INFO: tcp_fill_info(tp, &ti); INP_WUNLOCK(inp); error = sooptcopyout(sopt, &ti, sizeof ti); break; case TCP_STATS: { #ifdef STATS int nheld; TYPEOF_MEMBER(struct statsblob, flags) sbflags = 0; error = 0; socklen_t outsbsz = sopt->sopt_valsize; if (tp->t_stats == NULL) error = ENOENT; else if (outsbsz >= tp->t_stats->cursz) outsbsz = tp->t_stats->cursz; else if (outsbsz >= sizeof(struct statsblob)) outsbsz = sizeof(struct statsblob); else error = EINVAL; INP_WUNLOCK(inp); if (error) break; sbp = sopt->sopt_val; nheld = atop(round_page(((vm_offset_t)sbp) + (vm_size_t)outsbsz) - trunc_page((vm_offset_t)sbp)); vm_page_t ma[nheld]; if (vm_fault_quick_hold_pages( &curproc->p_vmspace->vm_map, (vm_offset_t)sbp, outsbsz, VM_PROT_READ | VM_PROT_WRITE, ma, nheld) < 0) { error = EFAULT; break; } if ((error = copyin_nofault(&(sbp->flags), &sbflags, SIZEOF_MEMBER(struct statsblob, flags)))) goto unhold; INP_WLOCK_RECHECK(inp); error = stats_blob_snapshot(&sbp, outsbsz, tp->t_stats, sbflags | SB_CLONE_USRDSTNOFAULT); INP_WUNLOCK(inp); sopt->sopt_valsize = outsbsz; unhold: vm_page_unhold_pages(ma, nheld); #else INP_WUNLOCK(inp); error = EOPNOTSUPP; #endif /* !STATS */ break; } case TCP_CONGESTION: len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX); INP_WUNLOCK(inp); error = sooptcopyout(sopt, buf, len + 1); break; case TCP_MAXUNACKTIME: case TCP_KEEPIDLE: case TCP_KEEPINTVL: case TCP_KEEPINIT: case TCP_KEEPCNT: switch (sopt->sopt_name) { case TCP_MAXUNACKTIME: ui = TP_MAXUNACKTIME(tp) / hz; break; case TCP_KEEPIDLE: ui = TP_KEEPIDLE(tp) / hz; break; case TCP_KEEPINTVL: ui = TP_KEEPINTVL(tp) / hz; break; case TCP_KEEPINIT: ui = TP_KEEPINIT(tp) / hz; break; case TCP_KEEPCNT: ui = TP_KEEPCNT(tp); break; } INP_WUNLOCK(inp); error = sooptcopyout(sopt, &ui, sizeof(ui)); break; #ifdef TCPPCAP case TCP_PCAP_OUT: case TCP_PCAP_IN: optval = tcp_pcap_get_sock_max( (sopt->sopt_name == TCP_PCAP_OUT) ? 
&(tp->t_outpkts) : &(tp->t_inpkts)); INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; #endif case TCP_FASTOPEN: optval = tp->t_flags & TF_FASTOPEN; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; #ifdef TCP_BLACKBOX case TCP_LOG: optval = tcp_get_bblog_state(tp); INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof(optval)); break; case TCP_LOGBUF: /* tcp_log_getlogbuf() does INP_WUNLOCK(inp) */ error = tcp_log_getlogbuf(sopt, tp); break; case TCP_LOGID: len = tcp_log_get_id(tp, buf); INP_WUNLOCK(inp); error = sooptcopyout(sopt, buf, len + 1); break; case TCP_LOGDUMP: case TCP_LOGDUMPID: INP_WUNLOCK(inp); error = EINVAL; break; #endif #ifdef KERN_TLS case TCP_TXTLS_MODE: error = ktls_get_tx_mode(so, &optval); INP_WUNLOCK(inp); if (error == 0) error = sooptcopyout(sopt, &optval, sizeof(optval)); break; case TCP_RXTLS_MODE: error = ktls_get_rx_mode(so, &optval); INP_WUNLOCK(inp); if (error == 0) error = sooptcopyout(sopt, &optval, sizeof(optval)); break; #endif case TCP_LRD: optval = tp->t_flags & TF_LRD; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; default: INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } break; } return (error); } #undef INP_WLOCK_RECHECK #undef INP_WLOCK_RECHECK_CLEANUP /* * Initiate (or continue) disconnect. * If embryonic state, just send reset (once). * If in ``let data drain'' option and linger null, just drop. * Otherwise (hard), mark socket disconnecting and drop * current input data; switch states based on user close, and * send segment to peer (with FIN). */ static void tcp_disconnect(struct tcpcb *tp) { struct inpcb *inp = tptoinpcb(tp); struct socket *so = tptosocket(tp); NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(inp); /* * Neither tcp_close() nor tcp_drop() should return NULL, as the * socket is still open. */ if (tp->t_state < TCPS_ESTABLISHED && !(tp->t_state > TCPS_LISTEN && IS_FASTOPEN(tp->t_flags))) { tp = tcp_close(tp); KASSERT(tp != NULL, ("tcp_disconnect: tcp_close() returned NULL")); } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) { tp = tcp_drop(tp, 0); KASSERT(tp != NULL, ("tcp_disconnect: tcp_drop() returned NULL")); } else { soisdisconnecting(so); sbflush(&so->so_rcv); tcp_usrclosed(tp); if (!(inp->inp_flags & INP_DROPPED)) /* Ignore stack's drop request, we already at it. */ (void)tcp_output_nodrop(tp); } } /* * User issued close, and wish to trail through shutdown states: * if never received SYN, just forget it. If got a SYN from peer, * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. * If already got a FIN from peer, then almost done; go to LAST_ACK * state. In all other cases, have already sent FIN to peer (e.g. * after PRU_SHUTDOWN), and just have to play tedious game waiting * for peer to send FIN or not respond to keep-alives, etc. * We can let the user exit from the close as soon as the FIN is acked. */ static void tcp_usrclosed(struct tcpcb *tp) { NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(tptoinpcb(tp)); switch (tp->t_state) { case TCPS_LISTEN: #ifdef TCP_OFFLOAD tcp_offload_listen_stop(tp); #endif tcp_state_change(tp, TCPS_CLOSED); /* FALLTHROUGH */ case TCPS_CLOSED: tp = tcp_close(tp); /* * tcp_close() should never return NULL here as the socket is * still open. 
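 * (tcp_close() only returns NULL once the last reference to the
 * tcpcb has been dropped, which cannot happen while the socket
 * layer still holds the socket open; hence the KASSERT that
 * follows.)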
*/ KASSERT(tp != NULL, ("tcp_usrclosed: tcp_close() returned NULL")); break; case TCPS_SYN_SENT: case TCPS_SYN_RECEIVED: tp->t_flags |= TF_NEEDFIN; break; case TCPS_ESTABLISHED: tcp_state_change(tp, TCPS_FIN_WAIT_1); break; case TCPS_CLOSE_WAIT: tcp_state_change(tp, TCPS_LAST_ACK); break; } if (tp->t_acktime == 0) tp->t_acktime = ticks; if (tp->t_state >= TCPS_FIN_WAIT_2) { soisdisconnected(tptosocket(tp)); /* Prevent the connection hanging in FIN_WAIT_2 forever. */ if (tp->t_state == TCPS_FIN_WAIT_2) { int timeout; timeout = (tcp_fast_finwait2_recycle) ? tcp_finwait2_timeout : TP_MAXIDLE(tp); tcp_timer_activate(tp, TT_2MSL, timeout); } } } #ifdef DDB static void db_print_indent(int indent) { int i; for (i = 0; i < indent; i++) db_printf(" "); } static void db_print_tstate(int t_state) { switch (t_state) { case TCPS_CLOSED: db_printf("TCPS_CLOSED"); return; case TCPS_LISTEN: db_printf("TCPS_LISTEN"); return; case TCPS_SYN_SENT: db_printf("TCPS_SYN_SENT"); return; case TCPS_SYN_RECEIVED: db_printf("TCPS_SYN_RECEIVED"); return; case TCPS_ESTABLISHED: db_printf("TCPS_ESTABLISHED"); return; case TCPS_CLOSE_WAIT: db_printf("TCPS_CLOSE_WAIT"); return; case TCPS_FIN_WAIT_1: db_printf("TCPS_FIN_WAIT_1"); return; case TCPS_CLOSING: db_printf("TCPS_CLOSING"); return; case TCPS_LAST_ACK: db_printf("TCPS_LAST_ACK"); return; case TCPS_FIN_WAIT_2: db_printf("TCPS_FIN_WAIT_2"); return; case TCPS_TIME_WAIT: db_printf("TCPS_TIME_WAIT"); return; default: db_printf("unknown"); return; } } static void db_print_tflags(u_int t_flags) { int comma; comma = 0; if (t_flags & TF_ACKNOW) { db_printf("%sTF_ACKNOW", comma ? ", " : ""); comma = 1; } if (t_flags & TF_DELACK) { db_printf("%sTF_DELACK", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NODELAY) { db_printf("%sTF_NODELAY", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NOOPT) { db_printf("%sTF_NOOPT", comma ? ", " : ""); comma = 1; } if (t_flags & TF_SENTFIN) { db_printf("%sTF_SENTFIN", comma ? ", " : ""); comma = 1; } if (t_flags & TF_REQ_SCALE) { db_printf("%sTF_REQ_SCALE", comma ? ", " : ""); comma = 1; } if (t_flags & TF_RCVD_SCALE) { db_printf("%sTF_RECVD_SCALE", comma ? ", " : ""); comma = 1; } if (t_flags & TF_REQ_TSTMP) { db_printf("%sTF_REQ_TSTMP", comma ? ", " : ""); comma = 1; } if (t_flags & TF_RCVD_TSTMP) { db_printf("%sTF_RCVD_TSTMP", comma ? ", " : ""); comma = 1; } if (t_flags & TF_SACK_PERMIT) { db_printf("%sTF_SACK_PERMIT", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NEEDSYN) { db_printf("%sTF_NEEDSYN", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NEEDFIN) { db_printf("%sTF_NEEDFIN", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NOPUSH) { db_printf("%sTF_NOPUSH", comma ? ", " : ""); comma = 1; } if (t_flags & TF_PREVVALID) { db_printf("%sTF_PREVVALID", comma ? ", " : ""); comma = 1; } if (t_flags & TF_MORETOCOME) { db_printf("%sTF_MORETOCOME", comma ? ", " : ""); comma = 1; } if (t_flags & TF_SONOTCONN) { db_printf("%sTF_SONOTCONN", comma ? ", " : ""); comma = 1; } if (t_flags & TF_LASTIDLE) { db_printf("%sTF_LASTIDLE", comma ? ", " : ""); comma = 1; } if (t_flags & TF_RXWIN0SENT) { db_printf("%sTF_RXWIN0SENT", comma ? ", " : ""); comma = 1; } if (t_flags & TF_FASTRECOVERY) { db_printf("%sTF_FASTRECOVERY", comma ? ", " : ""); comma = 1; } if (t_flags & TF_CONGRECOVERY) { db_printf("%sTF_CONGRECOVERY", comma ? ", " : ""); comma = 1; } if (t_flags & TF_WASFRECOVERY) { db_printf("%sTF_WASFRECOVERY", comma ? ", " : ""); comma = 1; } if (t_flags & TF_WASCRECOVERY) { db_printf("%sTF_WASCRECOVERY", comma ? 
", " : ""); comma = 1; } if (t_flags & TF_SIGNATURE) { db_printf("%sTF_SIGNATURE", comma ? ", " : ""); comma = 1; } if (t_flags & TF_FORCEDATA) { db_printf("%sTF_FORCEDATA", comma ? ", " : ""); comma = 1; } if (t_flags & TF_TSO) { db_printf("%sTF_TSO", comma ? ", " : ""); comma = 1; } if (t_flags & TF_FASTOPEN) { db_printf("%sTF_FASTOPEN", comma ? ", " : ""); comma = 1; } } static void db_print_tflags2(u_int t_flags2) { int comma; comma = 0; if (t_flags2 & TF2_PLPMTU_BLACKHOLE) { db_printf("%sTF2_PLPMTU_BLACKHOLE", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_PLPMTU_PMTUD) { db_printf("%sTF2_PLPMTU_PMTUD", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_PLPMTU_MAXSEGSNT) { db_printf("%sTF2_PLPMTU_MAXSEGSNT", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_LOG_AUTO) { db_printf("%sTF2_LOG_AUTO", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_DROP_AF_DATA) { db_printf("%sTF2_DROP_AF_DATA", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_ECN_PERMIT) { db_printf("%sTF2_ECN_PERMIT", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_ECN_SND_CWR) { db_printf("%sTF2_ECN_SND_CWR", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_ECN_SND_ECE) { db_printf("%sTF2_ECN_SND_ECE", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_ACE_PERMIT) { db_printf("%sTF2_ACE_PERMIT", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_FBYTES_COMPLETE) { db_printf("%sTF2_FBYTES_COMPLETE", comma ? ", " : ""); comma = 1; } } static void db_print_toobflags(char t_oobflags) { int comma; comma = 0; if (t_oobflags & TCPOOB_HAVEDATA) { db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : ""); comma = 1; } if (t_oobflags & TCPOOB_HADDATA) { db_printf("%sTCPOOB_HADDATA", comma ? ", " : ""); comma = 1; } } static void db_print_tcpcb(struct tcpcb *tp, const char *name, int indent) { db_print_indent(indent); db_printf("%s at %p\n", name, tp); indent += 2; db_print_indent(indent); db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n", TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks); db_print_indent(indent); db_printf("t_callout: %p t_timers: %p\n", &tp->t_callout, &tp->t_timers); db_print_indent(indent); db_printf("t_state: %d (", tp->t_state); db_print_tstate(tp->t_state); db_printf(")\n"); db_print_indent(indent); db_printf("t_flags: 0x%x (", tp->t_flags); db_print_tflags(tp->t_flags); db_printf(")\n"); db_print_indent(indent); db_printf("t_flags2: 0x%x (", tp->t_flags2); db_print_tflags2(tp->t_flags2); db_printf(")\n"); db_print_indent(indent); db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: 0x%08x\n", tp->snd_una, tp->snd_max, tp->snd_nxt); db_print_indent(indent); db_printf("snd_up: 0x%08x snd_wl1: 0x%08x snd_wl2: 0x%08x\n", tp->snd_up, tp->snd_wl1, tp->snd_wl2); db_print_indent(indent); db_printf("iss: 0x%08x irs: 0x%08x rcv_nxt: 0x%08x\n", tp->iss, tp->irs, tp->rcv_nxt); db_print_indent(indent); db_printf("rcv_adv: 0x%08x rcv_wnd: %u rcv_up: 0x%08x\n", tp->rcv_adv, tp->rcv_wnd, tp->rcv_up); db_print_indent(indent); db_printf("snd_wnd: %u snd_cwnd: %u\n", tp->snd_wnd, tp->snd_cwnd); db_print_indent(indent); db_printf("snd_ssthresh: %u snd_recover: " "0x%08x\n", tp->snd_ssthresh, tp->snd_recover); db_print_indent(indent); db_printf("t_rcvtime: %u t_startime: %u\n", tp->t_rcvtime, tp->t_starttime); db_print_indent(indent); db_printf("t_rttime: %u t_rtsq: 0x%08x\n", tp->t_rtttime, tp->t_rtseq); db_print_indent(indent); db_printf("t_rxtcur: %d t_maxseg: %u t_srtt: %d\n", tp->t_rxtcur, tp->t_maxseg, tp->t_srtt); db_print_indent(indent); db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: 
%u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin); db_print_indent(indent); db_printf("t_rttupdated: %u max_sndwnd: %u t_softerror: %d\n", tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror); db_print_indent(indent); db_printf("t_oobflags: 0x%x (", tp->t_oobflags); db_print_toobflags(tp->t_oobflags); db_printf(") t_iobc: 0x%02x\n", tp->t_iobc); db_print_indent(indent); db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n", tp->snd_scale, tp->rcv_scale, tp->request_r_scale); db_print_indent(indent); db_printf("ts_recent: %u ts_recent_age: %u\n", tp->ts_recent, tp->ts_recent_age); db_print_indent(indent); db_printf("ts_offset: %u last_ack_sent: 0x%08x snd_cwnd_prev: " "%u\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev); db_print_indent(indent); db_printf("snd_ssthresh_prev: %u snd_recover_prev: 0x%08x " "t_badrxtwin: %u\n", tp->snd_ssthresh_prev, tp->snd_recover_prev, tp->t_badrxtwin); db_print_indent(indent); db_printf("snd_numholes: %d snd_holes first: %p\n", tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes)); db_print_indent(indent); db_printf("snd_fack: 0x%08x rcv_numsacks: %d\n", tp->snd_fack, tp->rcv_numsacks); /* Skip sackblks, sackhint. */ db_print_indent(indent); db_printf("t_rttlow: %d rfbuf_ts: %u rfbuf_cnt: %d\n", tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt); } DB_SHOW_COMMAND(tcpcb, db_show_tcpcb) { struct tcpcb *tp; if (!have_addr) { db_printf("usage: show tcpcb \n"); return; } tp = (struct tcpcb *)addr; db_print_tcpcb(tp, "tcpcb", 0); } #endif diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 375f762e1cc1..d44e5c2cddb5 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -1,1263 +1,1244 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred) { struct socket *so = inp->inp_socket; u_int16_t lport = 0; int error, lookupflags = 0; #ifdef INVARIANTS struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; #endif INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); error = prison_local_ip6(cred, laddr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)); if (error) return(error); /* XXX: this is redundant when called from in6_pcbbind */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0) lookupflags = INPLOOKUP_WILDCARD; inp->inp_flags |= INP_ANONPORT; error = in_pcb_lport(inp, NULL, &lport, cred, lookupflags); if (error != 0) return (error); inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; return (EAGAIN); } return (0); } int in6_pcbbind(struct inpcb *inp, struct sockaddr_in6 *sin6, struct ucred *cred) { struct socket *so = inp->inp_socket; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int error, lookupflags = 0; int reuseport = (so->so_options & SO_REUSEPORT); /* * XXX: Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here * so that we don't have to add to the (already messy) code below. 
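 * For reference, a load-balancing listener opts in with something
 * like the following userland sketch (illustrative only):
 *
 *     int one = 1;
 *     setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, &one, sizeof(one));
 *     bind(s, (struct sockaddr *)&sin6, sizeof(sin6));
 *     listen(s, backlog);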
*/ int reuseport_lb = (so->so_options & SO_REUSEPORT_LB); INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) return (EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0) lookupflags = INPLOOKUP_WILDCARD; if (sin6 == NULL) { if ((error = prison_local_ip6(cred, &inp->in6p_laddr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) return (error); } else { KASSERT(sin6->sin6_family == AF_INET6, ("%s: invalid address family for %p", __func__, sin6)); KASSERT(sin6->sin6_len == sizeof(*sin6), ("%s: invalid address length for %p", __func__, sin6)); if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return(error); if ((error = prison_local_ip6(cred, &sin6->sin6_addr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) return (error); lport = sin6->sin6_port; if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow complete duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0) reuseport = SO_REUSEADDR|SO_REUSEPORT; /* * XXX: How to deal with SO_REUSEPORT_LB here? * Treat it the same as SO_REUSEPORT for now. */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0) reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB; } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct epoch_tracker et; struct ifaddr *ifa; sin6->sin6_port = 0; /* yech... */ NET_EPOCH_ENTER(et); if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) == NULL && (inp->inp_flags & INP_BINDANY) == 0) { NET_EPOCH_EXIT(et); return (EADDRNOTAVAIL); } /* * XXX: binding to an anycast address might accidentally * cause sending a packet with an anycast source address. * We should allow binding to a deprecated address, since * the application dares to use it.
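 * (The check below rejects IN6_IFF_ANYCAST, IN6_IFF_NOTREADY and
 * IN6_IFF_DETACHED addresses; deprecated addresses deliberately
 * remain bindable.)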
*/ if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) { NET_EPOCH_EXIT(et); return (EADDRNOTAVAIL); } NET_EPOCH_EXIT(et); } if (lport) { struct inpcb *t; /* GROSS */ if (ntohs(lport) <= V_ipport_reservedhigh && ntohs(lport) >= V_ipport_reservedlow && priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT)) return (EACCES); if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) && priv_check_cred(inp->inp_cred, PRIV_NETINET_REUSEPORT) != 0) { t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, INPLOOKUP_WILDCARD, cred); if (t != NULL && (so->so_type != SOCK_STREAM || IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) && (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || (t->inp_socket->so_options & SO_REUSEPORT) || (t->inp_socket->so_options & SO_REUSEPORT_LB) == 0) && (inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) return (EADDRINUSE); #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, lport, INPLOOKUP_WILDCARD, cred); if (t != NULL && (so->so_type != SOCK_STREAM || ntohl(t->inp_faddr.s_addr) == INADDR_ANY) && (inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) return (EADDRINUSE); } #endif } t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, lookupflags, cred); if (t && (reuseport & t->inp_socket->so_options) == 0 && (reuseport_lb & t->inp_socket->so_options) == 0) { return (EADDRINUSE); } #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, lport, lookupflags, cred); if (t && (reuseport & t->inp_socket->so_options) == 0 && (reuseport_lb & t->inp_socket->so_options) == 0 && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (t->inp_vflag & INP_IPV6PROTO) != 0)) { return (EADDRINUSE); } } #endif } inp->in6p_laddr = sin6->sin6_addr; } if (lport == 0) { if ((error = in6_pcbsetport(&inp->in6p_laddr, inp, cred)) != 0) { /* Undo an address bind that may have occurred. */ inp->in6p_laddr = in6addr_any; return (error); } } else { inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; return (EAGAIN); } } return (0); } /* * Transform old in6_pcbconnect() into an inner subroutine for new * in6_pcbconnect(): Do some validity-checking on the remote * address (in mbuf 'nam') and then determine local host address * (i.e., which interface) to use to access that remote host. * * This preserves definition of in6_pcbconnect(), while supporting a * slightly different version for T/TCP. (This is more than * a bit of a kludge, but cleaning up the internal interfaces would * have forced minor changes in every protocol). */ static int in6_pcbladdr(struct inpcb *inp, struct sockaddr_in6 *sin6, struct in6_addr *plocal_addr6, bool sas_required) { int error = 0; int scope_ambiguous = 0; struct in6_addr in6a; NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); /* XXXRW: why? */ if (sin6->sin6_port == 0) return (EADDRNOTAVAIL); if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return(error); if (!CK_STAILQ_EMPTY(&V_in6_ifaddrhead)) { /* * If the destination address is UNSPECIFIED addr, * use the loopback addr, e.g ::1. 
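 * That is, a connect(2) to "::" is interpreted as a connection to
 * the local node, mirroring the IPv4 convention of mapping
 * INADDR_ANY to the loopback address.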
*/ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) sin6->sin6_addr = in6addr_loopback; } if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0) return (error); if (sas_required) { error = in6_selectsrc_socket(sin6, inp->in6p_outputopts, inp, inp->inp_cred, scope_ambiguous, &in6a, NULL); if (error) return (error); } else { /* * Source address selection isn't required when the syncache * has already established the connection and both source and * destination addresses were chosen. * * This also includes the case when fwd_tag was used to * select the source address in tcp_input(). */ in6a = inp->in6p_laddr; } if (IN6_IS_ADDR_UNSPECIFIED(&in6a)) return (EHOSTUNREACH); /* * Do not update this earlier, in case we return with an error. * * XXX: this in6_selectsrc_socket result might replace the bound local * address with the address specified by setsockopt(IPV6_PKTINFO). * Is it the intended behavior? */ *plocal_addr6 = in6a; /* * Don't do a pcblookup call here; return the interface in * plocal_addr6 * and exit to the caller, which will do the lookup. */ return (0); } /* * Outer subroutine: * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If we don't have a local address for this socket yet, * then pick one. */ int in6_pcbconnect(struct inpcb *inp, struct sockaddr_in6 *sin6, struct ucred *cred, bool sas_required) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct sockaddr_in6 laddr6; int error; NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); KASSERT(sin6->sin6_family == AF_INET6, ("%s: invalid address family for %p", __func__, sin6)); KASSERT(sin6->sin6_len == sizeof(*sin6), ("%s: invalid address length for %p", __func__, sin6)); KASSERT(IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr), ("%s: inp is already connected", __func__)); bzero(&laddr6, sizeof(laddr6)); laddr6.sin6_family = AF_INET6; #ifdef ROUTE_MPATH if (CALC_FLOWID_OUTBOUND) { uint32_t hash_type, hash_val; hash_val = fib6_calc_software_hash(&inp->in6p_laddr, &sin6->sin6_addr, 0, sin6->sin6_port, inp->inp_socket->so_proto->pr_protocol, &hash_type); inp->inp_flowid = hash_val; inp->inp_flowtype = hash_type; } #endif /* * Call the inner routine to assign the local interface address. * in6_pcbladdr() may automatically fill in sin6_scope_id. */ if ((error = in6_pcbladdr(inp, sin6, &laddr6.sin6_addr, sas_required)) != 0) return (error); if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ?
&laddr6.sin6_addr : &inp->in6p_laddr, inp->inp_lport, 0, M_NODOM) != NULL) return (EADDRINUSE); if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (inp->inp_lport == 0) { error = in_pcb_lport_dest(inp, (struct sockaddr *) &laddr6, &inp->inp_lport, (struct sockaddr *) sin6, sin6->sin6_port, cred, INPLOOKUP_WILDCARD); if (error) return (error); } inp->in6p_laddr = laddr6.sin6_addr; } inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; if (inp->inp_flags & IN6P_AUTOFLOWLABEL) inp->inp_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); if ((inp->inp_flags & INP_INHASHLIST) != 0) { in_pcbrehash(inp); } else { in_pcbinshash(inp); } return (0); } void in6_pcbdisconnect(struct inpcb *inp) { INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); KASSERT(inp->inp_smr == SMR_SEQ_INVALID, ("%s: inp %p was already disconnected", __func__, inp)); in_pcbremhash_locked(inp); /* See the comment in in_pcbinshash(). */ inp->inp_smr = smr_advance(inp->inp_pcbinfo->ipi_smr); /* XXX-MJ torn writes are visible to SMR lookup */ memset(&inp->in6p_laddr, 0, sizeof(inp->in6p_laddr)); memset(&inp->in6p_faddr, 0, sizeof(inp->in6p_faddr)); inp->inp_fport = 0; /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; } struct sockaddr * in6_sockaddr(in_port_t port, struct in6_addr *addr_p) { struct sockaddr_in6 *sin6; sin6 = malloc(sizeof *sin6, M_SONAME, M_WAITOK); bzero(sin6, sizeof *sin6); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); sin6->sin6_port = port; sin6->sin6_addr = *addr_p; (void)sa6_recoverscope(sin6); /* XXX: should catch errors */ return (struct sockaddr *)sin6; } -struct sockaddr * -in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p) -{ - struct sockaddr_in sin; - struct sockaddr_in6 *sin6_p; - - bzero(&sin, sizeof sin); - sin.sin_family = AF_INET; - sin.sin_len = sizeof(sin); - sin.sin_port = port; - sin.sin_addr = *addr_p; - - sin6_p = malloc(sizeof *sin6_p, M_SONAME, - M_WAITOK); - in6_sin_2_v4mapsin6(&sin, sin6_p); - - return (struct sockaddr *)sin6_p; -} - int in6_getsockaddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; struct in6_addr addr; in_port_t port; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL")); INP_RLOCK(inp); port = inp->inp_lport; addr = inp->in6p_laddr; INP_RUNLOCK(inp); *nam = in6_sockaddr(port, &addr); return 0; } int in6_getpeeraddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; struct in6_addr addr; in_port_t port; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL")); INP_RLOCK(inp); port = inp->inp_fport; addr = inp->in6p_faddr; INP_RUNLOCK(inp); *nam = in6_sockaddr(port, &addr); return 0; } int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL")); #ifdef INET if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { error = in_getsockaddr(so, nam); if (error == 0) in6_sin_2_v4mapsin6_in_sock(nam); } else #endif { /* scope issues will be handled in in6_getsockaddr(). 
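 * In the INP_IPV4 case above, the IPv4 name is rewritten in
 * v4-mapped form, so e.g. 192.0.2.1 is reported to the
 * application as ::ffff:192.0.2.1.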
*/ error = in6_getsockaddr(so, nam); } return error; } int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL")); #ifdef INET if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { error = in_getpeeraddr(so, nam); if (error == 0) in6_sin_2_v4mapsin6_in_sock(nam); } else #endif /* scope issues will be handled in in6_getpeeraddr(). */ error = in6_getpeeraddr(so, nam); return error; } /* * Pass some notification to all connections of a protocol * associated with address dst. The local address and/or port numbers * may be specified to limit the search. The "usual action" will be * taken, depending on the ctlinput cmd. The caller must filter any * cmds that are uninteresting (e.g., no error in the map). * Call the protocol specific routine (if any) to report * any errors for each matching socket. */ static bool inp_match6(const struct inpcb *inp, void *v __unused) { return ((inp->inp_vflag & INP_IPV6) != 0); } void in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr_in6 *sa6_dst, u_int fport_arg, const struct sockaddr_in6 *src, u_int lport_arg, int errno, void *cmdarg, struct inpcb *(*notify)(struct inpcb *, int)) { struct inpcb_iterator inpi = INP_ITERATOR(pcbinfo, INPLOOKUP_WLOCKPCB, inp_match6, NULL); struct inpcb *inp; struct sockaddr_in6 sa6_src; u_short fport = fport_arg, lport = lport_arg; u_int32_t flowinfo; if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) return; /* * note that src can be NULL when we get notify by local fragmentation. */ sa6_src = (src == NULL) ? sa6_any : *src; flowinfo = sa6_src.sin6_flowinfo; while ((inp = inp_next(&inpi)) != NULL) { INP_WLOCK_ASSERT(inp); /* * If the error designates a new path MTU for a destination * and the application (associated with this socket) wanted to * know the value, notify. * XXX: should we avoid to notify the value to TCP sockets? */ if (errno == EMSGSIZE && cmdarg != NULL) ip6_notify_pmtu(inp, sa6_dst, *(uint32_t *)cmdarg); /* * Detect if we should notify the error. If no source and * destination ports are specified, but non-zero flowinfo and * local address match, notify the error. This is the case * when the error is delivered with an encrypted buffer * by ESP. Otherwise, just compare addresses and ports * as usual. */ if (lport == 0 && fport == 0 && flowinfo && inp->inp_socket != NULL && flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) goto do_notify; else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr) || inp->inp_socket == 0 || (lport && inp->inp_lport != lport) || (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) || (fport && inp->inp_fport != fport)) { continue; } do_notify: if (notify) (*notify)(inp, errno); } } /* * Lookup a PCB based on the local address and port. Caller must hold the * hash lock. No inpcb locks or references are acquired. */ struct inpcb * in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, u_short lport, int lookupflags, struct ucred *cred) { struct inpcb *inp; int matchwild = 3, wildcard; KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); INP_HASH_LOCK_ASSERT(pcbinfo); if ((lookupflags & INPLOOKUP_WILDCARD) == 0) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. 
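 * Connected PCBs live on the exact-match hash chains instead, so
 * only sockets with a wildcard foreign address can appear here.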
*/ head = &pcbinfo->ipi_hash_wild[INP_PCBHASH_WILD(lport, pcbinfo->ipi_hashmask)]; CK_LIST_FOREACH(inp, head, inp_hash_wild) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_lport == lport) { /* Found. */ if (prison_equal_ip6(cred->cr_prison, inp->inp_cred->cr_prison)) return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, pcbinfo->ipi_porthashmask)]; CK_LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ CK_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; if (!prison_equal_ip6(cred->cr_prison, inp->inp_cred->cr_prison)) continue; /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) wildcard++; if (!IN6_IS_ADDR_UNSPECIFIED( &inp->in6p_laddr)) { if (IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; else if (!IN6_ARE_ADDR_EQUAL( &inp->in6p_laddr, laddr)) continue; } else { if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) break; } } } return (match); } } static bool in6_multi_match(const struct inpcb *inp, void *v __unused) { if ((inp->inp_vflag & INP_IPV6) && inp->in6p_moptions != NULL) return (true); else return (false); } void in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) { struct inpcb_iterator inpi = INP_ITERATOR(pcbinfo, INPLOOKUP_RLOCKPCB, in6_multi_match, NULL); struct inpcb *inp; struct in6_multi *inm; struct in6_mfilter *imf; struct ip6_moptions *im6o; IN6_MULTI_LOCK_ASSERT(); while ((inp = inp_next(&inpi)) != NULL) { INP_RLOCK_ASSERT(inp); im6o = inp->in6p_moptions; /* * Unselect the outgoing ifp for multicast if it * is being detached. */ if (im6o->im6o_multicast_ifp == ifp) im6o->im6o_multicast_ifp = NULL; /* * Drop multicast group membership if we joined * through the interface being detached. */ restart: IP6_MFILTER_FOREACH(imf, &im6o->im6o_head) { if ((inm = imf->im6f_in6m) == NULL) continue; if (inm->in6m_ifp != ifp) continue; ip6_mfilter_remove(&im6o->im6o_head, imf); in6_leavegroup_locked(inm, NULL); ip6_mfilter_free(imf); goto restart; } } } /* * Check for alternatives when higher level complains * about service problems. For now, invalidate cached * routing information. If the route was created dynamically * (by a redirect), time to try a default gateway again. */ void in6_losing(struct inpcb *inp) { RO_INVALIDATE_CACHE(&inp->inp_route6); } /* * After a routing change, flush old routing * and allocate a (hopefully) better one. 
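 * Both in6_losing() and in6_rtchange() reduce to invalidating the
 * cached route; the next output will redo the lookup and, with
 * luck, find a working path.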
*/ struct inpcb * in6_rtchange(struct inpcb *inp, int errno __unused) { RO_INVALIDATE_CACHE(&inp->inp_route6); return inp; } static bool in6_pcblookup_lb_numa_match(const struct inpcblbgroup *grp, int domain) { return (domain == M_NODOM || domain == grp->il_numa_domain); } static struct inpcb * in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo, const struct in6_addr *faddr, uint16_t fport, const struct in6_addr *laddr, uint16_t lport, uint8_t domain) { const struct inpcblbgrouphead *hdr; struct inpcblbgroup *grp; struct inpcblbgroup *jail_exact, *jail_wild, *local_exact, *local_wild; INP_HASH_LOCK_ASSERT(pcbinfo); hdr = &pcbinfo->ipi_lbgrouphashbase[ INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)]; /* * Search for an LB group match based on the following criteria: * - prefer jailed groups to non-jailed groups * - prefer exact source address matches to wildcard matches * - prefer groups bound to the specified NUMA domain */ jail_exact = jail_wild = local_exact = local_wild = NULL; CK_LIST_FOREACH(grp, hdr, il_list) { bool injail; #ifdef INET if (!(grp->il_vflag & INP_IPV6)) continue; #endif if (grp->il_lport != lport) continue; injail = prison_flag(grp->il_cred, PR_IP6) != 0; if (injail && prison_check_ip6_locked(grp->il_cred->cr_prison, laddr) != 0) continue; if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr)) { if (injail) { jail_exact = grp; if (in6_pcblookup_lb_numa_match(grp, domain)) /* This is a perfect match. */ goto out; } else if (local_exact == NULL || in6_pcblookup_lb_numa_match(grp, domain)) { local_exact = grp; } } else if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr)) { if (injail) { if (jail_wild == NULL || in6_pcblookup_lb_numa_match(grp, domain)) jail_wild = grp; } else if (local_wild == NULL || in6_pcblookup_lb_numa_match(grp, domain)) { local_wild = grp; } } } if (jail_exact != NULL) grp = jail_exact; else if (jail_wild != NULL) grp = jail_wild; else if (local_exact != NULL) grp = local_exact; else grp = local_wild; if (grp == NULL) return (NULL); out: return (grp->il_inp[INP6_PCBLBGROUP_PKTHASH(faddr, lport, fport) % grp->il_inpcnt]); } static bool in6_pcblookup_exact_match(const struct inpcb *inp, const struct in6_addr *faddr, u_short fport, const struct in6_addr *laddr, u_short lport) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) return (false); if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_fport == fport && inp->inp_lport == lport) return (true); return (false); } static struct inpcb * in6_pcblookup_hash_exact(struct inpcbinfo *pcbinfo, const struct in6_addr *faddr, u_short fport, const struct in6_addr *laddr, u_short lport) { struct inpcbhead *head; struct inpcb *inp; INP_HASH_LOCK_ASSERT(pcbinfo); /* * First look for an exact match. 
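 * An exact match requires all four tuple elements (foreign
 * address and port, local address and port) to be equal, and is
 * always preferred over any wildcard or load-balancing group
 * match.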
*/ head = &pcbinfo->ipi_hash_exact[INP6_PCBHASH(faddr, lport, fport, pcbinfo->ipi_hashmask)]; CK_LIST_FOREACH(inp, head, inp_hash_exact) { if (in6_pcblookup_exact_match(inp, faddr, fport, laddr, lport)) return (inp); } return (NULL); } typedef enum { INPLOOKUP_MATCH_NONE = 0, INPLOOKUP_MATCH_WILD = 1, INPLOOKUP_MATCH_LADDR = 2, } inp_lookup_match_t; static inp_lookup_match_t in6_pcblookup_wild_match(const struct inpcb *inp, const struct in6_addr *laddr, u_short lport) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) return (INPLOOKUP_MATCH_NONE); if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || inp->inp_lport != lport) return (INPLOOKUP_MATCH_NONE); if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) return (INPLOOKUP_MATCH_WILD); if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) return (INPLOOKUP_MATCH_LADDR); return (INPLOOKUP_MATCH_NONE); } #define INP_LOOKUP_AGAIN ((struct inpcb *)(uintptr_t)-1) static struct inpcb * in6_pcblookup_hash_wild_smr(struct inpcbinfo *pcbinfo, const struct in6_addr *faddr, u_short fport, const struct in6_addr *laddr, u_short lport, const inp_lookup_t lockflags) { struct inpcbhead *head; struct inpcb *inp; KASSERT(SMR_ENTERED(pcbinfo->ipi_smr), ("%s: not in SMR read section", __func__)); head = &pcbinfo->ipi_hash_wild[INP_PCBHASH_WILD(lport, pcbinfo->ipi_hashmask)]; CK_LIST_FOREACH(inp, head, inp_hash_wild) { inp_lookup_match_t match; match = in6_pcblookup_wild_match(inp, laddr, lport); if (match == INPLOOKUP_MATCH_NONE) continue; if (__predict_true(inp_smr_lock(inp, lockflags))) { match = in6_pcblookup_wild_match(inp, laddr, lport); if (match != INPLOOKUP_MATCH_NONE && prison_check_ip6_locked(inp->inp_cred->cr_prison, laddr) == 0) return (inp); inp_unlock(inp, lockflags); } /* * The matching socket disappeared out from under us. Fall back * to a serialized lookup. */ return (INP_LOOKUP_AGAIN); } return (NULL); } static struct inpcb * in6_pcblookup_hash_wild_locked(struct inpcbinfo *pcbinfo, const struct in6_addr *faddr, u_short fport, const struct in6_addr *laddr, u_short lport) { struct inpcbhead *head; struct inpcb *inp, *jail_wild, *local_exact, *local_wild; INP_HASH_LOCK_ASSERT(pcbinfo); /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild. 
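 * For example, a jailed socket bound to [::]:80 beats a non-jailed
 * socket bound specifically to [::1]:80 for a segment addressed to
 * ::1 port 80, provided ::1 belongs to the jail: category 2 wins
 * over category 3.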
*/ head = &pcbinfo->ipi_hash_wild[INP_PCBHASH_WILD(lport, pcbinfo->ipi_hashmask)]; local_wild = local_exact = jail_wild = NULL; CK_LIST_FOREACH(inp, head, inp_hash_wild) { inp_lookup_match_t match; bool injail; match = in6_pcblookup_wild_match(inp, laddr, lport); if (match == INPLOOKUP_MATCH_NONE) continue; injail = prison_flag(inp->inp_cred, PR_IP6) != 0; if (injail) { if (prison_check_ip6_locked( inp->inp_cred->cr_prison, laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (match == INPLOOKUP_MATCH_LADDR) { if (injail) return (inp); else local_exact = inp; } else { if (injail) jail_wild = inp; else local_wild = inp; } } if (jail_wild != NULL) return (jail_wild); if (local_exact != NULL) return (local_exact); if (local_wild != NULL) return (local_wild); return (NULL); } struct inpcb * in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, const struct in6_addr *faddr, u_int fport_arg, const struct in6_addr *laddr, u_int lport_arg, int lookupflags, uint8_t numa_domain) { struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; KASSERT((lookupflags & ~INPLOOKUP_WILDCARD) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT(!IN6_IS_ADDR_UNSPECIFIED(faddr), ("%s: invalid foreign address", __func__)); KASSERT(!IN6_IS_ADDR_UNSPECIFIED(laddr), ("%s: invalid local address", __func__)); INP_HASH_LOCK_ASSERT(pcbinfo); inp = in6_pcblookup_hash_exact(pcbinfo, faddr, fport, laddr, lport); if (inp != NULL) return (inp); if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { inp = in6_pcblookup_lbgroup(pcbinfo, faddr, fport, laddr, lport, numa_domain); if (inp == NULL) { inp = in6_pcblookup_hash_wild_locked(pcbinfo, faddr, fport, laddr, lport); } } return (inp); } static struct inpcb * in6_pcblookup_hash(struct inpcbinfo *pcbinfo, const struct in6_addr *faddr, u_int fport, const struct in6_addr *laddr, u_int lport, int lookupflags, uint8_t numa_domain) { struct inpcb *inp; const inp_lookup_t lockflags = lookupflags & INPLOOKUP_LOCKMASK; KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); INP_HASH_WLOCK(pcbinfo); inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, lookupflags & ~INPLOOKUP_LOCKMASK, numa_domain); if (inp != NULL && !inp_trylock(inp, lockflags)) { in_pcbref(inp); INP_HASH_WUNLOCK(pcbinfo); inp_lock(inp, lockflags); if (in_pcbrele(inp, lockflags)) /* XXX-MJ or retry until we get a negative match? */ inp = NULL; } else { INP_HASH_WUNLOCK(pcbinfo); } return (inp); } static struct inpcb * in6_pcblookup_hash_smr(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int lookupflags, uint8_t numa_domain) { struct inpcb *inp; const inp_lookup_t lockflags = lookupflags & INPLOOKUP_LOCKMASK; const u_short fport = fport_arg, lport = lport_arg; KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); smr_enter(pcbinfo->ipi_smr); inp = in6_pcblookup_hash_exact(pcbinfo, faddr, fport, laddr, lport); if (inp != NULL) { if (__predict_true(inp_smr_lock(inp, lockflags))) { if (__predict_true(in6_pcblookup_exact_match(inp, faddr, fport, laddr, lport))) return (inp); inp_unlock(inp, lockflags); } /* * We failed to lock the inpcb, or its connection state changed * out from under us. Fall back to a precise search. 
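 * "Precise" means retrying under the pcbinfo hash lock, where the
 * result cannot be invalidated by a concurrent disconnect; the
 * lockless SMR pass above is only an optimistic fast path.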
*/ return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, numa_domain)); } if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { inp = in6_pcblookup_lbgroup(pcbinfo, faddr, fport, laddr, lport, numa_domain); if (inp != NULL) { if (__predict_true(inp_smr_lock(inp, lockflags))) { if (__predict_true(in6_pcblookup_wild_match(inp, laddr, lport) != INPLOOKUP_MATCH_NONE)) return (inp); inp_unlock(inp, lockflags); } inp = INP_LOOKUP_AGAIN; } else { inp = in6_pcblookup_hash_wild_smr(pcbinfo, faddr, fport, laddr, lport, lockflags); } if (inp == INP_LOOKUP_AGAIN) { return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, numa_domain)); } } if (inp == NULL) smr_exit(pcbinfo->ipi_smr); return (inp); } /* * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf * from which a pre-calculated hash value may be extracted. */ struct inpcb * in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp __unused) { return (in6_pcblookup_hash_smr(pcbinfo, faddr, fport, laddr, lport, lookupflags, M_NODOM)); } struct inpcb * in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp __unused, struct mbuf *m) { return (in6_pcblookup_hash_smr(pcbinfo, faddr, fport, laddr, lport, lookupflags, m->m_pkthdr.numa_domain)); } void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m, int srcordst) { struct ip6_hdr *ip; ip = mtod(m, struct ip6_hdr *); bzero(sin6, sizeof(*sin6)); sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_addr = srcordst ? ip->ip6_dst : ip->ip6_src; (void)sa6_recoverscope(sin6); /* XXX: should catch errors... */ return; } diff --git a/sys/netinet6/in6_pcb.h b/sys/netinet6/in6_pcb.h index 4eba1e47decb..acccc279fdc7 100644 --- a/sys/netinet6/in6_pcb.h +++ b/sys/netinet6/in6_pcb.h @@ -1,112 +1,110 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $KAME: in6_pcb.h,v 1.13 2001/02/06 09:16:53 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _NETINET6_IN6_PCB_H_ #define _NETINET6_IN6_PCB_H_ #ifdef _KERNEL #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) void in6_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); void in6_losing(struct inpcb *); int in6_pcbbind(struct inpcb *, struct sockaddr_in6 *, struct ucred *); int in6_pcbconnect(struct inpcb *, struct sockaddr_in6 *, struct ucred *, bool); void in6_pcbdisconnect(struct inpcb *); struct inpcb * in6_pcblookup_local(struct inpcbinfo *, struct in6_addr *, u_short, int, struct ucred *); struct inpcb * in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, const struct in6_addr *faddr, u_int fport_arg, const struct in6_addr *laddr, u_int lport_arg, int lookupflags, uint8_t); struct inpcb * in6_pcblookup(struct inpcbinfo *, struct in6_addr *, u_int, struct in6_addr *, u_int, int, struct ifnet *); struct inpcb * in6_pcblookup_mbuf(struct inpcbinfo *, struct in6_addr *, u_int, struct in6_addr *, u_int, int, struct ifnet *ifp, struct mbuf *); void in6_pcbnotify(struct inpcbinfo *, struct sockaddr_in6 *, u_int, const struct sockaddr_in6 *, u_int, int, void *, struct inpcb *(*)(struct inpcb *, int)); struct inpcb * in6_rtchange(struct inpcb *, int); struct sockaddr * in6_sockaddr(in_port_t port, struct in6_addr *addr_p); -struct sockaddr * - in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p); int in6_getpeeraddr(struct socket *so, struct sockaddr **nam); int in6_getsockaddr(struct socket *so, struct sockaddr **nam); int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam); int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam); int in6_selecthlim(struct inpcb *, struct ifnet *); int in6_pcbsetport(struct in6_addr *, struct inpcb *, struct ucred *); void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m, int); #endif /* _KERNEL */ #endif /* 
!_NETINET6_IN6_PCB_H_ */ diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c index f70c92e8b295..a2d5f12f27ac 100644 --- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c +++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c @@ -1,1921 +1,1918 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * Copyright (c) 2004 The FreeBSD Foundation. All rights reserved. * Copyright (c) 2004-2008 Robert N. M. Watson. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Excerpts taken from tcp_subr.c, tcp_usrreq.c, uipc_socket.c */ /* * * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* */ #include #include #include #include #include "sdp.h" #include #include #include #include uma_zone_t sdp_zone; struct rwlock sdp_lock; LIST_HEAD(, sdp_sock) sdp_list; struct workqueue_struct *rx_comp_wq; RW_SYSINIT(sdplockinit, &sdp_lock, "SDP lock"); #define SDP_LIST_WLOCK() rw_wlock(&sdp_lock) #define SDP_LIST_RLOCK() rw_rlock(&sdp_lock) #define SDP_LIST_WUNLOCK() rw_wunlock(&sdp_lock) #define SDP_LIST_RUNLOCK() rw_runlock(&sdp_lock) #define SDP_LIST_WLOCK_ASSERT() rw_assert(&sdp_lock, RW_WLOCKED) #define SDP_LIST_RLOCK_ASSERT() rw_assert(&sdp_lock, RW_RLOCKED) #define SDP_LIST_LOCK_ASSERT() rw_assert(&sdp_lock, RW_LOCKED) MALLOC_DEFINE(M_SDP, "sdp", "Sockets Direct Protocol"); static void sdp_stop_keepalive_timer(struct socket *so); /* * SDP protocol interface to socket abstraction. */ /* * sdp_sendspace and sdp_recvspace are the default send and receive window * sizes, respectively. */ u_long sdp_sendspace = 1024*32; u_long sdp_recvspace = 1024*64; static int sdp_count; /* * Disable async. CMA events for sockets which are being torn down. */ static void sdp_destroy_cma(struct sdp_sock *ssk) { if (ssk->id == NULL) return; rdma_destroy_id(ssk->id); ssk->id = NULL; } static int sdp_pcbbind(struct sdp_sock *ssk, struct sockaddr *nam, struct ucred *cred) { struct sockaddr_in *sin; struct sockaddr_in null; int error; SDP_WLOCK_ASSERT(ssk); if (ssk->lport != 0 || ssk->laddr != INADDR_ANY) return (EINVAL); /* rdma_bind_addr handles bind races. */ SDP_WUNLOCK(ssk); if (ssk->id == NULL) ssk->id = rdma_create_id(&init_net, sdp_cma_handler, ssk, RDMA_PS_SDP, IB_QPT_RC); if (ssk->id == NULL) { SDP_WLOCK(ssk); return (ENOMEM); } if (nam == NULL) { null.sin_family = AF_INET; null.sin_len = sizeof(null); null.sin_addr.s_addr = INADDR_ANY; null.sin_port = 0; bzero(&null.sin_zero, sizeof(null.sin_zero)); nam = (struct sockaddr *)&null; } error = -rdma_bind_addr(ssk->id, nam); SDP_WLOCK(ssk); if (error == 0) { sin = (struct sockaddr_in *)&ssk->id->route.addr.src_addr; ssk->laddr = sin->sin_addr.s_addr; ssk->lport = sin->sin_port; } else sdp_destroy_cma(ssk); return (error); } static void sdp_pcbfree(struct sdp_sock *ssk) { KASSERT(ssk->socket == NULL, ("ssk %p socket still attached", ssk)); KASSERT((ssk->flags & SDP_DESTROY) == 0, ("ssk %p already destroyed", ssk)); sdp_dbg(ssk->socket, "Freeing pcb"); SDP_WLOCK_ASSERT(ssk); ssk->flags |= SDP_DESTROY; SDP_WUNLOCK(ssk); SDP_LIST_WLOCK(); sdp_count--; LIST_REMOVE(ssk, list); SDP_LIST_WUNLOCK(); crfree(ssk->cred); ssk->qp_active = 0; if (ssk->qp) { ib_destroy_qp(ssk->qp); ssk->qp = NULL; } sdp_tx_ring_destroy(ssk); sdp_rx_ring_destroy(ssk); sdp_destroy_cma(ssk); rw_destroy(&ssk->rx_ring.destroyed_lock); rw_destroy(&ssk->lock); uma_zfree(sdp_zone, ssk); } /* * Common routines to return a socket address. 
*/ static struct sockaddr * sdp_sockaddr(in_port_t port, struct in_addr *addr_p) { struct sockaddr_in *sin; sin = malloc(sizeof *sin, M_SONAME, M_WAITOK | M_ZERO); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = *addr_p; sin->sin_port = port; return (struct sockaddr *)sin; } static int sdp_getsockaddr(struct socket *so, struct sockaddr **nam) { struct sdp_sock *ssk; struct in_addr addr; in_port_t port; ssk = sdp_sk(so); SDP_RLOCK(ssk); port = ssk->lport; addr.s_addr = ssk->laddr; SDP_RUNLOCK(ssk); *nam = sdp_sockaddr(port, &addr); return 0; } static int sdp_getpeeraddr(struct socket *so, struct sockaddr **nam) { struct sdp_sock *ssk; struct in_addr addr; in_port_t port; ssk = sdp_sk(so); SDP_RLOCK(ssk); port = ssk->fport; addr.s_addr = ssk->faddr; SDP_RUNLOCK(ssk); *nam = sdp_sockaddr(port, &addr); return 0; } #if 0 static void sdp_apply_all(void (*func)(struct sdp_sock *, void *), void *arg) { struct sdp_sock *ssk; SDP_LIST_RLOCK(); LIST_FOREACH(ssk, &sdp_list, list) { SDP_WLOCK(ssk); func(ssk, arg); SDP_WUNLOCK(ssk); } SDP_LIST_RUNLOCK(); } #endif static void sdp_output_reset(struct sdp_sock *ssk) { struct rdma_cm_id *id; SDP_WLOCK_ASSERT(ssk); if (ssk->id) { id = ssk->id; ssk->qp_active = 0; SDP_WUNLOCK(ssk); rdma_disconnect(id); SDP_WLOCK(ssk); } ssk->state = TCPS_CLOSED; } /* * Attempt to close a SDP socket, marking it as dropped, and freeing * the socket if we hold the only reference. */ static struct sdp_sock * sdp_closed(struct sdp_sock *ssk) { struct socket *so; SDP_WLOCK_ASSERT(ssk); ssk->flags |= SDP_DROPPED; so = ssk->socket; soisdisconnected(so); if (ssk->flags & SDP_SOCKREF) { ssk->flags &= ~SDP_SOCKREF; SDP_WUNLOCK(ssk); sorele(so); return (NULL); } return (ssk); } /* * Perform timer based shutdowns which can not operate in * callout context. */ static void sdp_shutdown_task(void *data, int pending) { struct sdp_sock *ssk; ssk = data; SDP_WLOCK(ssk); /* * I don't think this can race with another call to pcbfree() * because SDP_TIMEWAIT protects it. SDP_DESTROY may be redundant. */ if (ssk->flags & SDP_DESTROY) panic("sdp_shutdown_task: Racing with pcbfree for ssk %p", ssk); if (ssk->flags & SDP_DISCON) sdp_output_reset(ssk); /* We have to clear this so sdp_detach() will call pcbfree(). */ ssk->flags &= ~(SDP_TIMEWAIT | SDP_DREQWAIT); if ((ssk->flags & SDP_DROPPED) == 0 && sdp_closed(ssk) == NULL) return; if (ssk->socket == NULL) { sdp_pcbfree(ssk); return; } SDP_WUNLOCK(ssk); } /* * 2msl has expired, schedule the shutdown task. */ static void sdp_2msl_timeout(void *data) { struct sdp_sock *ssk; ssk = data; /* Callout canceled. */ if (!callout_active(&ssk->keep2msl)) goto out; callout_deactivate(&ssk->keep2msl); /* Should be impossible, defensive programming. */ if ((ssk->flags & SDP_TIMEWAIT) == 0) goto out; taskqueue_enqueue(taskqueue_thread, &ssk->shutdown_task); out: SDP_WUNLOCK(ssk); return; } /* * Schedule the 2msl wait timer. */ static void sdp_2msl_wait(struct sdp_sock *ssk) { SDP_WLOCK_ASSERT(ssk); ssk->flags |= SDP_TIMEWAIT; ssk->state = TCPS_TIME_WAIT; soisdisconnected(ssk->socket); callout_reset(&ssk->keep2msl, TCPTV_MSL, sdp_2msl_timeout, ssk); } /* * Timed out waiting for the final fin/ack from rdma_disconnect(). */ static void sdp_dreq_timeout(void *data) { struct sdp_sock *ssk; ssk = data; /* Callout canceled. */ if (!callout_active(&ssk->keep2msl)) goto out; /* Callout rescheduled, probably as a different timer. 
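 * This is the usual callout(9) race protocol: !callout_active()
 * means the callout was canceled while we waited for the lock,
 * callout_pending() below means it has been rearmed since firing,
 * and only when neither holds is it safe to deactivate and act.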
*/ if (callout_pending(&ssk->keep2msl)) goto out; callout_deactivate(&ssk->keep2msl); if (ssk->state != TCPS_FIN_WAIT_1 && ssk->state != TCPS_LAST_ACK) goto out; if ((ssk->flags & SDP_DREQWAIT) == 0) goto out; ssk->flags &= ~SDP_DREQWAIT; ssk->flags |= SDP_DISCON; sdp_2msl_wait(ssk); ssk->qp_active = 0; out: SDP_WUNLOCK(ssk); } /* * Received the final fin/ack. Cancel the 2msl. */ void sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk) { sdp_dbg(ssk->socket, "cancelling dreq wait timeout\n"); ssk->flags &= ~SDP_DREQWAIT; sdp_2msl_wait(ssk); } static int sdp_init_sock(struct socket *sk) { struct sdp_sock *ssk = sdp_sk(sk); sdp_dbg(sk, "%s\n", __func__); callout_init_rw(&ssk->keep2msl, &ssk->lock, CALLOUT_RETURNUNLOCKED); TASK_INIT(&ssk->shutdown_task, 0, sdp_shutdown_task, ssk); #ifdef SDP_ZCOPY INIT_DELAYED_WORK(&ssk->srcavail_cancel_work, srcavail_cancel_timeout); ssk->zcopy_thresh = -1; /* use global sdp_zcopy_thresh */ ssk->tx_ring.rdma_inflight = NULL; #endif atomic_set(&ssk->mseq_ack, 0); sdp_rx_ring_init(ssk); ssk->tx_ring.buffer = NULL; return 0; } /* * Allocate an sdp_sock for the socket and reserve socket buffer space. */ static int sdp_attach(struct socket *so, int proto, struct thread *td) { struct sdp_sock *ssk; int error; ssk = sdp_sk(so); KASSERT(ssk == NULL, ("sdp_attach: ssk already set on so %p", so)); if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, sdp_sendspace, sdp_recvspace); if (error) return (error); } so->so_rcv.sb_flags |= SB_AUTOSIZE; so->so_snd.sb_flags |= SB_AUTOSIZE; ssk = uma_zalloc(sdp_zone, M_NOWAIT | M_ZERO); if (ssk == NULL) return (ENOBUFS); rw_init(&ssk->lock, "sdpsock"); ssk->socket = so; ssk->cred = crhold(so->so_cred); so->so_pcb = (caddr_t)ssk; sdp_init_sock(so); ssk->flags = 0; ssk->qp_active = 0; ssk->state = TCPS_CLOSED; mbufq_init(&ssk->rxctlq, INT_MAX); SDP_LIST_WLOCK(); LIST_INSERT_HEAD(&sdp_list, ssk, list); sdp_count++; SDP_LIST_WUNLOCK(); return (0); } /* * Detach SDP from the socket, potentially leaving it around for the * timewait to expire. */ static void sdp_detach(struct socket *so) { struct sdp_sock *ssk; ssk = sdp_sk(so); SDP_WLOCK(ssk); KASSERT(ssk->socket != NULL, ("sdp_detach: socket is NULL")); ssk->socket->so_pcb = NULL; ssk->socket = NULL; if (ssk->flags & (SDP_TIMEWAIT | SDP_DREQWAIT)) SDP_WUNLOCK(ssk); else if (ssk->flags & SDP_DROPPED || ssk->state < TCPS_SYN_SENT) sdp_pcbfree(ssk); else panic("sdp_detach: Unexpected state, ssk %p.\n", ssk); } /* * Allocate a local address for the socket. */ static int sdp_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { int error = 0; struct sdp_sock *ssk; struct sockaddr_in *sin; sin = (struct sockaddr_in *)nam; if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); if (nam->sa_len != sizeof(*sin)) return (EINVAL); if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return (EAFNOSUPPORT); ssk = sdp_sk(so); SDP_WLOCK(ssk); if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) { error = EINVAL; goto out; } error = sdp_pcbbind(ssk, nam, td->td_ucred); out: SDP_WUNLOCK(ssk); return (error); } /* * Prepare to accept connections. 
*/ static int sdp_listen(struct socket *so, int backlog, struct thread *td) { int error = 0; struct sdp_sock *ssk; ssk = sdp_sk(so); SDP_WLOCK(ssk); if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) { error = EINVAL; goto out; } if (error == 0 && ssk->lport == 0) error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred); SOCK_LOCK(so); if (error == 0) error = solisten_proto_check(so); if (error == 0) { solisten_proto(so, backlog); ssk->state = TCPS_LISTEN; } SOCK_UNLOCK(so); out: SDP_WUNLOCK(ssk); if (error == 0) error = -rdma_listen(ssk->id, backlog); return (error); } /* * Initiate a SDP connection to nam. */ static int sdp_start_connect(struct sdp_sock *ssk, struct sockaddr *nam, struct thread *td) { struct sockaddr_in src; struct socket *so; int error; so = ssk->socket; SDP_WLOCK_ASSERT(ssk); if (ssk->lport == 0) { error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred); if (error) return error; } src.sin_family = AF_INET; src.sin_len = sizeof(src); bzero(&src.sin_zero, sizeof(src.sin_zero)); src.sin_port = ssk->lport; src.sin_addr.s_addr = ssk->laddr; soisconnecting(so); SDP_WUNLOCK(ssk); error = -rdma_resolve_addr(ssk->id, (struct sockaddr *)&src, nam, SDP_RESOLVE_TIMEOUT); SDP_WLOCK(ssk); if (error == 0) ssk->state = TCPS_SYN_SENT; return 0; } /* * Initiate SDP connection. */ static int sdp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { int error = 0; struct sdp_sock *ssk; struct sockaddr_in *sin; sin = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof(*sin)) return (EINVAL); if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return (EAFNOSUPPORT); if ((error = prison_remote_ip4(td->td_ucred, &sin->sin_addr)) != 0) return (error); ssk = sdp_sk(so); SDP_WLOCK(ssk); if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) error = EINVAL; else error = sdp_start_connect(ssk, nam, td); SDP_WUNLOCK(ssk); return (error); } /* * Drop a SDP socket, reporting * the specified error. If connection is synchronized, * then send a RST to peer. */ static struct sdp_sock * sdp_drop(struct sdp_sock *ssk, int errno) { struct socket *so; SDP_WLOCK_ASSERT(ssk); so = ssk->socket; if (TCPS_HAVERCVDSYN(ssk->state)) sdp_output_reset(ssk); if (errno == ETIMEDOUT && ssk->softerror) errno = ssk->softerror; so->so_error = errno; return (sdp_closed(ssk)); } /* * User issued close, and wish to trail through shutdown states: * if never received SYN, just forget it. If got a SYN from peer, * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. * If already got a FIN from peer, then almost done; go to LAST_ACK * state. In all other cases, have already sent FIN to peer (e.g. * after PRU_SHUTDOWN), and just have to play tedious game waiting * for peer to send FIN or not respond to keep-alives, etc. * We can let the user exit from the close as soon as the FIN is acked. */ static void sdp_usrclosed(struct sdp_sock *ssk) { SDP_WLOCK_ASSERT(ssk); switch (ssk->state) { case TCPS_LISTEN: ssk->state = TCPS_CLOSED; SDP_WUNLOCK(ssk); sdp_destroy_cma(ssk); SDP_WLOCK(ssk); /* FALLTHROUGH */ case TCPS_CLOSED: ssk = sdp_closed(ssk); /* * sdp_closed() should never return NULL here as the socket is * still open. 
*/ KASSERT(ssk != NULL, ("sdp_usrclosed: sdp_closed() returned NULL")); break; case TCPS_SYN_SENT: /* FALLTHROUGH */ case TCPS_SYN_RECEIVED: ssk->flags |= SDP_NEEDFIN; break; case TCPS_ESTABLISHED: ssk->flags |= SDP_NEEDFIN; ssk->state = TCPS_FIN_WAIT_1; break; case TCPS_CLOSE_WAIT: ssk->state = TCPS_LAST_ACK; break; } if (ssk->state >= TCPS_FIN_WAIT_2) { /* Prevent the connection hanging in FIN_WAIT_2 forever. */ if (ssk->state == TCPS_FIN_WAIT_2) sdp_2msl_wait(ssk); else soisdisconnected(ssk->socket); } } static void sdp_output_disconnect(struct sdp_sock *ssk) { SDP_WLOCK_ASSERT(ssk); callout_reset(&ssk->keep2msl, SDP_FIN_WAIT_TIMEOUT, sdp_dreq_timeout, ssk); ssk->flags |= SDP_NEEDFIN | SDP_DREQWAIT; sdp_post_sends(ssk, M_NOWAIT); } /* * Initiate or continue a disconnect. * If embryonic state, just send reset (once). * If in ``let data drain'' option and linger null, just drop. * Otherwise (hard), mark socket disconnecting and drop * current input data; switch states based on user close, and * send segment to peer (with FIN). */ static void sdp_start_disconnect(struct sdp_sock *ssk) { struct socket *so; int unread; so = ssk->socket; SDP_WLOCK_ASSERT(ssk); sdp_stop_keepalive_timer(so); /* * Neither sdp_closed() nor sdp_drop() should return NULL, as the * socket is still open. */ if (ssk->state < TCPS_ESTABLISHED) { ssk = sdp_closed(ssk); KASSERT(ssk != NULL, ("sdp_start_disconnect: sdp_close() returned NULL")); } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) { ssk = sdp_drop(ssk, 0); KASSERT(ssk != NULL, ("sdp_start_disconnect: sdp_drop() returned NULL")); } else { soisdisconnecting(so); unread = sbused(&so->so_rcv); sbflush(&so->so_rcv); sdp_usrclosed(ssk); if (!(ssk->flags & SDP_DROPPED)) { if (unread) sdp_output_reset(ssk); else sdp_output_disconnect(ssk); } } } /* * User initiated disconnect. */ static int sdp_disconnect(struct socket *so) { struct sdp_sock *ssk; int error = 0; ssk = sdp_sk(so); SDP_WLOCK(ssk); if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) { error = ECONNRESET; goto out; } sdp_start_disconnect(ssk); out: SDP_WUNLOCK(ssk); return (error); } /* * Accept a connection. Essentially all the work is done at higher levels; * just return the address of the peer, storing through addr. * * * XXX This is broken XXX * * The rationale for acquiring the sdp lock here is somewhat complicated, * and is described in detail in the commit log entry for r175612. Acquiring * it delays an accept(2) racing with sonewconn(), which inserts the socket * before the address/port fields are initialized. A better fix would * prevent the socket from being placed in the listen queue until all fields * are fully initialized. 
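 */

/*
 * A minimal sketch, assuming the updated pr_accept() KPI that the
 * sdp_accept() hunk below converts to: the caller now hands in
 * preallocated sockaddr storage and the protocol fills it in place,
 * instead of returning an M_SONAME allocation through a pointer.
 * "example_accept" is a hypothetical name.
 */
static int
example_accept(struct socket *so, struct sockaddr *sa)
{

	/* The caller guarantees at least sockaddr_storage worth of room. */
	*(struct sockaddr_in *)sa = (struct sockaddr_in){
		.sin_family = AF_INET,
		.sin_len = sizeof(struct sockaddr_in),
		/* address and port are copied out under the protocol lock */
	};
	return (0);
}

/*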
*/ static int -sdp_accept(struct socket *so, struct sockaddr **nam) +sdp_accept(struct socket *so, struct sockaddr *sa) { struct sdp_sock *ssk = NULL; - struct in_addr addr; - in_port_t port; int error; if (so->so_state & SS_ISDISCONNECTED) return (ECONNABORTED); - port = 0; - addr.s_addr = 0; error = 0; ssk = sdp_sk(so); SDP_WLOCK(ssk); - if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) { + if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) error = ECONNABORTED; - goto out; - } - port = ssk->fport; - addr.s_addr = ssk->faddr; -out: + else + *(struct sockaddr_in *)sa = (struct sockaddr_in ){ + .sin_family = AF_INET, + .sin_len = sizeof(struct sockaddr_in), + .sin_addr.s_addr = ssk->faddr, + .sin_port = ssk->fport, + }; SDP_WUNLOCK(ssk); - if (error == 0) - *nam = sdp_sockaddr(port, &addr); - return error; + + return (error); } /* * Mark the connection as being incapable of further output. */ static int sdp_shutdown(struct socket *so) { int error = 0; struct sdp_sock *ssk; ssk = sdp_sk(so); SDP_WLOCK(ssk); if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) { error = ECONNRESET; goto out; } socantsendmore(so); sdp_usrclosed(ssk); if (!(ssk->flags & SDP_DROPPED)) sdp_output_disconnect(ssk); out: SDP_WUNLOCK(ssk); return (error); } static void sdp_append(struct sdp_sock *ssk, struct sockbuf *sb, struct mbuf *mb, int cnt) { struct mbuf *n; int ncnt; SOCKBUF_LOCK_ASSERT(sb); SBLASTRECORDCHK(sb); KASSERT(mb->m_flags & M_PKTHDR, ("sdp_append: %p Missing packet header.\n", mb)); n = sb->sb_lastrecord; /* * If the queue is empty just set all pointers and proceed. */ if (n == NULL) { sb->sb_lastrecord = sb->sb_mb = sb->sb_sndptr = mb; for (; mb; mb = mb->m_next) { sb->sb_mbtail = mb; sballoc(sb, mb); } return; } /* * Count the number of mbufs in the current tail. */ for (ncnt = 0; n->m_next; n = n->m_next) ncnt++; n = sb->sb_lastrecord; /* * If the two chains can fit in a single sdp packet and * the last record has not been sent yet (WRITABLE) coalesce * them. The lastrecord remains the same but we must strip the * packet header and then let sbcompress do the hard part. */ if (M_WRITABLE(n) && ncnt + cnt < SDP_MAX_SEND_SGES && n->m_pkthdr.len + mb->m_pkthdr.len - SDP_HEAD_SIZE < ssk->xmit_size_goal) { m_adj(mb, SDP_HEAD_SIZE); n->m_pkthdr.len += mb->m_pkthdr.len; n->m_flags |= mb->m_flags & (M_PUSH | M_URG); m_demote(mb, 1, 0); sbcompress(sb, mb, sb->sb_mbtail); return; } /* * Not compressible, just append to the end and adjust counters. */ sb->sb_lastrecord->m_flags |= M_PUSH; sb->sb_lastrecord->m_nextpkt = mb; sb->sb_lastrecord = mb; if (sb->sb_sndptr == NULL) sb->sb_sndptr = mb; for (; mb; mb = mb->m_next) { sb->sb_mbtail = mb; sballoc(sb, mb); } } /* * Do a send by putting data in output queue and updating urgent * marker if URG set. Possibly send more data. Unlike the other * pru_*() routines, the mbuf chains are our responsibility. We * must either enqueue them or free them. The other pru_* routines * generally are caller-frees. * * This comes from sendfile, normal sends will come from sdp_sosend(). 
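 */

/*
 * Illustrative sketch, not part of this change: the mbuf ownership
 * contract sdp_send() below must honor.  pr_send implementations own
 * both chains and must consume them on every path; m_freem(9) accepts
 * NULL, so error paths may free unconditionally.  "example_send_fail"
 * is a hypothetical helper.
 */
static int
example_send_fail(struct mbuf *m, struct mbuf *control, int error)
{

	m_freem(control);	/* m_freem(9) tolerates NULL */
	m_freem(m);
	return (error);
}

/*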
*/ static int sdp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct sdp_sock *ssk; struct mbuf *n; int error; int cnt; if (nam != NULL) { if (nam->sa_family != AF_INET) { if (control) m_freem(control); m_freem(m); return (EAFNOSUPPORT); } if (nam->sa_len != sizeof(struct sockaddr_in)) { if (control) m_freem(control); m_freem(m); return (EINVAL); } } error = 0; ssk = sdp_sk(so); KASSERT(m->m_flags & M_PKTHDR, ("sdp_send: %p no packet header", m)); M_PREPEND(m, SDP_HEAD_SIZE, M_WAITOK); mtod(m, struct sdp_bsdh *)->mid = SDP_MID_DATA; for (n = m, cnt = 0; n->m_next; n = n->m_next) cnt++; if (cnt > SDP_MAX_SEND_SGES) { n = m_collapse(m, M_WAITOK, SDP_MAX_SEND_SGES); if (n == NULL) { m_freem(m); return (EMSGSIZE); } m = n; for (cnt = 0; n->m_next; n = n->m_next) cnt++; } SDP_WLOCK(ssk); if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) { if (control) m_freem(control); if (m) m_freem(m); error = ECONNRESET; goto out; } if (control) { /* SDP doesn't support control messages. */ if (control->m_len) { m_freem(control); if (m) m_freem(m); error = EINVAL; goto out; } m_freem(control); /* empty control, just free it */ } if (!(flags & PRUS_OOB)) { SOCKBUF_LOCK(&so->so_snd); sdp_append(ssk, &so->so_snd, m, cnt); SOCKBUF_UNLOCK(&so->so_snd); if (nam && ssk->state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected. */ error = sdp_start_connect(ssk, nam, td); if (error) goto out; } if (flags & PRUS_EOF) { /* * Close the send side of the connection after * the data is sent. */ socantsendmore(so); sdp_usrclosed(ssk); if (!(ssk->flags & SDP_DROPPED)) sdp_output_disconnect(ssk); } else if (!(ssk->flags & SDP_DROPPED) && !(flags & PRUS_MORETOCOME)) sdp_post_sends(ssk, M_NOWAIT); SDP_WUNLOCK(ssk); return (0); } else { SOCKBUF_LOCK(&so->so_snd); if (sbspace(&so->so_snd) < -512) { SOCKBUF_UNLOCK(&so->so_snd); m_freem(m); error = ENOBUFS; goto out; } /* * According to RFC961 (Assigned Protocols), * the urgent pointer points to the last octet * of urgent data. We continue, however, * to consider it to indicate the first octet * of data past the urgent section. * Otherwise, snd_up should be one lower. */ m->m_flags |= M_URG | M_PUSH; sdp_append(ssk, &so->so_snd, m, cnt); SOCKBUF_UNLOCK(&so->so_snd); if (nam && ssk->state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected. */ error = sdp_start_connect(ssk, nam, td); if (error) goto out; } sdp_post_sends(ssk, M_NOWAIT); SDP_WUNLOCK(ssk); return (0); } out: SDP_WUNLOCK(ssk); return (error); } /* * Send on a socket. If send must go all at once and message is larger than * send buffering, then hard error. Lock against other senders. If must go * all at once and not enough room now, then inform user that this would * block and do nothing. Otherwise, if nonblocking, send as much as * possible. The data to be sent is described by "uio" if nonzero, otherwise * by the mbuf chain "top" (which must be null if uio is not). Data provided * in mbuf chain must be small enough to send all at once. * * Returns nonzero on error, timeout or signal; callers must check for short * counts if EINTR/ERESTART are returned. Data and control buffers are freed * on return. 
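 */

/*
 * A rough sketch of the space test sdp_sosend() below performs with
 * the send buffer locked.  "example_space_ok" is hypothetical.  The
 * arithmetic must be signed: sbspace() can go negative when the
 * buffer is over-committed, and an unsigned comparison against resid
 * would then succeed by accident.
 */
static bool
example_space_ok(struct socket *so, long resid, bool atomic)
{
	long space;

	space = sbspace(&so->so_snd);
	return (space >= resid || (!atomic && space >= so->so_snd.sb_lowat));
}

/*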
*/ static int sdp_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { struct sdp_sock *ssk; long space, resid; int atomic; int error; int copy; if (uio != NULL) resid = uio->uio_resid; else resid = top->m_pkthdr.len; atomic = top != NULL; if (control != NULL) { if (control->m_len) { m_freem(control); if (top) m_freem(top); return (EINVAL); } m_freem(control); control = NULL; } /* * In theory resid should be unsigned. However, space must be * signed, as it might be less than 0 if we over-committed, and we * must use a signed comparison of space and resid. On the other * hand, a negative resid causes us to loop sending 0-length * segments to the protocol. * * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM * type sockets since that's an error. */ if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { error = EINVAL; goto out; } if (td != NULL) td->td_ru.ru_msgsnd++; ssk = sdp_sk(so); error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags)); if (error) goto out; restart: do { SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(&so->so_snd); error = EPIPE; goto release; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto release; } if ((so->so_state & SS_ISCONNECTED) == 0 && addr == NULL) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOTCONN; goto release; } space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; if (atomic && resid > ssk->xmit_size_goal - SDP_HEAD_SIZE) { SOCKBUF_UNLOCK(&so->so_snd); error = EMSGSIZE; goto release; } if (space < resid && (atomic || space < so->so_snd.sb_lowat)) { if ((so->so_state & SS_NBIO) || (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) { SOCKBUF_UNLOCK(&so->so_snd); error = EWOULDBLOCK; goto release; } error = sbwait(so, SO_SND); SOCKBUF_UNLOCK(&so->so_snd); if (error) goto release; goto restart; } SOCKBUF_UNLOCK(&so->so_snd); do { if (uio == NULL) { resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else { /* * Copy the data from userland into a mbuf * chain. If no data is to be copied in, * a single empty mbuf is returned. */ copy = min(space, ssk->xmit_size_goal - SDP_HEAD_SIZE); top = m_uiotombuf(uio, M_WAITOK, copy, 0, M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0)); if (top == NULL) { /* only possible error */ error = EFAULT; goto release; } space -= resid - uio->uio_resid; resid = uio->uio_resid; } /* * XXX all the SBS_CANTSENDMORE checks previously * done could be out of date after dropping the * socket lock. */ error = sdp_send(so, (flags & MSG_OOB) ? PRUS_OOB : /* * Set EOF on the last send if the user specified * MSG_EOF. */ ((flags & MSG_EOF) && (resid <= 0)) ? PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME. */ (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, top, addr, NULL, td); top = NULL; if (error) goto release; } while (resid && space > 0); } while (resid); release: SOCK_IO_SEND_UNLOCK(so); out: if (top != NULL) m_freem(top); return (error); } /* * The part of soreceive() that implements reading non-inline out-of-band * data from a socket. For more complete comments, see soreceive(), from * which this code originated. * * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is * unable to return an mbuf chain to the caller. 
*/ static int soreceive_rcvoob(struct socket *so, struct uio *uio, int flags) { struct protosw *pr = so->so_proto; struct mbuf *m; int error; KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0")); m = m_get(M_WAITOK, MT_DATA); error = pr->pr_rcvoob(so, m, flags & MSG_PEEK); if (error) goto bad; do { error = uiomove(mtod(m, void *), (int) min(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid && error == 0 && m); bad: if (m != NULL) m_freem(m); return (error); } /* * Optimized version of soreceive() for stream (TCP) sockets. */ static int sdp_sorecv(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { int len = 0, error = 0, flags, oresid; struct sockbuf *sb; struct mbuf *m, *n = NULL; struct sdp_sock *ssk; /* We only do stream sockets. */ if (so->so_type != SOCK_STREAM) return (EINVAL); if (psa != NULL) *psa = NULL; if (controlp != NULL) return (EINVAL); if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_OOB) return (soreceive_rcvoob(so, uio, flags)); if (mp0 != NULL) *mp0 = NULL; sb = &so->so_rcv; ssk = sdp_sk(so); /* Prevent other readers from entering the socket. */ error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags)); if (error) return (error); SOCKBUF_LOCK(sb); /* Easy one, no space to copyout anything. */ if (uio->uio_resid == 0) { error = EINVAL; goto out; } oresid = uio->uio_resid; /* We will never ever get anything unless we are connected. */ if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { /* When disconnecting there may be still some data left. */ if (sbavail(sb)) goto deliver; if (!(so->so_state & SS_ISDISCONNECTED)) error = ENOTCONN; goto out; } /* Socket buffer is empty and we shall not block. */ if (sbavail(sb) == 0 && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { error = EAGAIN; goto out; } restart: SOCKBUF_LOCK_ASSERT(&so->so_rcv); /* Abort if socket has reported problems. */ if (so->so_error) { if (sbavail(sb)) goto deliver; if (oresid > uio->uio_resid) goto out; error = so->so_error; if (!(flags & MSG_PEEK)) so->so_error = 0; goto out; } /* Door is closed. Deliver what is left, if any. */ if (sb->sb_state & SBS_CANTRCVMORE) { if (sbavail(sb)) goto deliver; else goto out; } /* Socket buffer got some data that we shall deliver now. */ if (sbavail(sb) && !(flags & MSG_WAITALL) && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)) || sbavail(sb) >= sb->sb_lowat || sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat) ) { goto deliver; } /* On MSG_WAITALL we must wait until all data or error arrives. */ if ((flags & MSG_WAITALL) && (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_lowat)) goto deliver; /* * Wait and block until (more) data comes in. * NB: Drops the sockbuf lock during wait. */ error = sbwait(so, SO_RCV); if (error) goto out; goto restart; deliver: SOCKBUF_LOCK_ASSERT(&so->so_rcv); KASSERT(sbavail(sb), ("%s: sockbuf empty", __func__)); KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__)); /* Statistics. */ if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; /* Fill uio until full or current end of socket buffer is reached. */ len = min(uio->uio_resid, sbavail(sb)); if (mp0 != NULL) { /* Dequeue as many mbufs as possible. 
*/ if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) { for (*mp0 = m = sb->sb_mb; m != NULL && m->m_len <= len; m = m->m_next) { len -= m->m_len; uio->uio_resid -= m->m_len; sbfree(sb, m); n = m; } sb->sb_mb = m; if (sb->sb_mb == NULL) SB_EMPTY_FIXUP(sb); n->m_next = NULL; } /* Copy the remainder. */ if (len > 0) { KASSERT(sb->sb_mb != NULL, ("%s: len > 0 && sb->sb_mb empty", __func__)); m = m_copym(sb->sb_mb, 0, len, M_NOWAIT); if (m == NULL) len = 0; /* Don't flush data from sockbuf. */ else uio->uio_resid -= m->m_len; if (*mp0 != NULL) n->m_next = m; else *mp0 = m; if (*mp0 == NULL) { error = ENOBUFS; goto out; } } } else { /* NB: Must unlock socket buffer as uiomove may sleep. */ SOCKBUF_UNLOCK(sb); error = m_mbuftouio(uio, sb->sb_mb, len); SOCKBUF_LOCK(sb); if (error) goto out; } SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); /* * Remove the delivered data from the socket buffer unless we * were only peeking. */ if (!(flags & MSG_PEEK)) { if (len > 0) sbdrop_locked(sb, len); /* Notify protocol that we drained some data. */ SOCKBUF_UNLOCK(sb); SDP_WLOCK(ssk); sdp_do_posts(ssk); SDP_WUNLOCK(ssk); SOCKBUF_LOCK(sb); } /* * For MSG_WAITALL we may have to loop again and wait for * more data to come in. */ if ((flags & MSG_WAITALL) && uio->uio_resid > 0) goto restart; out: SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); SOCKBUF_UNLOCK(sb); SOCK_IO_RECV_UNLOCK(so); return (error); } /* * Abort is used to teardown a connection typically while sitting in * the accept queue. */ void sdp_abort(struct socket *so) { struct sdp_sock *ssk; ssk = sdp_sk(so); SDP_WLOCK(ssk); /* * If we have not yet dropped, do it now. */ if (!(ssk->flags & SDP_TIMEWAIT) && !(ssk->flags & SDP_DROPPED)) sdp_drop(ssk, ECONNABORTED); KASSERT(ssk->flags & SDP_DROPPED, ("sdp_abort: %p not dropped 0x%X", ssk, ssk->flags)); SDP_WUNLOCK(ssk); } /* * Close a SDP socket and initiate a friendly disconnect. */ static void sdp_close(struct socket *so) { struct sdp_sock *ssk; ssk = sdp_sk(so); SDP_WLOCK(ssk); /* * If we have not yet dropped, do it now. */ if (!(ssk->flags & SDP_TIMEWAIT) && !(ssk->flags & SDP_DROPPED)) sdp_start_disconnect(ssk); /* * If we've still not dropped let the socket layer know we're * holding on to the socket and pcb for a while. */ if (!(ssk->flags & SDP_DROPPED)) { ssk->flags |= SDP_SOCKREF; soref(so); } SDP_WUNLOCK(ssk); } /* * User requests out-of-band data. 
*/ static int sdp_rcvoob(struct socket *so, struct mbuf *m, int flags) { int error = 0; struct sdp_sock *ssk; ssk = sdp_sk(so); SDP_WLOCK(ssk); if (!rx_ring_trylock(&ssk->rx_ring)) { SDP_WUNLOCK(ssk); return (ECONNRESET); } if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) { error = ECONNRESET; goto out; } if ((so->so_oobmark == 0 && (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || so->so_options & SO_OOBINLINE || ssk->oobflags & SDP_HADOOB) { error = EINVAL; goto out; } if ((ssk->oobflags & SDP_HAVEOOB) == 0) { error = EWOULDBLOCK; goto out; } m->m_len = 1; *mtod(m, caddr_t) = ssk->iobc; if ((flags & MSG_PEEK) == 0) ssk->oobflags ^= (SDP_HAVEOOB | SDP_HADOOB); out: rx_ring_unlock(&ssk->rx_ring); SDP_WUNLOCK(ssk); return (error); } void sdp_urg(struct sdp_sock *ssk, struct mbuf *mb) { struct mbuf *m; struct socket *so; so = ssk->socket; if (so == NULL) return; so->so_oobmark = sbused(&so->so_rcv) + mb->m_pkthdr.len - 1; sohasoutofband(so); ssk->oobflags &= ~(SDP_HAVEOOB | SDP_HADOOB); if (!(so->so_options & SO_OOBINLINE)) { for (m = mb; m->m_next != NULL; m = m->m_next); ssk->iobc = *(mtod(m, char *) + m->m_len - 1); ssk->oobflags |= SDP_HAVEOOB; m->m_len--; mb->m_pkthdr.len--; } } /* * Notify a sdp socket of an asynchronous error. * * Do not wake up user since there currently is no mechanism for * reporting soft errors (yet - a kqueue filter may be added). */ struct sdp_sock * sdp_notify(struct sdp_sock *ssk, int error) { SDP_WLOCK_ASSERT(ssk); if ((ssk->flags & SDP_TIMEWAIT) || (ssk->flags & SDP_DROPPED)) return (ssk); /* * Ignore some errors if we are hooked up. */ if (ssk->state == TCPS_ESTABLISHED && (error == EHOSTUNREACH || error == ENETUNREACH || error == EHOSTDOWN)) return (ssk); ssk->softerror = error; return sdp_drop(ssk, error); } static void sdp_keepalive_timeout(void *data) { struct sdp_sock *ssk; ssk = data; /* Callout canceled. */ if (!callout_active(&ssk->keep2msl)) return; /* Callout rescheduled as a different kind of timer. */ if (callout_pending(&ssk->keep2msl)) goto out; callout_deactivate(&ssk->keep2msl); if (ssk->flags & SDP_DROPPED || (ssk->socket->so_options & SO_KEEPALIVE) == 0) goto out; sdp_post_keepalive(ssk); callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME, sdp_keepalive_timeout, ssk); out: SDP_WUNLOCK(ssk); } void sdp_start_keepalive_timer(struct socket *so) { struct sdp_sock *ssk; ssk = sdp_sk(so); if (!callout_pending(&ssk->keep2msl)) callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME, sdp_keepalive_timeout, ssk); } static void sdp_stop_keepalive_timer(struct socket *so) { struct sdp_sock *ssk; ssk = sdp_sk(so); callout_stop(&ssk->keep2msl); } /* * sdp_ctloutput() must drop the inpcb lock before performing copyin on * socket option arguments. When it re-acquires the lock after the copy, it * has to revalidate that the connection is still valid for the socket * option. 
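 */

/*
 * A minimal sketch of the drop/copy/recheck pattern described above,
 * as wrapped by SDP_WLOCK_RECHECK() below.  sooptcopyin() touches user
 * memory and may sleep, so it cannot run under the pcb write lock;
 * once the lock is retaken the connection must be revalidated.
 * "example_set_nodelay" is a hypothetical name.
 */
static int
example_set_nodelay(struct sdp_sock *ssk, struct sockopt *sopt)
{
	int error, optval;

	SDP_WUNLOCK(ssk);			/* copyin may fault or sleep */
	error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval));
	if (error)
		return (error);
	SDP_WLOCK(ssk);				/* retake and revalidate */
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
		SDP_WUNLOCK(ssk);
		return (ECONNRESET);
	}
	if (optval)
		ssk->flags |= SDP_NODELAY;
	else
		ssk->flags &= ~SDP_NODELAY;
	SDP_WUNLOCK(ssk);
	return (0);
}

/*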
*/ #define SDP_WLOCK_RECHECK(inp) do { \ SDP_WLOCK(ssk); \ if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) { \ SDP_WUNLOCK(ssk); \ return (ECONNRESET); \ } \ } while(0) static int sdp_ctloutput(struct socket *so, struct sockopt *sopt) { int error, opt, optval; struct sdp_sock *ssk; error = 0; ssk = sdp_sk(so); if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_KEEPALIVE) { SDP_WLOCK(ssk); if (so->so_options & SO_KEEPALIVE) sdp_start_keepalive_timer(so); else sdp_stop_keepalive_timer(so); SDP_WUNLOCK(ssk); } if (sopt->sopt_level != IPPROTO_TCP) return (error); SDP_WLOCK(ssk); if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) { SDP_WUNLOCK(ssk); return (ECONNRESET); } switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { case TCP_NODELAY: SDP_WUNLOCK(ssk); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); SDP_WLOCK_RECHECK(ssk); opt = SDP_NODELAY; if (optval) ssk->flags |= opt; else ssk->flags &= ~opt; sdp_do_posts(ssk); SDP_WUNLOCK(ssk); break; default: SDP_WUNLOCK(ssk); error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (sopt->sopt_name) { case TCP_NODELAY: optval = ssk->flags & SDP_NODELAY; SDP_WUNLOCK(ssk); error = sooptcopyout(sopt, &optval, sizeof optval); break; default: SDP_WUNLOCK(ssk); error = ENOPROTOOPT; break; } break; } return (error); } #undef SDP_WLOCK_RECHECK int sdp_mod_count = 0; int sdp_mod_usec = 0; void sdp_set_default_moderation(struct sdp_sock *ssk) { if (sdp_mod_count <= 0 || sdp_mod_usec <= 0) return; ib_modify_cq(ssk->rx_ring.cq, sdp_mod_count, sdp_mod_usec); } static void sdp_dev_add(struct ib_device *device) { struct ib_fmr_pool_param param; struct sdp_device *sdp_dev; sdp_dev = malloc(sizeof(*sdp_dev), M_SDP, M_WAITOK | M_ZERO); sdp_dev->pd = ib_alloc_pd(device, 0); if (IS_ERR(sdp_dev->pd)) goto out_pd; memset(&param, 0, sizeof param); param.max_pages_per_fmr = SDP_FMR_SIZE; param.page_shift = PAGE_SHIFT; param.access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ); param.pool_size = SDP_FMR_POOL_SIZE; param.dirty_watermark = SDP_FMR_DIRTY_SIZE; param.cache = 1; sdp_dev->fmr_pool = ib_create_fmr_pool(sdp_dev->pd, &param); if (IS_ERR(sdp_dev->fmr_pool)) goto out_fmr; ib_set_client_data(device, &sdp_client, sdp_dev); return; out_fmr: ib_dealloc_pd(sdp_dev->pd); out_pd: free(sdp_dev, M_SDP); } static void sdp_dev_rem(struct ib_device *device, void *client_data) { struct sdp_device *sdp_dev; struct sdp_sock *ssk; SDP_LIST_WLOCK(); LIST_FOREACH(ssk, &sdp_list, list) { if (ssk->ib_device != device) continue; SDP_WLOCK(ssk); if ((ssk->flags & SDP_DESTROY) == 0) ssk = sdp_notify(ssk, ECONNRESET); if (ssk) SDP_WUNLOCK(ssk); } SDP_LIST_WUNLOCK(); /* * XXX Do I need to wait between these two? */ sdp_dev = ib_get_client_data(device, &sdp_client); if (!sdp_dev) return; ib_flush_fmr_pool(sdp_dev->fmr_pool); ib_destroy_fmr_pool(sdp_dev->fmr_pool); ib_dealloc_pd(sdp_dev->pd); free(sdp_dev, M_SDP); } struct ib_client sdp_client = { .name = "sdp", .add = sdp_dev_add, .remove = sdp_dev_rem }; static int sdp_pcblist(SYSCTL_HANDLER_ARGS) { int error, n, i; struct sdp_sock *ssk; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == NULL) { n = sdp_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb); return (0); } if (req->newptr != NULL) return (EPERM); /* * OK, now we're committed to doing something.
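 */

/*
 * A condensed sketch of the two-phase sysctl protocol sdp_pcblist()
 * follows: a NULL oldptr is only a size probe, so the handler reports
 * a padded estimate and copies nothing; only a later call with a real
 * buffer emits the records.  "example_probe" is a hypothetical name.
 */
static int
example_probe(struct sysctl_req *req, int count)
{
	int n;

	if (req->oldptr == NULL) {
		n = count + imax(count / 8, 10);	/* headroom for growth */
		req->oldidx = 2 * sizeof(struct xinpgen) +
		    n * sizeof(struct xtcpcb);
		return (0);
	}
	/* A real handler would copy out xinpgen plus the records here. */
	return (0);
}

/*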
*/ SDP_LIST_RLOCK(); n = sdp_count; SDP_LIST_RUNLOCK(); error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) + n * sizeof(struct xtcpcb)); if (error != 0) return (error); bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = 0; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); SDP_LIST_RLOCK(); for (ssk = LIST_FIRST(&sdp_list), i = 0; ssk != NULL && i < n; ssk = LIST_NEXT(ssk, list)) { struct xtcpcb xt; SDP_RLOCK(ssk); if (ssk->flags & SDP_TIMEWAIT) { if (ssk->cred != NULL) error = cr_cansee(req->td->td_ucred, ssk->cred); else error = EINVAL; /* Skip this inp. */ } else if (ssk->socket) error = cr_canseesocket(req->td->td_ucred, ssk->socket); else error = EINVAL; if (error) { error = 0; goto next; } bzero(&xt, sizeof(xt)); xt.xt_len = sizeof xt; xt.xt_inp.inp_gencnt = 0; xt.xt_inp.inp_vflag = INP_IPV4; memcpy(&xt.xt_inp.inp_laddr, &ssk->laddr, sizeof(ssk->laddr)); xt.xt_inp.inp_lport = ssk->lport; memcpy(&xt.xt_inp.inp_faddr, &ssk->faddr, sizeof(ssk->faddr)); xt.xt_inp.inp_fport = ssk->fport; xt.t_state = ssk->state; if (ssk->socket != NULL) sotoxsocket(ssk->socket, &xt.xt_inp.xi_socket); xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP; SDP_RUNLOCK(ssk); error = SYSCTL_OUT(req, &xt, sizeof xt); if (error) break; i++; continue; next: SDP_RUNLOCK(ssk); } if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ xig.xig_gen = 0; xig.xig_sogen = so_gencnt; xig.xig_count = sdp_count; error = SYSCTL_OUT(req, &xig, sizeof xig); } SDP_LIST_RUNLOCK(); return (error); } SYSCTL_NODE(_net_inet, -1, sdp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "SDP"); SYSCTL_PROC(_net_inet_sdp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD | CTLTYPE_STRUCT | CTLFLAG_MPSAFE, 0, 0, sdp_pcblist, "S,xtcpcb", "List of active SDP connections"); static void sdp_zone_change(void *tag) { uma_zone_set_max(sdp_zone, maxsockets); } static void sdp_init(void *arg __unused) { LIST_INIT(&sdp_list); sdp_zone = uma_zcreate("sdp_sock", sizeof(struct sdp_sock), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(sdp_zone, maxsockets); EVENTHANDLER_REGISTER(maxsockets_change, sdp_zone_change, NULL, EVENTHANDLER_PRI_ANY); rx_comp_wq = create_singlethread_workqueue("rx_comp_wq"); ib_register_client(&sdp_client); } SYSINIT(sdp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, sdp_init, NULL); #define SDP_PROTOSW \ .pr_type = SOCK_STREAM, \ .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,\ .pr_ctloutput = sdp_ctloutput, \ .pr_abort = sdp_abort, \ .pr_accept = sdp_accept, \ .pr_attach = sdp_attach, \ .pr_bind = sdp_bind, \ .pr_connect = sdp_connect, \ .pr_detach = sdp_detach, \ .pr_disconnect = sdp_disconnect, \ .pr_listen = sdp_listen, \ .pr_peeraddr = sdp_getpeeraddr, \ .pr_rcvoob = sdp_rcvoob, \ .pr_send = sdp_send, \ .pr_sosend = sdp_sosend, \ .pr_soreceive = sdp_sorecv, \ .pr_shutdown = sdp_shutdown, \ .pr_sockaddr = sdp_getsockaddr, \ .pr_close = sdp_close static struct protosw sdp_ip_protosw = { .pr_protocol = IPPROTO_IP, SDP_PROTOSW }; static struct protosw sdp_tcp_protosw = { .pr_protocol = IPPROTO_TCP, SDP_PROTOSW }; static struct domain sdpdomain = { .dom_family = AF_INET_SDP, .dom_name = "SDP", .dom_nprotosw = 2, .dom_protosw = { &sdp_ip_protosw, &sdp_tcp_protosw, }, }; DOMAIN_SET(sdp); int sdp_debug_level = 1; int sdp_data_debug_level = 0; diff --git a/sys/rpc/svc_vc.c 
b/sys/rpc/svc_vc.c index 7eac2bc8f74e..a19d73a850b9 100644 --- a/sys/rpc/svc_vc.c +++ b/sys/rpc/svc_vc.c @@ -1,1201 +1,1197 @@ /* $NetBSD: svc_vc.c,v 1.7 2000/08/03 00:01:53 fvdl Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include /* * svc_vc.c, Server side for Connection Oriented based RPC. * * Actually implements two flavors of transporter - * a tcp rendezvouser (a listener and connection establisher) * and a record/tcp stream.
*/ #include "opt_kern_tls.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SYSCTL_NODE(_kern, OID_AUTO, rpc, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "RPC"); SYSCTL_NODE(_kern_rpc, OID_AUTO, tls, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "TLS"); SYSCTL_NODE(_kern_rpc, OID_AUTO, unenc, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "unencrypted"); KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_rx_msgbytes) = 0; SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, rx_msgbytes, CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_rx_msgbytes), 0, "Count of non-TLS rx bytes"); KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_rx_msgcnt) = 0; SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, rx_msgcnt, CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_rx_msgcnt), 0, "Count of non-TLS rx messages"); KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tx_msgbytes) = 0; SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, tx_msgbytes, CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tx_msgbytes), 0, "Count of non-TLS tx bytes"); KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tx_msgcnt) = 0; SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, tx_msgcnt, CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tx_msgcnt), 0, "Count of non-TLS tx messages"); KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_alerts) = 0; SYSCTL_U64(_kern_rpc_tls, OID_AUTO, alerts, CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_alerts), 0, "Count of TLS alert messages"); KRPC_VNET_DEFINE(uint64_t, svc_vc_tls_handshake_failed) = 0; SYSCTL_U64(_kern_rpc_tls, OID_AUTO, handshake_failed, CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_handshake_failed), 0, "Count of TLS failed handshakes"); KRPC_VNET_DEFINE(uint64_t, svc_vc_tls_handshake_success) = 0; SYSCTL_U64(_kern_rpc_tls, OID_AUTO, handshake_success, CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_handshake_success), 0, "Count of TLS successful handshakes"); KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_rx_msgbytes) = 0; SYSCTL_U64(_kern_rpc_tls, OID_AUTO, rx_msgbytes, CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_rx_msgbytes), 0, "Count of TLS rx bytes"); KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_rx_msgcnt) = 0; SYSCTL_U64(_kern_rpc_tls, OID_AUTO, rx_msgcnt, CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_rx_msgcnt), 0, "Count of TLS rx messages"); KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_tx_msgbytes) = 0; SYSCTL_U64(_kern_rpc_tls, OID_AUTO, tx_msgbytes, CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_tx_msgbytes), 0, "Count of TLS tx bytes"); KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_tx_msgcnt) = 0; SYSCTL_U64(_kern_rpc_tls, OID_AUTO, tx_msgcnt, CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_tx_msgcnt), 0, "Count of TLS tx messages"); static bool_t svc_vc_rendezvous_recv(SVCXPRT *, struct rpc_msg *, struct sockaddr **, struct mbuf **); static enum xprt_stat svc_vc_rendezvous_stat(SVCXPRT *); static void svc_vc_rendezvous_destroy(SVCXPRT *); static bool_t svc_vc_null(void); static void svc_vc_destroy(SVCXPRT *); static enum xprt_stat svc_vc_stat(SVCXPRT *); static bool_t svc_vc_ack(SVCXPRT *, uint32_t *); static bool_t svc_vc_recv(SVCXPRT *, struct rpc_msg *, struct sockaddr **, struct mbuf **); static bool_t svc_vc_reply(SVCXPRT *, struct rpc_msg *, struct sockaddr *, struct mbuf *, uint32_t *seq); static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in); static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const 
u_int rq, void *in); static void svc_vc_backchannel_destroy(SVCXPRT *); static enum xprt_stat svc_vc_backchannel_stat(SVCXPRT *); static bool_t svc_vc_backchannel_recv(SVCXPRT *, struct rpc_msg *, struct sockaddr **, struct mbuf **); static bool_t svc_vc_backchannel_reply(SVCXPRT *, struct rpc_msg *, struct sockaddr *, struct mbuf *, uint32_t *); static bool_t svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq, void *in); static SVCXPRT *svc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr); static int svc_vc_accept(struct socket *head, struct socket **sop); static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag); static int svc_vc_rendezvous_soupcall(struct socket *, void *, int); static const struct xp_ops svc_vc_rendezvous_ops = { .xp_recv = svc_vc_rendezvous_recv, .xp_stat = svc_vc_rendezvous_stat, .xp_reply = (bool_t (*)(SVCXPRT *, struct rpc_msg *, struct sockaddr *, struct mbuf *, uint32_t *))svc_vc_null, .xp_destroy = svc_vc_rendezvous_destroy, .xp_control = svc_vc_rendezvous_control }; static const struct xp_ops svc_vc_ops = { .xp_recv = svc_vc_recv, .xp_stat = svc_vc_stat, .xp_ack = svc_vc_ack, .xp_reply = svc_vc_reply, .xp_destroy = svc_vc_destroy, .xp_control = svc_vc_control }; static const struct xp_ops svc_vc_backchannel_ops = { .xp_recv = svc_vc_backchannel_recv, .xp_stat = svc_vc_backchannel_stat, .xp_reply = svc_vc_backchannel_reply, .xp_destroy = svc_vc_backchannel_destroy, .xp_control = svc_vc_backchannel_control }; /* * Usage: * xprt = svc_vc_create(sock, send_buf_size, recv_buf_size); * * Creates, registers, and returns a (rpc) tcp based transporter. * Once *xprt is initialized, it is registered as a transporter * (see svc.h, xprt_register). This routine returns * a NULL if a problem occurred. * * The file descriptor passed in is expected to refer to a bound, but * not yet connected socket. * * Since streams do buffered io similar to stdio, the caller can specify * how big the send and receive buffers are via the second and third parms; * 0 => use the system default. */ SVCXPRT * svc_vc_create(SVCPOOL *pool, struct socket *so, size_t sendsize, size_t recvsize) { SVCXPRT *xprt; struct sockaddr* sa; int error; SOCK_LOCK(so); if (so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED)) { SOCK_UNLOCK(so); CURVNET_SET(so->so_vnet); error = so->so_proto->pr_peeraddr(so, &sa); CURVNET_RESTORE(); if (error) return (NULL); xprt = svc_vc_create_conn(pool, so, sa); free(sa, M_SONAME); return (xprt); } SOCK_UNLOCK(so); xprt = svc_xprt_alloc(); sx_init(&xprt->xp_lock, "xprt->xp_lock"); xprt->xp_pool = pool; xprt->xp_socket = so; xprt->xp_p1 = NULL; xprt->xp_p2 = NULL; xprt->xp_ops = &svc_vc_rendezvous_ops; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_sockaddr(so, &sa); CURVNET_RESTORE(); if (error) { goto cleanup_svc_vc_create; } memcpy(&xprt->xp_ltaddr, sa, sa->sa_len); free(sa, M_SONAME); xprt_register(xprt); solisten(so, -1, curthread); SOLISTEN_LOCK(so); xprt->xp_upcallset = 1; solisten_upcall_set(so, svc_vc_rendezvous_soupcall, xprt); SOLISTEN_UNLOCK(so); return (xprt); cleanup_svc_vc_create: sx_destroy(&xprt->xp_lock); svc_xprt_free(xprt); return (NULL); } /* * Create a new transport for a socket obtained via soaccept().
*/ SVCXPRT * svc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr) { SVCXPRT *xprt; struct cf_conn *cd; struct sockaddr* sa = NULL; struct sockopt opt; int one = 1; int error; bzero(&opt, sizeof(struct sockopt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = SOL_SOCKET; opt.sopt_name = SO_KEEPALIVE; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); error = sosetopt(so, &opt); if (error) { return (NULL); } if (so->so_proto->pr_protocol == IPPROTO_TCP) { bzero(&opt, sizeof(struct sockopt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = IPPROTO_TCP; opt.sopt_name = TCP_NODELAY; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); error = sosetopt(so, &opt); if (error) { return (NULL); } } cd = mem_alloc(sizeof(*cd)); cd->strm_stat = XPRT_IDLE; xprt = svc_xprt_alloc(); sx_init(&xprt->xp_lock, "xprt->xp_lock"); xprt->xp_pool = pool; xprt->xp_socket = so; xprt->xp_p1 = cd; xprt->xp_p2 = NULL; xprt->xp_ops = &svc_vc_ops; /* * See http://www.connectathon.org/talks96/nfstcp.pdf - client * has a 5 minute timer, server has a 6 minute timer. */ xprt->xp_idletimeout = 6 * 60; memcpy(&xprt->xp_rtaddr, raddr, raddr->sa_len); CURVNET_SET(so->so_vnet); error = so->so_proto->pr_sockaddr(so, &sa); CURVNET_RESTORE(); if (error) goto cleanup_svc_vc_create; memcpy(&xprt->xp_ltaddr, sa, sa->sa_len); free(sa, M_SONAME); xprt_register(xprt); SOCKBUF_LOCK(&so->so_rcv); xprt->xp_upcallset = 1; soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt); SOCKBUF_UNLOCK(&so->so_rcv); /* * Throw the transport into the active list in case it already * has some data buffered. */ sx_xlock(&xprt->xp_lock); xprt_active(xprt); sx_xunlock(&xprt->xp_lock); return (xprt); cleanup_svc_vc_create: sx_destroy(&xprt->xp_lock); svc_xprt_free(xprt); mem_free(cd, sizeof(*cd)); return (NULL); } /* * Create a new transport for a backchannel on a clnt_vc socket. */ SVCXPRT * svc_vc_create_backchannel(SVCPOOL *pool) { SVCXPRT *xprt = NULL; struct cf_conn *cd = NULL; cd = mem_alloc(sizeof(*cd)); cd->strm_stat = XPRT_IDLE; xprt = svc_xprt_alloc(); sx_init(&xprt->xp_lock, "xprt->xp_lock"); xprt->xp_pool = pool; xprt->xp_socket = NULL; xprt->xp_p1 = cd; xprt->xp_p2 = NULL; xprt->xp_ops = &svc_vc_backchannel_ops; return (xprt); } /* * This does all of the accept except the final call to soaccept. The * caller will call soaccept after dropping its locks (soaccept may * call malloc). */ int svc_vc_accept(struct socket *head, struct socket **sop) { struct socket *so; int error = 0; short nbio; KASSERT(SOLISTENING(head), ("%s: socket %p is not listening", __func__, head)); #ifdef MAC error = mac_socket_check_accept(curthread->td_ucred, head); if (error != 0) goto done; #endif /* * XXXGL: we want non-blocking semantics. The socket could be a * socket created by kernel as well as socket shared with userland, * so we can't be sure about presence of SS_NBIO. We also shall not * toggle it on the socket, since that may surprise userland. So we * set SS_NBIO only temporarily.
*/ SOLISTEN_LOCK(head); nbio = head->so_state & SS_NBIO; head->so_state |= SS_NBIO; error = solisten_dequeue(head, &so, 0); head->so_state &= (nbio | ~SS_NBIO); if (error) goto done; so->so_state |= nbio; *sop = so; /* connection has been removed from the listen queue */ KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0); done: return (error); } /*ARGSUSED*/ static bool_t svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg, struct sockaddr **addrp, struct mbuf **mp) { struct socket *so = NULL; - struct sockaddr *sa = NULL; + struct sockaddr_storage ss = { .ss_len = sizeof(ss) }; int error; SVCXPRT *new_xprt; /* * The socket upcall calls xprt_active() which will eventually * cause the server to call us here. We attempt to accept a * connection from the socket and turn it into a new * transport. If the accept fails, we have drained all pending * connections so we call xprt_inactive(). */ sx_xlock(&xprt->xp_lock); error = svc_vc_accept(xprt->xp_socket, &so); if (error == EWOULDBLOCK) { /* * We must re-test for new connections after taking * the lock to protect us in the case where a new * connection arrives after our call to accept fails * with EWOULDBLOCK. */ SOLISTEN_LOCK(xprt->xp_socket); if (TAILQ_EMPTY(&xprt->xp_socket->sol_comp)) xprt_inactive_self(xprt); SOLISTEN_UNLOCK(xprt->xp_socket); sx_xunlock(&xprt->xp_lock); return (FALSE); } if (error) { SOLISTEN_LOCK(xprt->xp_socket); if (xprt->xp_upcallset) { xprt->xp_upcallset = 0; soupcall_clear(xprt->xp_socket, SO_RCV); } SOLISTEN_UNLOCK(xprt->xp_socket); xprt_inactive_self(xprt); sx_xunlock(&xprt->xp_lock); return (FALSE); } sx_xunlock(&xprt->xp_lock); - sa = NULL; - error = soaccept(so, &sa); + error = soaccept(so, (struct sockaddr *)&ss); if (error) { /* * XXX not sure if I need to call sofree or soclose here. */ - if (sa) - free(sa, M_SONAME); return (FALSE); } /* * svc_vc_create_conn will call xprt_register - we don't need * to do anything with the new connection except dereference it. */ - new_xprt = svc_vc_create_conn(xprt->xp_pool, so, sa); + new_xprt = svc_vc_create_conn(xprt->xp_pool, so, + (struct sockaddr *)&ss); if (!new_xprt) { soclose(so); } else { SVC_RELEASE(new_xprt); } - free(sa, M_SONAME); - return (FALSE); /* there is never an rpc msg to be processed */ } /*ARGSUSED*/ static enum xprt_stat svc_vc_rendezvous_stat(SVCXPRT *xprt) { return (XPRT_IDLE); } static void svc_vc_destroy_common(SVCXPRT *xprt) { uint32_t reterr; if (xprt->xp_socket) { if ((xprt->xp_tls & (RPCTLS_FLAGS_HANDSHAKE | RPCTLS_FLAGS_HANDSHFAIL)) != 0) { if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) { /* * If the upcall fails, the socket has * probably been closed via the rpctlssd * daemon having crashed or been * restarted, so just ignore returned stat. */ rpctls_srv_disconnect(xprt->xp_sslsec, xprt->xp_sslusec, xprt->xp_sslrefno, xprt->xp_sslproc, &reterr); } /* Must sorele() to get rid of reference.
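 */

/*
 * A minimal sketch, assuming the updated soaccept() KPI used in the
 * hunk above: the caller supplies sockaddr_storage on the stack, so
 * neither the error path nor the success path frees an M_SONAME
 * allocation.  "example_accept_conn" is a hypothetical name.
 */
static SVCXPRT *
example_accept_conn(SVCPOOL *pool, struct socket *so)
{
	struct sockaddr_storage ss = { .ss_len = sizeof(ss) };

	if (soaccept(so, (struct sockaddr *)&ss) != 0)
		return (NULL);		/* ss is stack memory; nothing to free */
	return (svc_vc_create_conn(pool, so, (struct sockaddr *)&ss));
}

/*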
*/ CURVNET_SET(xprt->xp_socket->so_vnet); sorele(xprt->xp_socket); CURVNET_RESTORE(); } else (void)soclose(xprt->xp_socket); } if (xprt->xp_netid) (void) mem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1); svc_xprt_free(xprt); } static void svc_vc_rendezvous_destroy(SVCXPRT *xprt) { SOLISTEN_LOCK(xprt->xp_socket); if (xprt->xp_upcallset) { xprt->xp_upcallset = 0; solisten_upcall_set(xprt->xp_socket, NULL, NULL); } SOLISTEN_UNLOCK(xprt->xp_socket); svc_vc_destroy_common(xprt); } static void svc_vc_destroy(SVCXPRT *xprt) { struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1; CLIENT *cl = (CLIENT *)xprt->xp_p2; SOCKBUF_LOCK(&xprt->xp_socket->so_rcv); if (xprt->xp_upcallset) { xprt->xp_upcallset = 0; if (xprt->xp_socket->so_rcv.sb_upcall != NULL) soupcall_clear(xprt->xp_socket, SO_RCV); } SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv); if (cl != NULL) CLNT_RELEASE(cl); svc_vc_destroy_common(xprt); if (cd->mreq) m_freem(cd->mreq); if (cd->mpending) m_freem(cd->mpending); mem_free(cd, sizeof(*cd)); } static void svc_vc_backchannel_destroy(SVCXPRT *xprt) { struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1; struct mbuf *m, *m2; svc_xprt_free(xprt); m = cd->mreq; while (m != NULL) { m2 = m; m = m->m_nextpkt; m_freem(m2); } mem_free(cd, sizeof(*cd)); } /*ARGSUSED*/ static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in) { return (FALSE); } static bool_t svc_vc_rendezvous_control(SVCXPRT *xprt, const u_int rq, void *in) { return (FALSE); } static bool_t svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq, void *in) { return (FALSE); } static enum xprt_stat svc_vc_stat(SVCXPRT *xprt) { struct cf_conn *cd; cd = (struct cf_conn *)(xprt->xp_p1); if (cd->strm_stat == XPRT_DIED) return (XPRT_DIED); if (cd->mreq != NULL && cd->resid == 0 && cd->eor) return (XPRT_MOREREQS); if (soreadable(xprt->xp_socket)) return (XPRT_MOREREQS); return (XPRT_IDLE); } static bool_t svc_vc_ack(SVCXPRT *xprt, uint32_t *ack) { *ack = atomic_load_acq_32(&xprt->xp_snt_cnt); *ack -= sbused(&xprt->xp_socket->so_snd); return (TRUE); } static enum xprt_stat svc_vc_backchannel_stat(SVCXPRT *xprt) { struct cf_conn *cd; cd = (struct cf_conn *)(xprt->xp_p1); if (cd->mreq != NULL) return (XPRT_MOREREQS); return (XPRT_IDLE); } /* * If we have an mbuf chain in cd->mpending, try to parse a record from it, * leaving the result in cd->mreq. If we don't have a complete record, leave * the partial result in cd->mreq and try to read more from the socket. */ static int svc_vc_process_pending(SVCXPRT *xprt) { struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1; struct socket *so = xprt->xp_socket; struct mbuf *m; /* * If cd->resid is non-zero, we have part of the * record already, otherwise we are expecting a record * marker. */ if (!cd->resid && cd->mpending) { /* * See if there is enough data buffered to * make up a record marker. Make sure we can * handle the case where the record marker is * split across more than one mbuf. */ size_t n = 0; uint32_t header; m = cd->mpending; while (n < sizeof(uint32_t) && m) { n += m->m_len; m = m->m_next; } if (n < sizeof(uint32_t)) { so->so_rcv.sb_lowat = sizeof(uint32_t) - n; return (FALSE); } m_copydata(cd->mpending, 0, sizeof(header), (char *)&header); header = ntohl(header); cd->eor = (header & 0x80000000) != 0; cd->resid = header & 0x7fffffff; m_adj(cd->mpending, sizeof(uint32_t)); } /* * Start pulling off mbufs from cd->mpending * until we either have a complete record or * we run out of data. 
We use m_split to pull * data - it will pull as much as possible and * split the last mbuf if necessary. */ while (cd->mpending && cd->resid) { m = cd->mpending; if (cd->mpending->m_next || cd->mpending->m_len > cd->resid) cd->mpending = m_split(cd->mpending, cd->resid, M_WAITOK); else cd->mpending = NULL; if (cd->mreq) m_last(cd->mreq)->m_next = m; else cd->mreq = m; while (m) { cd->resid -= m->m_len; m = m->m_next; } } /* * Block receive upcalls if we have more data pending, * otherwise report our need. */ if (cd->mpending) so->so_rcv.sb_lowat = INT_MAX; else so->so_rcv.sb_lowat = imax(1, imin(cd->resid, so->so_rcv.sb_hiwat / 2)); return (TRUE); } static bool_t svc_vc_recv(SVCXPRT *xprt, struct rpc_msg *msg, struct sockaddr **addrp, struct mbuf **mp) { struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1; struct uio uio; struct mbuf *m, *ctrl; struct socket* so = xprt->xp_socket; XDR xdrs; int error, rcvflag; uint32_t reterr, xid_plus_direction[2]; struct cmsghdr *cmsg; struct tls_get_record tgr; enum clnt_stat ret; /* * Serialise access to the socket and our own record parsing * state. */ sx_xlock(&xprt->xp_lock); for (;;) { /* If we have no request ready, check pending queue. */ while (cd->mpending && (cd->mreq == NULL || cd->resid != 0 || !cd->eor)) { if (!svc_vc_process_pending(xprt)) break; } /* Process and return complete request in cd->mreq. */ if (cd->mreq != NULL && cd->resid == 0 && cd->eor) { /* * Now, check for a backchannel reply. * The XID is in the first uint32_t of the reply * and the message direction is the second one. */ if ((cd->mreq->m_len >= sizeof(xid_plus_direction) || m_length(cd->mreq, NULL) >= sizeof(xid_plus_direction)) && xprt->xp_p2 != NULL) { m_copydata(cd->mreq, 0, sizeof(xid_plus_direction), (char *)xid_plus_direction); xid_plus_direction[0] = ntohl(xid_plus_direction[0]); xid_plus_direction[1] = ntohl(xid_plus_direction[1]); /* Check message direction. */ if (xid_plus_direction[1] == REPLY) { clnt_bck_svccall(xprt->xp_p2, cd->mreq, xid_plus_direction[0]); cd->mreq = NULL; continue; } } xdrmbuf_create(&xdrs, cd->mreq, XDR_DECODE); cd->mreq = NULL; /* Check for next request in a pending queue. */ svc_vc_process_pending(xprt); if (cd->mreq == NULL || cd->resid != 0) { SOCKBUF_LOCK(&so->so_rcv); if (!soreadable(so)) xprt_inactive_self(xprt); SOCKBUF_UNLOCK(&so->so_rcv); } sx_xunlock(&xprt->xp_lock); if (! xdr_callmsg(&xdrs, msg)) { XDR_DESTROY(&xdrs); return (FALSE); } *addrp = NULL; *mp = xdrmbuf_getall(&xdrs); XDR_DESTROY(&xdrs); return (TRUE); } /* * If receiving is disabled so that a TLS handshake can be * done by the rpctlssd daemon, return FALSE here. */ rcvflag = MSG_DONTWAIT; if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) rcvflag |= MSG_TLSAPPDATA; tryagain: if (xprt->xp_dontrcv) { sx_xunlock(&xprt->xp_lock); return (FALSE); } /* * The socket upcall calls xprt_active() which will eventually * cause the server to call us here. We attempt to * read as much as possible from the socket and put * the result in cd->mpending. If the read fails, * we have drained both cd->mpending and the socket so * we can call xprt_inactive(). */ uio.uio_resid = 1000000000; uio.uio_td = curthread; ctrl = m = NULL; error = soreceive(so, NULL, &uio, &m, &ctrl, &rcvflag); if (error == EWOULDBLOCK) { /* * We must re-test for readability after * taking the lock to protect us in the case * where a new packet arrives on the socket * after our call to soreceive fails with * EWOULDBLOCK. 
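 */

/*
 * A small sketch of RPC record-mark decoding as performed by
 * svc_vc_process_pending() above: the high bit of the 4-byte marker
 * flags the last fragment of a record, the low 31 bits carry the
 * fragment length.  "example_parse_marker" is a hypothetical name.
 */
static void
example_parse_marker(uint32_t raw, bool *eor, uint32_t *resid)
{
	uint32_t header;

	header = ntohl(raw);
	*eor = (header & 0x80000000) != 0;	/* last fragment? */
	*resid = header & 0x7fffffff;		/* bytes still expected */
}

/*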
*/ SOCKBUF_LOCK(&so->so_rcv); if (!soreadable(so)) xprt_inactive_self(xprt); SOCKBUF_UNLOCK(&so->so_rcv); sx_xunlock(&xprt->xp_lock); return (FALSE); } /* * A return of ENXIO indicates that there is an * alert record at the head of the * socket's receive queue, for TLS connections. * This record needs to be handled in userland * via an SSL_read() call, so do an upcall to the daemon. */ KRPC_CURVNET_SET(so->so_vnet); if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0 && error == ENXIO) { KRPC_VNET(svc_vc_tls_alerts)++; KRPC_CURVNET_RESTORE(); /* Disable reception. */ xprt->xp_dontrcv = TRUE; sx_xunlock(&xprt->xp_lock); ret = rpctls_srv_handlerecord(xprt->xp_sslsec, xprt->xp_sslusec, xprt->xp_sslrefno, xprt->xp_sslproc, &reterr); sx_xlock(&xprt->xp_lock); xprt->xp_dontrcv = FALSE; if (ret != RPC_SUCCESS || reterr != RPCTLSERR_OK) { /* * All we can do is soreceive() it and * then toss it. */ rcvflag = MSG_DONTWAIT; goto tryagain; } sx_xunlock(&xprt->xp_lock); xprt_active(xprt); /* Harmless if already active. */ return (FALSE); } if (error) { KRPC_CURVNET_RESTORE(); SOCKBUF_LOCK(&so->so_rcv); if (xprt->xp_upcallset) { xprt->xp_upcallset = 0; soupcall_clear(so, SO_RCV); } SOCKBUF_UNLOCK(&so->so_rcv); xprt_inactive_self(xprt); cd->strm_stat = XPRT_DIED; sx_xunlock(&xprt->xp_lock); return (FALSE); } if (!m) { KRPC_CURVNET_RESTORE(); /* * EOF - the other end has closed the socket. */ xprt_inactive_self(xprt); cd->strm_stat = XPRT_DIED; sx_xunlock(&xprt->xp_lock); return (FALSE); } /* Process any record header(s). */ if (ctrl != NULL) { cmsg = mtod(ctrl, struct cmsghdr *); if (cmsg->cmsg_type == TLS_GET_RECORD && cmsg->cmsg_len == CMSG_LEN(sizeof(tgr))) { memcpy(&tgr, CMSG_DATA(cmsg), sizeof(tgr)); /* * TLS_RLTYPE_ALERT records should be handled * since soreceive() would have returned * ENXIO. Just throw any other * non-TLS_RLTYPE_APP records away. */ if (tgr.tls_type != TLS_RLTYPE_APP) { m_freem(m); m_free(ctrl); rcvflag = MSG_DONTWAIT | MSG_TLSAPPDATA; KRPC_CURVNET_RESTORE(); goto tryagain; } KRPC_VNET(svc_vc_tls_rx_msgcnt)++; KRPC_VNET(svc_vc_tls_rx_msgbytes) += 1000000000 - uio.uio_resid; } m_free(ctrl); } else { KRPC_VNET(svc_vc_rx_msgcnt)++; KRPC_VNET(svc_vc_rx_msgbytes) += 1000000000 - uio.uio_resid; } KRPC_CURVNET_RESTORE(); if (cd->mpending) m_last(cd->mpending)->m_next = m; else cd->mpending = m; } } static bool_t svc_vc_backchannel_recv(SVCXPRT *xprt, struct rpc_msg *msg, struct sockaddr **addrp, struct mbuf **mp) { struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1; struct ct_data *ct; struct mbuf *m; XDR xdrs; sx_xlock(&xprt->xp_lock); ct = (struct ct_data *)xprt->xp_p2; if (ct == NULL) { sx_xunlock(&xprt->xp_lock); return (FALSE); } mtx_lock(&ct->ct_lock); m = cd->mreq; if (m == NULL) { xprt_inactive_self(xprt); mtx_unlock(&ct->ct_lock); sx_xunlock(&xprt->xp_lock); return (FALSE); } cd->mreq = m->m_nextpkt; mtx_unlock(&ct->ct_lock); sx_xunlock(&xprt->xp_lock); xdrmbuf_create(&xdrs, m, XDR_DECODE); if (! xdr_callmsg(&xdrs, msg)) { XDR_DESTROY(&xdrs); return (FALSE); } *addrp = NULL; *mp = xdrmbuf_getall(&xdrs); XDR_DESTROY(&xdrs); return (TRUE); } static bool_t svc_vc_reply(SVCXPRT *xprt, struct rpc_msg *msg, struct sockaddr *addr, struct mbuf *m, uint32_t *seq) { XDR xdrs; struct mbuf *mrep; bool_t stat = TRUE; int error, len, maxextsiz; #ifdef KERN_TLS u_int maxlen; #endif /* * Leave space for record mark. 
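 */

/*
 * The complementary encode step, sketched from svc_vc_reply() below:
 * reserve four bytes in front of the reply, then prepend a marker with
 * the last-fragment bit set and the payload length.  A double pointer
 * is used because M_PREPEND() may substitute a new head mbuf.
 * "example_insert_marker" is a hypothetical helper.
 */
static void
example_insert_marker(struct mbuf **mp)
{
	uint32_t len;

	len = (*mp)->m_pkthdr.len;		/* payload length */
	M_PREPEND(*mp, sizeof(uint32_t), M_WAITOK);
	*mtod(*mp, uint32_t *) = htonl(0x80000000 | len);
}

/*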
*/ mrep = m_gethdr(M_WAITOK, MT_DATA); mrep->m_data += sizeof(uint32_t); xdrmbuf_create(&xdrs, mrep, XDR_ENCODE); if (msg->rm_reply.rp_stat == MSG_ACCEPTED && msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { if (!xdr_replymsg(&xdrs, msg)) stat = FALSE; else xdrmbuf_append(&xdrs, m); } else { stat = xdr_replymsg(&xdrs, msg); } if (stat) { m_fixhdr(mrep); /* * Prepend a record marker containing the reply length. */ M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK); len = mrep->m_pkthdr.len; *mtod(mrep, uint32_t *) = htonl(0x80000000 | (len - sizeof(uint32_t))); /* For RPC-over-TLS, copy mrep to a chain of ext_pgs. */ KRPC_CURVNET_SET(xprt->xp_socket->so_vnet); if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) { /* * Copy the mbuf chain to a chain of * ext_pgs mbuf(s) as required by KERN_TLS. */ maxextsiz = TLS_MAX_MSG_SIZE_V10_2; #ifdef KERN_TLS if (rpctls_getinfo(&maxlen, false, false)) maxextsiz = min(maxextsiz, maxlen); #endif mrep = _rpc_copym_into_ext_pgs(mrep, maxextsiz); KRPC_VNET(svc_vc_tls_tx_msgcnt)++; KRPC_VNET(svc_vc_tls_tx_msgbytes) += len; } else { KRPC_VNET(svc_vc_tx_msgcnt)++; KRPC_VNET(svc_vc_tx_msgbytes) += len; } KRPC_CURVNET_RESTORE(); atomic_add_32(&xprt->xp_snd_cnt, len); /* * sosend consumes mreq. */ error = sosend(xprt->xp_socket, NULL, NULL, mrep, NULL, 0, curthread); if (!error) { atomic_add_rel_32(&xprt->xp_snt_cnt, len); if (seq) *seq = xprt->xp_snd_cnt; stat = TRUE; } else atomic_subtract_32(&xprt->xp_snd_cnt, len); } else { m_freem(mrep); } XDR_DESTROY(&xdrs); return (stat); } static bool_t svc_vc_backchannel_reply(SVCXPRT *xprt, struct rpc_msg *msg, struct sockaddr *addr, struct mbuf *m, uint32_t *seq) { struct ct_data *ct; XDR xdrs; struct mbuf *mrep; bool_t stat = TRUE; int error, maxextsiz; #ifdef KERN_TLS u_int maxlen; #endif /* * Leave space for record mark. */ mrep = m_gethdr(M_WAITOK, MT_DATA); mrep->m_data += sizeof(uint32_t); xdrmbuf_create(&xdrs, mrep, XDR_ENCODE); if (msg->rm_reply.rp_stat == MSG_ACCEPTED && msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { if (!xdr_replymsg(&xdrs, msg)) stat = FALSE; else xdrmbuf_append(&xdrs, m); } else { stat = xdr_replymsg(&xdrs, msg); } if (stat) { m_fixhdr(mrep); /* * Prepend a record marker containing the reply length. */ M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK); *mtod(mrep, uint32_t *) = htonl(0x80000000 | (mrep->m_pkthdr.len - sizeof(uint32_t))); /* For RPC-over-TLS, copy mrep to a chain of ext_pgs. */ if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) { /* * Copy the mbuf chain to a chain of * ext_pgs mbuf(s) as required by KERN_TLS. */ maxextsiz = TLS_MAX_MSG_SIZE_V10_2; #ifdef KERN_TLS if (rpctls_getinfo(&maxlen, false, false)) maxextsiz = min(maxextsiz, maxlen); #endif mrep = _rpc_copym_into_ext_pgs(mrep, maxextsiz); } sx_xlock(&xprt->xp_lock); ct = (struct ct_data *)xprt->xp_p2; if (ct != NULL) error = sosend(ct->ct_socket, NULL, NULL, mrep, NULL, 0, curthread); else error = EPIPE; sx_xunlock(&xprt->xp_lock); if (!error) { stat = TRUE; } } else { m_freem(mrep); } XDR_DESTROY(&xdrs); return (stat); } static bool_t svc_vc_null(void) { return (FALSE); } static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag) { SVCXPRT *xprt = (SVCXPRT *) arg; if (soreadable(xprt->xp_socket)) xprt_active(xprt); return (SU_OK); } static int svc_vc_rendezvous_soupcall(struct socket *head, void *arg, int waitflag) { SVCXPRT *xprt = (SVCXPRT *) arg; if (!TAILQ_EMPTY(&head->sol_comp)) xprt_active(xprt); return (SU_OK); } #if 0 /* * Get the effective UID of the sending process. 
Used by rpcbind, keyserv * and rpc.yppasswdd on AF_LOCAL. */ int __rpc_get_local_uid(SVCXPRT *transp, uid_t *uid) { int sock, ret; gid_t egid; uid_t euid; struct sockaddr *sa; sock = transp->xp_fd; sa = (struct sockaddr *)transp->xp_rtaddr; if (sa->sa_family == AF_LOCAL) { ret = getpeereid(sock, &euid, &egid); if (ret == 0) *uid = euid; return (ret); } else return (-1); } #endif diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h index 469e845b64b9..629e7570cb7e 100644 --- a/sys/sys/protosw.h +++ b/sys/sys/protosw.h @@ -1,204 +1,204 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _SYS_PROTOSW_H_ #define _SYS_PROTOSW_H_ #include /* Forward declare these structures referenced from prototypes below. */ struct kaiocb; struct mbuf; struct thread; struct sockaddr; struct socket; struct sockopt; /*#ifdef _KERNEL*/ /* * Protocol switch table. * * Each protocol has a handle initializing one of these structures, * which is used for protocol-protocol and system-protocol communication. * * In retrospect, it would be a lot nicer to use an interface * similar to the vnode VOP interface. */ struct ifnet; struct stat; struct ucred; struct uio; /* USE THESE FOR YOUR PROTOTYPES ! 
 */
typedef int	pr_ctloutput_t(struct socket *, struct sockopt *);
typedef int	pr_setsbopt_t(struct socket *, struct sockopt *);
typedef void	pr_abort_t(struct socket *);
-typedef int	pr_accept_t(struct socket *, struct sockaddr **);
+typedef int	pr_accept_t(struct socket *, struct sockaddr *);
typedef int	pr_attach_t(struct socket *, int, struct thread *);
typedef int	pr_bind_t(struct socket *, struct sockaddr *, struct thread *);
typedef int	pr_connect_t(struct socket *, struct sockaddr *, struct thread *);
typedef int	pr_connect2_t(struct socket *, struct socket *);
typedef int	pr_control_t(struct socket *, unsigned long, void *, struct ifnet *, struct thread *);
typedef void	pr_detach_t(struct socket *);
typedef int	pr_disconnect_t(struct socket *);
typedef int	pr_listen_t(struct socket *, int, struct thread *);
typedef int	pr_peeraddr_t(struct socket *, struct sockaddr **);
typedef int	pr_rcvd_t(struct socket *, int);
typedef int	pr_rcvoob_t(struct socket *, struct mbuf *, int);
typedef enum {
	PRUS_OOB =		0x1,
	PRUS_EOF =		0x2,
	PRUS_MORETOCOME =	0x4,
	PRUS_NOTREADY =		0x8,
	PRUS_IPV6 =		0x10,
} pr_send_flags_t;
typedef int	pr_send_t(struct socket *, int, struct mbuf *, struct sockaddr *, struct mbuf *, struct thread *);
typedef int	pr_ready_t(struct socket *, struct mbuf *, int);
typedef int	pr_sense_t(struct socket *, struct stat *);
typedef int	pr_shutdown_t(struct socket *);
typedef int	pr_flush_t(struct socket *, int);
typedef int	pr_sockaddr_t(struct socket *, struct sockaddr **);
typedef int	pr_sosend_t(struct socket *, struct sockaddr *, struct uio *, struct mbuf *, struct mbuf *, int, struct thread *);
typedef int	pr_soreceive_t(struct socket *, struct sockaddr **, struct uio *, struct mbuf **, struct mbuf **, int *);
typedef int	pr_sopoll_t(struct socket *, int, struct ucred *, struct thread *);
typedef void	pr_sosetlabel_t(struct socket *);
typedef void	pr_close_t(struct socket *);
typedef int	pr_bindat_t(int, struct socket *, struct sockaddr *, struct thread *);
typedef int	pr_connectat_t(int, struct socket *, struct sockaddr *, struct thread *);
typedef int	pr_aio_queue_t(struct socket *, struct kaiocb *);

struct protosw {
	short	pr_type;		/* socket type used for */
	short	pr_protocol;		/* protocol number */
	short	pr_flags;		/* see below */
	short	pr_unused;
	struct domain *pr_domain;	/* domain protocol a member of */

	pr_soreceive_t	*pr_soreceive;	/* recv(2) */
	pr_rcvd_t	*pr_rcvd;	/* soreceive_generic() if PR_WANTRCVD */
	pr_sosend_t	*pr_sosend;	/* send(2) */
	pr_send_t	*pr_send;	/* send(2) via sosend_generic() */
	pr_ready_t	*pr_ready;	/* sendfile/ktls readiness */
	pr_sopoll_t	*pr_sopoll;	/* poll(2) */
/* Cache line #2 */
	pr_attach_t	*pr_attach;	/* creation: socreate(), sonewconn() */
	pr_detach_t	*pr_detach;	/* destruction: sofree() */
	pr_connect_t	*pr_connect;	/* connect(2) */
	pr_disconnect_t	*pr_disconnect;	/* sodisconnect() */
	pr_close_t	*pr_close;	/* close(2) */
	pr_shutdown_t	*pr_shutdown;	/* shutdown(2) */
	pr_abort_t	*pr_abort;	/* abrupt tear down: soabort() */
	pr_aio_queue_t	*pr_aio_queue;	/* aio(9) */
/* Cache line #3 */
	pr_bind_t	*pr_bind;	/* bind(2) */
	pr_bindat_t	*pr_bindat;	/* bindat(2) */
	pr_listen_t	*pr_listen;	/* listen(2) */
	pr_accept_t	*pr_accept;	/* accept(2) */
	pr_connectat_t	*pr_connectat;	/* connectat(2) */
	pr_connect2_t	*pr_connect2;	/* socketpair(2) */
	pr_control_t	*pr_control;	/* ioctl(2) */
	pr_rcvoob_t	*pr_rcvoob;	/* soreceive_rcvoob() */
/* Cache line #4 */
	pr_ctloutput_t	*pr_ctloutput;	/* control output (from above) */
	pr_peeraddr_t	*pr_peeraddr;	/* getpeername(2) */
	pr_sockaddr_t	*pr_sockaddr;	/* getsockname(2) */
	pr_sense_t	*pr_sense;	/* stat(2) */
	pr_flush_t	*pr_flush;	/* XXXGL: merge with pr_shutdown_t! */
	pr_sosetlabel_t	*pr_sosetlabel;	/* MAC, XXXGL: remove */
	pr_setsbopt_t	*pr_setsbopt;	/* Socket buffer ioctls */
};
/*#endif*/

/*
 * Values for pr_flags.
 * PR_ADDR requires PR_ATOMIC;
 * PR_ADDR and PR_CONNREQUIRED are mutually exclusive.
 * PR_IMPLOPCL means that the protocol allows sendto without prior connect,
 * and the protocol understands the MSG_EOF flag.  The first property
 * is only relevant if PR_CONNREQUIRED is set (otherwise sendto is allowed
 * anyhow).
 * PR_SOCKBUF requires protocol to initialize and destroy its socket buffers
 * in its pr_attach and pr_detach.
 */
#define	PR_ATOMIC	0x01		/* exchange atomic messages only */
#define	PR_ADDR		0x02		/* addresses given with messages */
#define	PR_CONNREQUIRED	0x04		/* connection required by protocol */
#define	PR_WANTRCVD	0x08		/* want PRU_RCVD calls */
#define	PR_RIGHTS	0x10		/* passes capabilities */
#define	PR_IMPLOPCL	0x20		/* implied open/close */
/* was	PR_LASTHDR	0x40		   enforce ipsec policy; last header */
#define	PR_CAPATTACH	0x80		/* socket can attach in cap mode */
#define	PR_SOCKBUF	0x100		/* private implementation of buffers */

/*
 * The arguments to ctloutput are:
 *	(*protosw[].pr_ctloutput)(req, so, level, optname, optval, p);
 * req is one of the actions listed below, so is a (struct socket *),
 * level is an indication of which protocol layer the option is intended.
 * optname is a protocol dependent socket option request,
 * optval is a pointer to a mbuf-chain pointer, for value-return results.
 * The protocol is responsible for disposal of the mbuf chain *optval
 * if supplied,
 * the caller is responsible for any space held by *optval, when returned.
 * A non-zero return from ctloutput gives an
 * UNIX error number which should be passed to higher level software.
 */
#define	PRCO_GETOPT	0
#define	PRCO_SETOPT	1
#define	PRCO_NCMDS	2

#ifdef PRCOREQUESTS
char	*prcorequests[] = {
	"GETOPT", "SETOPT",
};
#endif

#ifdef _KERNEL
struct domain *pffinddomain(int family);
struct protosw *pffindproto(int family, int type, int proto);
int protosw_register(struct domain *, struct protosw *);
int protosw_unregister(struct protosw *);

/* Domains that are known to be available for protosw_register(). */
extern struct domain inetdomain;
extern struct domain inet6domain;
#endif

#endif

diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index 52523b996863..005938aee08a 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -1,599 +1,599 @@
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _SYS_SOCKETVAR_H_ #define _SYS_SOCKETVAR_H_ /* * Socket generation count type. Also used in xinpcb, xtcpcb, xunpcb. */ typedef uint64_t so_gen_t; #if defined(_KERNEL) || defined(_WANT_SOCKET) #include /* for TAILQ macros */ #include /* for struct selinfo */ #include #include #include #include #include #ifdef _KERNEL #include #include #endif struct vnet; /* * Kernel structure per socket. * Contains send and receive buffer queues, * handle on protocol and pointer to protocol * private data and error information. */ typedef int so_upcall_t(struct socket *, void *, int); typedef void so_dtor_t(struct socket *); struct socket; enum socket_qstate { SQ_NONE = 0, SQ_INCOMP = 0x0800, /* on sol_incomp */ SQ_COMP = 0x1000, /* on sol_comp */ }; /*- * Locking key to struct socket: * (a) constant after allocation, no locking required. * (b) locked by SOCK_LOCK(so). * (cr) locked by SOCK_RECVBUF_LOCK(so) * (cs) locked by SOCK_SENDBUF_LOCK(so) * (e) locked by SOLISTEN_LOCK() of corresponding listening socket. * (f) not locked since integer reads/writes are atomic. * (g) used only as a sleep/wakeup address, no value. * (h) locked by global mutex so_global_mtx. * (k) locked by KTLS workqueue mutex */ TAILQ_HEAD(accept_queue, socket); struct socket { struct mtx so_lock; volatile u_int so_count; /* (b / refcount) */ struct selinfo so_rdsel; /* (b/cr) for so_rcv/so_comp */ struct selinfo so_wrsel; /* (b/cs) for so_snd */ int so_options; /* (b) from socket call, see socket.h */ short so_type; /* (a) generic type, see socket.h */ short so_state; /* (b) internal state flags SS_* */ void *so_pcb; /* protocol control block */ struct vnet *so_vnet; /* (a) network stack instance */ struct protosw *so_proto; /* (a) protocol handle */ short so_linger; /* time to linger close(2) */ short so_timeo; /* (g) connection timeout */ u_short so_error; /* (f) error affecting connection */ u_short so_rerror; /* (f) error affecting connection */ struct sigio *so_sigio; /* [sg] information for async I/O or out of band data (SIGURG) */ struct ucred *so_cred; /* (a) user credentials */ struct label *so_label; /* (b) MAC label for socket */ /* NB: generation count must not be first. */ so_gen_t so_gencnt; /* (h) generation count */ void *so_emuldata; /* (b) private data for emulators */ so_dtor_t *so_dtor; /* (b) optional destructor */ struct osd osd; /* Object Specific extensions */ /* * so_fibnum, so_user_cookie and friends can be used to attach * some user-specified metadata to a socket, which then can be * used by the kernel for various actions. * so_user_cookie is used by ipfw/dummynet. 
*/ int so_fibnum; /* routing domain for this socket */ uint32_t so_user_cookie; int so_ts_clock; /* type of the clock used for timestamps */ uint32_t so_max_pacing_rate; /* (f) TX rate limit in bytes/s */ /* * Mutexes to prevent interleaving of socket I/O. These have to be * outside of the socket buffers in order to interlock with listen(2). */ struct sx so_snd_sx __aligned(CACHE_LINE_SIZE); struct mtx so_snd_mtx; struct sx so_rcv_sx __aligned(CACHE_LINE_SIZE); struct mtx so_rcv_mtx; union { /* Regular (data flow) socket. */ struct { /* (cr, cs) Receive and send buffers. */ struct sockbuf so_rcv, so_snd; /* (e) Our place on accept queue. */ TAILQ_ENTRY(socket) so_list; struct socket *so_listen; /* (b) */ enum socket_qstate so_qstate; /* (b) */ /* (b) cached MAC label for peer */ struct label *so_peerlabel; u_long so_oobmark; /* chars to oob mark */ /* (k) Our place on KTLS RX work queue. */ STAILQ_ENTRY(socket) so_ktls_rx_list; }; /* * Listening socket, where accepts occur, is so_listen in all * subsidiary sockets. If so_listen is NULL, socket is not * related to an accept. For a listening socket itself * sol_incomp queues partially completed connections, while * sol_comp is a queue of connections ready to be accepted. * If a connection is aborted and it has so_listen set, then * it has to be pulled out of either sol_incomp or sol_comp. * We allow connections to queue up based on current queue * lengths and limit on number of queued connections for this * socket. */ struct { /* (e) queue of partial unaccepted connections */ struct accept_queue sol_incomp; /* (e) queue of complete unaccepted connections */ struct accept_queue sol_comp; u_int sol_qlen; /* (e) sol_comp length */ u_int sol_incqlen; /* (e) sol_incomp length */ u_int sol_qlimit; /* (e) queue limit */ /* accept_filter(9) optional data */ struct accept_filter *sol_accept_filter; void *sol_accept_filter_arg; /* saved filter args */ char *sol_accept_filter_str; /* saved user args */ /* Optional upcall, for kernel socket. */ so_upcall_t *sol_upcall; /* (e) */ void *sol_upcallarg; /* (e) */ /* Socket buffer parameters, to be copied to * dataflow sockets, accepted from this one. */ int sol_sbrcv_lowat; int sol_sbsnd_lowat; u_int sol_sbrcv_hiwat; u_int sol_sbsnd_hiwat; short sol_sbrcv_flags; short sol_sbsnd_flags; sbintime_t sol_sbrcv_timeo; sbintime_t sol_sbsnd_timeo; /* Information tracking listen queue overflows. */ struct timeval sol_lastover; /* (e) */ int sol_overcount; /* (e) */ }; }; }; #endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */ /* * Socket state bits. * * Historically, these bits were all kept in the so_state field. * They are now split into separate, lock-specific fields. * so_state maintains basic socket state protected by the socket lock. * so_qstate holds information about the socket accept queues. * Each socket buffer also has a state field holding information * relevant to that socket buffer (can't send, rcv). * Many fields will be read without locks to improve performance and avoid * lock order issues. However, this approach must be used with caution. 
*/ #define SS_ISCONNECTED 0x0002 /* socket connected to a peer */ #define SS_ISCONNECTING 0x0004 /* in process of connecting to peer */ #define SS_ISDISCONNECTING 0x0008 /* in process of disconnecting */ #define SS_NBIO 0x0100 /* non-blocking ops */ #define SS_ASYNC 0x0200 /* async i/o notify */ #define SS_ISCONFIRMING 0x0400 /* deciding to accept connection req */ #define SS_ISDISCONNECTED 0x2000 /* socket disconnected from peer */ #ifdef _KERNEL #define SOCK_MTX(so) (&(so)->so_lock) #define SOCK_LOCK(so) mtx_lock(&(so)->so_lock) #define SOCK_OWNED(so) mtx_owned(&(so)->so_lock) #define SOCK_UNLOCK(so) mtx_unlock(&(so)->so_lock) #define SOCK_LOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_OWNED) #define SOCK_UNLOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_NOTOWNED) #define SOLISTENING(sol) (((sol)->so_options & SO_ACCEPTCONN) != 0) #define SOLISTEN_LOCK(sol) do { \ mtx_lock(&(sol)->so_lock); \ KASSERT(SOLISTENING(sol), \ ("%s: %p not listening", __func__, (sol))); \ } while (0) #define SOLISTEN_TRYLOCK(sol) mtx_trylock(&(sol)->so_lock) #define SOLISTEN_UNLOCK(sol) do { \ KASSERT(SOLISTENING(sol), \ ("%s: %p not listening", __func__, (sol))); \ mtx_unlock(&(sol)->so_lock); \ } while (0) #define SOLISTEN_LOCK_ASSERT(sol) do { \ mtx_assert(&(sol)->so_lock, MA_OWNED); \ KASSERT(SOLISTENING(sol), \ ("%s: %p not listening", __func__, (sol))); \ } while (0) #define SOLISTEN_UNLOCK_ASSERT(sol) do { \ mtx_assert(&(sol)->so_lock, MA_NOTOWNED); \ KASSERT(SOLISTENING(sol), \ ("%s: %p not listening", __func__, (sol))); \ } while (0) /* * Socket buffer locks. These are strongly preferred over SOCKBUF_LOCK(sb) * macros, as we are moving towards protocol specific socket buffers. */ #define SOCK_RECVBUF_MTX(so) \ (&(so)->so_rcv_mtx) #define SOCK_RECVBUF_LOCK(so) \ mtx_lock(SOCK_RECVBUF_MTX(so)) #define SOCK_RECVBUF_UNLOCK(so) \ mtx_unlock(SOCK_RECVBUF_MTX(so)) #define SOCK_RECVBUF_LOCK_ASSERT(so) \ mtx_assert(SOCK_RECVBUF_MTX(so), MA_OWNED) #define SOCK_RECVBUF_UNLOCK_ASSERT(so) \ mtx_assert(SOCK_RECVBUF_MTX(so), MA_NOTOWNED) #define SOCK_SENDBUF_MTX(so) \ (&(so)->so_snd_mtx) #define SOCK_SENDBUF_LOCK(so) \ mtx_lock(SOCK_SENDBUF_MTX(so)) #define SOCK_SENDBUF_UNLOCK(so) \ mtx_unlock(SOCK_SENDBUF_MTX(so)) #define SOCK_SENDBUF_LOCK_ASSERT(so) \ mtx_assert(SOCK_SENDBUF_MTX(so), MA_OWNED) #define SOCK_SENDBUF_UNLOCK_ASSERT(so) \ mtx_assert(SOCK_SENDBUF_MTX(so), MA_NOTOWNED) #define SOCK_BUF_LOCK(so, which) \ mtx_lock(soeventmtx(so, which)) #define SOCK_BUF_UNLOCK(so, which) \ mtx_unlock(soeventmtx(so, which)) #define SOCK_BUF_LOCK_ASSERT(so, which) \ mtx_assert(soeventmtx(so, which), MA_OWNED) #define SOCK_BUF_UNLOCK_ASSERT(so, which) \ mtx_assert(soeventmtx(so, which), MA_NOTOWNED) static inline struct sockbuf * sobuf(struct socket *so, const sb_which which) { return (which == SO_RCV ? &so->so_rcv : &so->so_snd); } static inline struct mtx * soeventmtx(struct socket *so, const sb_which which) { return (which == SO_RCV ? SOCK_RECVBUF_MTX(so) : SOCK_SENDBUF_MTX(so)); } /* * Macros for sockets and socket buffering. */ /* * Flags to soiolock(). */ #define SBL_WAIT 0x00000001 /* Wait if not immediately available. */ #define SBL_NOINTR 0x00000002 /* Force non-interruptible sleep. */ #define SBL_VALID (SBL_WAIT | SBL_NOINTR) #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 
0 : SBL_WAIT) #define SOCK_IO_SEND_LOCK(so, flags) \ soiolock((so), &(so)->so_snd_sx, (flags)) #define SOCK_IO_SEND_UNLOCK(so) \ soiounlock(&(so)->so_snd_sx) #define SOCK_IO_SEND_OWNED(so) sx_xlocked(&(so)->so_snd_sx) #define SOCK_IO_RECV_LOCK(so, flags) \ soiolock((so), &(so)->so_rcv_sx, (flags)) #define SOCK_IO_RECV_UNLOCK(so) \ soiounlock(&(so)->so_rcv_sx) #define SOCK_IO_RECV_OWNED(so) sx_xlocked(&(so)->so_rcv_sx) /* do we have to send all at once on a socket? */ #define sosendallatonce(so) \ ((so)->so_proto->pr_flags & PR_ATOMIC) /* can we read something from so? */ #define soreadabledata(so) \ (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \ (so)->so_error || (so)->so_rerror) #define soreadable(so) \ (soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE)) /* can we write something to so? */ #define sowriteable(so) \ ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \ (((so)->so_state&SS_ISCONNECTED) || \ ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \ ((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \ (so)->so_error) /* * soref()/sorele() ref-count the socket structure. * soref() may be called without owning socket lock, but in that case a * caller must own something that holds socket, and so_count must be not 0. * Note that you must still explicitly close the socket, but the last ref * count will free the structure. */ #define soref(so) refcount_acquire(&(so)->so_count) #define sorele(so) do { \ SOCK_UNLOCK_ASSERT(so); \ if (!refcount_release_if_not_last(&(so)->so_count)) { \ SOCK_LOCK(so); \ sorele_locked(so); \ } \ } while (0) /* * In sorwakeup() and sowwakeup(), acquire the socket buffer lock to * avoid a non-atomic test-and-wakeup. However, sowakeup is * responsible for releasing the lock if it is called. We unlock only * if we don't call into sowakeup. If any code is introduced that * directly invokes the underlying sowakeup() primitives, it must * maintain the same semantics. */ #define sorwakeup(so) do { \ SOCK_RECVBUF_LOCK(so); \ sorwakeup_locked(so); \ } while (0) #define sowwakeup(so) do { \ SOCK_SENDBUF_LOCK(so); \ sowwakeup_locked(so); \ } while (0) struct accept_filter { char accf_name[16]; int (*accf_callback) (struct socket *so, void *arg, int waitflag); void * (*accf_create) (struct socket *so, char *arg); void (*accf_destroy) (struct socket *so); SLIST_ENTRY(accept_filter) accf_next; }; #define ACCEPT_FILTER_DEFINE(modname, filtname, cb, create, destroy, ver) \ static struct accept_filter modname##_filter = { \ .accf_name = filtname, \ .accf_callback = cb, \ .accf_create = create, \ .accf_destroy = destroy, \ }; \ static moduledata_t modname##_mod = { \ .name = __XSTRING(modname), \ .evhand = accept_filt_generic_mod_event, \ .priv = &modname##_filter, \ }; \ DECLARE_MODULE(modname, modname##_mod, SI_SUB_DRIVERS, \ SI_ORDER_MIDDLE); \ MODULE_VERSION(modname, ver) #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_ACCF); MALLOC_DECLARE(M_PCB); MALLOC_DECLARE(M_SONAME); #endif /* * Socket specific helper hook point identifiers * Do not leave holes in the sequence, hook registration is a loop. 
*/ #define HHOOK_SOCKET_OPT 0 #define HHOOK_SOCKET_CREATE 1 #define HHOOK_SOCKET_RCV 2 #define HHOOK_SOCKET_SND 3 #define HHOOK_FILT_SOREAD 4 #define HHOOK_FILT_SOWRITE 5 #define HHOOK_SOCKET_CLOSE 6 #define HHOOK_SOCKET_LAST HHOOK_SOCKET_CLOSE struct socket_hhook_data { struct socket *so; struct mbuf *m; void *hctx; /* hook point specific data*/ int status; }; extern int maxsockets; extern u_long sb_max; extern so_gen_t so_gencnt; struct file; struct filecaps; struct filedesc; struct mbuf; struct sockaddr; struct ucred; struct uio; /* Return values for socket upcalls. */ #define SU_OK 0 #define SU_ISCONNECTED 1 /* * From uipc_socket and friends */ int getsockaddr(struct sockaddr **namp, const struct sockaddr *uaddr, size_t len); int getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp, struct filecaps *havecaps); int getsock(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp); void soabort(struct socket *so); -int soaccept(struct socket *so, struct sockaddr **nam); +int soaccept(struct socket *so, struct sockaddr *sa); void soaio_enqueue(struct task *task); void soaio_rcv(void *context, int pending); void soaio_snd(void *context, int pending); int socheckuid(struct socket *so, uid_t uid); int sobind(struct socket *so, struct sockaddr *nam, struct thread *td); int sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td); int soclose(struct socket *so); int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td); int soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td); int soconnect2(struct socket *so1, struct socket *so2); int socreate(int dom, struct socket **aso, int type, int proto, struct ucred *cred, struct thread *td); int sodisconnect(struct socket *so); void sodtor_set(struct socket *, so_dtor_t *); struct sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags); void sohasoutofband(struct socket *so); int solisten(struct socket *so, int backlog, struct thread *td); void solisten_proto(struct socket *so, int backlog); void solisten_proto_abort(struct socket *so); int solisten_proto_check(struct socket *so); bool solisten_enqueue(struct socket *, int); int solisten_dequeue(struct socket *, struct socket **, int); struct socket * solisten_clone(struct socket *); struct socket * sonewconn(struct socket *head, int connstatus); struct socket * sopeeloff(struct socket *); int sopoll(struct socket *so, int events, struct ucred *active_cred, struct thread *td); int sopoll_generic(struct socket *so, int events, struct ucred *active_cred, struct thread *td); int soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int soreceive_stream(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int soreceive_dgram(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int soreceive_generic(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); void sorele_locked(struct socket *so); void sodealloc(struct socket *); int soreserve(struct socket *so, u_long sndcc, u_long rcvcc); void sorflush(struct socket *so); int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); int sousrsend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct 
mbuf *control, int flags, struct proc *); int sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); int sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); int soshutdown(struct socket *so, int how); void soupcall_clear(struct socket *, sb_which); void soupcall_set(struct socket *, sb_which, so_upcall_t, void *); void solisten_upcall_set(struct socket *, so_upcall_t, void *); void sorwakeup_locked(struct socket *); void sowwakeup_locked(struct socket *); void sowakeup_aio(struct socket *, sb_which); void solisten_wakeup(struct socket *); int selsocket(struct socket *so, int events, struct timeval *tv, struct thread *td); void soisconnected(struct socket *so); void soisconnecting(struct socket *so); void soisdisconnected(struct socket *so); void soisdisconnecting(struct socket *so); void socantrcvmore(struct socket *so); void socantrcvmore_locked(struct socket *so); void socantsendmore(struct socket *so); void socantsendmore_locked(struct socket *so); void soroverflow(struct socket *so); void soroverflow_locked(struct socket *so); int soiolock(struct socket *so, struct sx *sx, int flags); void soiounlock(struct sx *sx); /* * Accept filter functions (duh). */ int accept_filt_add(struct accept_filter *filt); int accept_filt_del(char *name); struct accept_filter *accept_filt_get(char *name); #ifdef ACCEPT_FILTER_MOD #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet_accf); #endif int accept_filt_generic_mod_event(module_t mod, int event, void *data); #endif #endif /* _KERNEL */ /* * Structure to export socket from kernel to utilities, via sysctl(3). */ struct xsocket { ksize_t xso_len; /* length of this structure */ kvaddr_t xso_so; /* kernel address of struct socket */ kvaddr_t so_pcb; /* kernel address of struct inpcb */ uint64_t so_oobmark; int64_t so_spare64[8]; int32_t xso_protocol; int32_t xso_family; uint32_t so_qlen; uint32_t so_incqlen; uint32_t so_qlimit; pid_t so_pgid; uid_t so_uid; int32_t so_spare32[8]; int16_t so_type; int16_t so_options; int16_t so_linger; int16_t so_state; int16_t so_timeo; uint16_t so_error; struct xsockbuf { uint32_t sb_cc; uint32_t sb_hiwat; uint32_t sb_mbcnt; uint32_t sb_spare0; /* was sb_mcnt */ uint32_t sb_spare1; /* was sb_ccnt */ uint32_t sb_mbmax; int32_t sb_lowat; int32_t sb_timeo; int16_t sb_flags; } so_rcv, so_snd; }; #ifdef _KERNEL void sotoxsocket(struct socket *so, struct xsocket *xso); void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb); #endif /* * Socket buffer state bits. Exported via libprocstat(3). */ #define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ #define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ #define SBS_RCVATMARK 0x0040 /* at mark on input */ #endif /* !_SYS_SOCKETVAR_H_ */ diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h index 0480eda2fca4..c28016b4b3f8 100644 --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -1,391 +1,391 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Ian Dowse. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _SYS_SYSCALLSUBR_H_ #define _SYS_SYSCALLSUBR_H_ #include #include #include #include #include #include #include struct __wrusage; struct cpuset_copy_cb; struct file; struct filecaps; enum idtype; struct itimerval; struct image_args; struct jail; struct kevent; struct kevent_copyops; struct kld_file_stat; struct ksiginfo; struct mbuf; struct msghdr; struct msqid_ds; struct pollfd; struct ogetdirentries_args; struct rlimit; struct rusage; struct sched_param; struct sembuf; union semun; struct sockaddr; struct spacectl_range; struct stat; struct thr_param; struct timex; struct uio; struct vm_map; struct vmspace; typedef int (*mmap_check_fp_fn)(struct file *, int, int, int); struct mmap_req { vm_offset_t mr_hint; vm_size_t mr_len; int mr_prot; int mr_flags; int mr_fd; off_t mr_pos; mmap_check_fp_fn mr_check_fp_fn; }; uint64_t at2cnpflags(u_int at_flags, u_int mask); int kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg, size_t buflen, size_t path_max); int kern_abort2(struct thread *td, const char *why, int nargs, void **uargs); -int kern_accept(struct thread *td, int s, struct sockaddr **name, - socklen_t *namelen, struct file **fp); -int kern_accept4(struct thread *td, int s, struct sockaddr **name, - socklen_t *namelen, int flags, struct file **fp); +int kern_accept(struct thread *td, int s, struct sockaddr *sa, + struct file **fp); +int kern_accept4(struct thread *td, int s, struct sockaddr *sa, + int flags, struct file **fp); int kern_accessat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int flags, int mode); int kern_adjtime(struct thread *td, struct timeval *delta, struct timeval *olddelta); int kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa); int kern_break(struct thread *td, uintptr_t *addr); int kern_cap_ioctls_limit(struct thread *td, int fd, u_long *cmds, size_t ncmds); int kern_cap_rights_limit(struct thread *td, int fd, cap_rights_t *rights); int kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg); int kern_clock_getcpuclockid2(struct thread *td, id_t id, int which, clockid_t *clk_id); int kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts); int kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats); int kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec *rqtp, struct timespec *rmtp); int kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats); void kern_thread_cputime(struct thread *targettd, struct timespec *ats); void 
kern_process_cputime(struct proc *targetp, struct timespec *ats); int kern_close_range(struct thread *td, int flags, u_int lowfd, u_int highfd); int kern_close(struct thread *td, int fd); int kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa); int kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, off_t *outoffp, size_t len, unsigned int flags); int user_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp, const struct cpuset_copy_cb *cb); int kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *mask); int kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, cpuset_t *maskp); int user_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t cpusetsize, const cpuset_t *maskp, const struct cpuset_copy_cb *cb); int kern_cpuset_getdomain(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t domainsetsize, domainset_t *maskp, int *policyp, const struct cpuset_copy_cb *cb); int kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t domainsetsize, const domainset_t *maskp, int policy, const struct cpuset_copy_cb *cb); int kern_cpuset_getid(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, cpusetid_t *setid); int kern_cpuset_setid(struct thread *td, cpuwhich_t which, id_t id, cpusetid_t setid); int kern_dup(struct thread *td, u_int mode, int flags, int old, int new); int kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace); int kern_extattr_delete_fd(struct thread *td, int fd, int attrnamespace, const char *attrname); int kern_extattr_delete_path(struct thread *td, const char *path, int attrnamespace, const char *attrname, int follow, enum uio_seg pathseg); int kern_extattr_get_fd(struct thread *td, int fd, int attrnamespace, const char *attrname, void *data, size_t nbytes); int kern_extattr_get_path(struct thread *td, const char *path, int attrnamespace, const char *attrname, void *data, size_t nbytes, int follow, enum uio_seg pathseg); int kern_extattr_list_fd(struct thread *td, int fd, int attrnamespace, struct uio *auiop); int kern_extattr_list_path(struct thread *td, const char *path, int attrnamespace, struct uio *auiop, int follow, enum uio_seg pathseg); int kern_extattr_set_fd(struct thread *td, int fd, int attrnamespace, const char *attrname, void *data, size_t nbytes); int kern_extattr_set_path(struct thread *td, const char *path, int attrnamespace, const char *attrname, void *data, size_t nbytes, int follow, enum uio_seg pathseg); int kern_fchmodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, mode_t mode, int flag); int kern_fchownat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int uid, int gid, int flag); int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg); int kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg); int kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags); int kern_fhstat(struct thread *td, fhandle_t fh, struct stat *buf); int kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf); int kern_fpathconf(struct thread *td, int fd, int name, long *valuep); int kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs *ubuf, long bufsize, int mode); int kern_fstat(struct thread *td, int fd, struct stat *sbp); 
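The kern_accept() and kern_accept4() prototypes above drop the sockaddr double pointer and namelen in favor of a single caller-provided sockaddr, so the syscall layer no longer allocates the address with M_SONAME and callers no longer free it. Below is a minimal sketch of the new calling convention from a hypothetical in-kernel consumer; example_accept_peer() is invented for illustration, and the assumptions that ss_len should be preset to the buffer capacity and that the returned file reference must still be dropped with fdrop() are carried over from the old interface rather than confirmed by this diff:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/syscallsubr.h>

static int
example_accept_peer(struct thread *td, int lfd)
{
	/*
	 * One sockaddr_storage on the stack is large enough for any
	 * supported address family; ss_len carries the capacity in.
	 */
	struct sockaddr_storage ss = { .ss_len = sizeof(ss) };
	struct file *fp;
	int error;

	error = kern_accept(td, lfd, (struct sockaddr *)&ss, &fp);
	if (error != 0)
		return (error);
	/* td->td_retval[0] holds the new descriptor, as before. */
	printf("accepted: family %d, address length %d\n",
	    ss.ss_family, ss.ss_len);
	fdrop(fp, td);
	return (0);
}

Keeping the buffer on the caller's stack is the point of the change: the per-accept(2) malloc(9)/free(9) round trip through M_SONAME disappears.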
int kern_fstatfs(struct thread *td, int fd, struct statfs *buf); int kern_fsync(struct thread *td, int fd, bool fullsync); int kern_ftruncate(struct thread *td, int fd, off_t length); int kern_futimes(struct thread *td, int fd, const struct timeval *tptr, enum uio_seg tptrseg); int kern_futimens(struct thread *td, int fd, const struct timespec *tptr, enum uio_seg tptrseg); int kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, off_t *basep, ssize_t *residp, enum uio_seg bufseg); int kern_getfhat(struct thread *td, int flags, int fd, const char *path, enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg); int kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, size_t *countp, enum uio_seg bufseg, int mode); int kern_getitimer(struct thread *, u_int, struct itimerval *); int kern_getppid(struct thread *); int kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen); int kern_getpriority(struct thread *td, int which, int who); int kern_getrusage(struct thread *td, int who, struct rusage *rup); int kern_getsid(struct thread *td, pid_t pid); int kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen); int kern_getsockopt(struct thread *td, int s, int level, int name, void *optval, enum uio_seg valseg, socklen_t *valsize); int kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data); int kern_jail(struct thread *td, struct jail *j); int kern_jail_get(struct thread *td, struct uio *options, int flags); int kern_jail_set(struct thread *td, struct uio *options, int flags); int kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout); int kern_kevent_anonymous(struct thread *td, int nevents, struct kevent_copyops *k_ops); int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout); int kern_kill(struct thread *td, pid_t pid, int signum); int kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps); int kern_kldload(struct thread *td, const char *file, int *fileid); int kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat); int kern_kldunload(struct thread *td, int fileid, int flags); int kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, const char *path2, enum uio_seg segflg, int flag); int kern_listen(struct thread *td, int s, int backlog); int kern_lseek(struct thread *td, int fd, off_t offset, int whence); int kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg); int kern_madvise(struct thread *td, uintptr_t addr, size_t len, int behav); int kern_mincore(struct thread *td, uintptr_t addr, size_t len, char *vec); int kern_minherit(struct thread *td, uintptr_t addr, size_t len, int inherit); int kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, int mode); int kern_membarrier(struct thread *td, int cmd, unsigned flags, int cpu_id); int kern_mkfifoat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int mode); int kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int mode, dev_t dev); int kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr, size_t len); int kern_mmap(struct thread *td, const struct mmap_req *mrp); int kern_mmap_racct_check(struct thread *td, struct vm_map *map, vm_size_t size); int kern_mmap_maxprot(struct proc *p, int prot); int 
kern_mprotect(struct thread *td, uintptr_t addr, size_t size, int prot, int flags); int kern_msgctl(struct thread *, int, int, struct msqid_ds *); int kern_msgrcv(struct thread *, int, void *, size_t, long, int, long *); int kern_msgsnd(struct thread *, int, const void *, size_t, int, long); int kern_msync(struct thread *td, uintptr_t addr, size_t size, int flags); int kern_munlock(struct thread *td, uintptr_t addr, size_t size); int kern_munmap(struct thread *td, uintptr_t addr, size_t size); int kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt); int kern_ntp_adjtime(struct thread *td, struct timex *ntv, int *retvalp); int kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, long *ploff); int kern_ommap(struct thread *td, uintptr_t hint, int len, int oprot, int oflags, int fd, long pos); int kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int flags, int mode); int kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, int name, u_long flags, long *valuep); int kern_pipe(struct thread *td, int fildes[2], int flags, struct filecaps *fcaps1, struct filecaps *fcaps2); int kern_poll(struct thread *td, struct pollfd *fds, u_int nfds, struct timespec *tsp, sigset_t *uset); int kern_poll_kfds(struct thread *td, struct pollfd *fds, u_int nfds, struct timespec *tsp, sigset_t *uset); bool kern_poll_maxfds(u_int nfds); int kern_posix_error(struct thread *td, int error); int kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, int advice); int kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len); int kern_fspacectl(struct thread *td, int fd, int cmd, const struct spacectl_range *, int flags, struct spacectl_range *); int kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com, void *data); int kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, off_t offset); int kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset); int kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou, fd_set *ex, struct timeval *tvp, sigset_t *uset, int abi_nfdbits); int kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data); int kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte, off_t offset); int kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset); int kern_readlinkat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count); int kern_readv(struct thread *td, int fd, struct uio *auio); int kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg, struct mbuf **controlp); int kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, const char *new, enum uio_seg pathseg); int kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, enum uio_seg pathseg, int flag); int kern_sched_getparam(struct thread *td, struct thread *targettd, struct sched_param *param); int kern_sched_getscheduler(struct thread *td, struct thread *targettd, int *policy); int kern_sched_setparam(struct thread *td, struct thread *targettd, struct sched_param *param); int kern_sched_setscheduler(struct thread *td, struct thread *targettd, int policy, struct sched_param *param); int kern_sched_rr_get_interval(struct thread *td, pid_t pid, struct timespec *ts); int kern_sched_rr_get_interval_td(struct thread *td, struct thread *targettd, struct timespec *ts); int kern_semctl(struct thread *td, int semid, int semnum, int cmd, 
union semun *arg, register_t *rval); int kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits); int kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags, struct mbuf *control, enum uio_seg segflg); int kern_setgroups(struct thread *td, u_int ngrp, gid_t *groups); int kern_setitimer(struct thread *, u_int, struct itimerval *, struct itimerval *); int kern_setpriority(struct thread *td, int which, int who, int prio); int kern_setrlimit(struct thread *, u_int, struct rlimit *); int kern_setsockopt(struct thread *td, int s, int level, int name, const void *optval, enum uio_seg valseg, socklen_t valsize); int kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp); int kern_shm_open(struct thread *td, const char *userpath, int flags, mode_t mode, struct filecaps *fcaps); int kern_shm_open2(struct thread *td, const char *path, int flags, mode_t mode, int shmflags, struct filecaps *fcaps, const char *name); int kern_shmat(struct thread *td, int shmid, const void *shmaddr, int shmflg); int kern_shmctl(struct thread *td, int shmid, int cmd, void *buf, size_t *bufsz); int kern_shutdown(struct thread *td, int s, int how); int kern_sigaction(struct thread *td, int sig, const struct sigaction *act, struct sigaction *oact, int flags); int kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss); int kern_sigprocmask(struct thread *td, int how, sigset_t *set, sigset_t *oset, int flags); int kern_sigsuspend(struct thread *td, sigset_t mask); int kern_sigtimedwait(struct thread *td, sigset_t waitset, struct ksiginfo *ksi, struct timespec *timeout); int kern_sigqueue(struct thread *td, pid_t pid, int signum, union sigval *value); int kern_socket(struct thread *td, int domain, int type, int protocol); int kern_statat(struct thread *td, int flag, int fd, const char *path, enum uio_seg pathseg, struct stat *sbp); int kern_specialfd(struct thread *td, int type, void *arg); int kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, struct statfs *buf); int kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, enum uio_seg segflg); int kern_sync(struct thread *td); int kern_ktimer_create(struct thread *td, clockid_t clock_id, struct sigevent *evp, int *timerid, int preset_id); int kern_ktimer_delete(struct thread *, int); int kern_ktimer_settime(struct thread *td, int timer_id, int flags, struct itimerspec *val, struct itimerspec *oval); int kern_ktimer_gettime(struct thread *td, int timer_id, struct itimerspec *val); int kern_ktimer_getoverrun(struct thread *td, int timer_id); int kern_semop(struct thread *td, int usemid, struct sembuf *usops, size_t nsops, struct timespec *timeout); int kern_thr_alloc(struct proc *, int pages, struct thread **); int kern_thr_exit(struct thread *td); int kern_thr_new(struct thread *td, struct thr_param *param); int kern_thr_suspend(struct thread *td, struct timespec *tsp); int kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, off_t length); int kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, enum uio_seg pathseg, int flag, ino_t oldinum); int kern_utimesat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg); int kern_utimensat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, int flag); int kern_wait(struct thread *td, pid_t pid, int *status, int options, 
	struct rusage *rup);
int	kern_wait6(struct thread *td, enum idtype idtype, id_t id, int *status, int options, struct __wrusage *wrup, siginfo_t *sip);
int	kern_writev(struct thread *td, int fd, struct uio *auio);
int	kern_socketpair(struct thread *td, int domain, int type, int protocol, int *rsv);
int	kern_unmount(struct thread *td, const char *path, int flags);

/* flags for kern_sigaction */
#define	KSA_OSIGSET	0x0001	/* uses osigact_t */
#define	KSA_FREEBSD4	0x0002	/* uses ucontext4 */

struct freebsd11_dirent;

int	freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, long *basep, void (*func)(struct freebsd11_dirent *));

#endif /* !_SYS_SYSCALLSUBR_H_ */

diff --git a/tests/sys/kern/socket_accept.c b/tests/sys/kern/socket_accept.c
index 278eb2fb9853..24c3210900d6 100644
--- a/tests/sys/kern/socket_accept.c
+++ b/tests/sys/kern/socket_accept.c
@@ -1,130 +1,127 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023 Gleb Smirnoff
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
#include
#include
#include
#include
#include

static int
tcp4_listensock(struct sockaddr_in *sin)
{
	int l;

	ATF_REQUIRE((l = socket(PF_INET, SOCK_STREAM, 0)) > 0);
	ATF_REQUIRE(setsockopt(l, SOL_SOCKET, SO_REUSEADDR, &(socklen_t){1},
	    sizeof(int)) == 0);
	*sin = (struct sockaddr_in){
		.sin_len = sizeof(*sin),
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};
	ATF_REQUIRE(bind(l, (struct sockaddr *)sin, sizeof(*sin)) == 0);
	ATF_REQUIRE(getsockname(l, (struct sockaddr *)sin,
	    &(socklen_t){ sizeof(*sin) }) == 0);
	ATF_REQUIRE(listen(l, -1) == 0);

	return (l);
}

static int
tcp4_clientsock(struct sockaddr_in *sin)
{
	int s;

	ATF_REQUIRE((s = socket(PF_INET, SOCK_STREAM, 0)) > 0);
	ATF_REQUIRE(connect(s, (struct sockaddr *)sin, sizeof(*sin)) == 0);

	return (s);
}

ATF_TC_WITHOUT_HEAD(tcp4_zerolen);
ATF_TC_BODY(tcp4_zerolen, tc)
{
	static char canary[sizeof(struct sockaddr_in)] =
	    { [0 ... sizeof(struct sockaddr_in) - 1] = 0xa };
	struct sockaddr_in sin, ret;
	socklen_t salen;
	int l;

	l = tcp4_listensock(&sin);
	(void)tcp4_clientsock(&sin);

	memcpy(&ret, &canary, sizeof(ret));
	salen = 0;
	ATF_REQUIRE(accept(l, (struct sockaddr *)&ret, &salen) > 0);
	ATF_REQUIRE(memcmp(&ret, &canary, sizeof(ret)) == 0);
-#if 0
-	/* Linux behavior. Matches my reading of accept(2) and POSIX. */
	ATF_REQUIRE(salen == sizeof(struct sockaddr_in));
-#endif
	/* Note: Linux will block for connection here, we fail immediately. */
	ATF_REQUIRE(accept(l, (struct sockaddr *)&ret, NULL) == -1);
	ATF_REQUIRE(errno == EFAULT);
}

ATF_TC_WITHOUT_HEAD(tcp4);
ATF_TC_BODY(tcp4, tc)
{
	struct sockaddr_in sin, ret;
	socklen_t salen;
	int l, s;

	l = tcp4_listensock(&sin);
	s = tcp4_clientsock(&sin);

	salen = sizeof(struct sockaddr_in) + 2;
	ATF_REQUIRE(accept(l, (struct sockaddr *)&ret, &salen) > 0);
	ATF_REQUIRE(salen == sizeof(struct sockaddr_in));
	ATF_REQUIRE(getsockname(s, (struct sockaddr *)&sin,
	    &(socklen_t){ sizeof(sin) }) == 0);
	ATF_REQUIRE(memcmp(&ret, &sin, sizeof(sin)) == 0);
}

ATF_TC_WITHOUT_HEAD(tcp4_noaddr);
ATF_TC_BODY(tcp4_noaddr, tc)
{
	struct sockaddr_in sin;
	int l;

	l = tcp4_listensock(&sin);
	(void)tcp4_clientsock(&sin);

	ATF_REQUIRE(accept(l, NULL, NULL) > 0);
}

ATF_TP_ADD_TCS(tp)
{
	ATF_TP_ADD_TC(tp, tcp4);
	ATF_TP_ADD_TC(tp, tcp4_noaddr);
	ATF_TP_ADD_TC(tp, tcp4_zerolen);

	return (atf_no_error());
}
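One detail of svc_vc_reply() and svc_vc_backchannel_reply() above that is easy to miss: the prepended uint32_t is the RPC record mark defined by RFC 5531 for stream transports. The top bit flags the last fragment of a record and the low 31 bits carry the fragment length, which is why the code writes htonl(0x80000000 | (len - sizeof(uint32_t))): every reply goes out as one final fragment. The helpers below, rm_encode() and rm_decode(), are illustrative user-space code, not kernel API, sketching the same framing:

#include <stdint.h>
#include <stdbool.h>
#include <arpa/inet.h>
#include <assert.h>
#include <stdio.h>

#define	RM_LASTFRAG	0x80000000UL	/* high bit: last fragment of record */

/* Build the wire-format record mark for a fragment of 'len' bytes. */
static uint32_t
rm_encode(uint32_t len, bool last)
{
	return (htonl((last ? RM_LASTFRAG : 0) | (len & 0x7fffffff)));
}

/* Split a received record mark into fragment length and last-flag. */
static void
rm_decode(uint32_t wire, uint32_t *len, bool *last)
{
	uint32_t host = ntohl(wire);

	*last = (host & RM_LASTFRAG) != 0;
	*len = host & 0x7fffffff;
}

int
main(void)
{
	uint32_t len;
	bool last;

	/* Round-trip a 124-byte final fragment, as svc_vc_reply would send. */
	rm_decode(rm_encode(124, true), &len, &last);
	assert(len == 124 && last);
	printf("record mark ok: len=%u last=%d\n", len, last);
	return (0);
}

On the receive path this is the framing svc_vc_process_pending() reassembles: it accumulates mbufs until a full fragment has arrived and tracks the last-fragment bit in cd->eor, and only a request with resid == 0 and eor set is handed to xdr_callmsg().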