diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1372,8 +1372,8 @@ struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct sockbuf *sb = &so->so_rcv; - struct mbuf *control, *m, *first, *last, *next; - u_int ctl, space, datalen, mbcnt, lastlen; + struct mbuf *control, *m, *first, *part, *next; + u_int ctl, space, datalen, mbcnt, partlen; int error, flags; bool nonblock, waitall, peek; @@ -1457,22 +1457,16 @@ control = NULL; /* - * Find split point for the next copyout. On exit from the loop: - * last == NULL - socket to be flushed - * last != NULL - * lastlen > last->m_len - uio to be filled, last to be adjusted - * lastlen == 0 - MT_CONTROL, M_EOR or M_NOTREADY encountered + * Find split point for the next copyout. On exit from the loop, + * 'next' points to the new head of the buffer STAILQ and 'datalen' + * contains the amount of data we will copy out at the end. The + * copyout is protected by the I/O lock only, as writers can only + * append to the buffer. We need to record the socket buffer state + * and do all length adjustments before dropping the socket buffer lock. */ - space = uio->uio_resid; - datalen = 0; - for (m = first, last = sb->uxst_fnrdy, lastlen = 0; - m != sb->uxst_fnrdy; + for (space = uio->uio_resid, m = next = first, part = NULL, datalen = 0; + space > 0 && m != sb->uxst_fnrdy && m->m_type == MT_DATA; m = STAILQ_NEXT(m, m_stailq)) { - if (m->m_type != MT_DATA) { - last = m; - lastlen = 0; - break; - } if (space >= m->m_len) { space -= m->m_len; datalen += m->m_len; @@ -1480,29 +1474,29 @@ if (m->m_flags & M_EXT) mbcnt += m->m_ext.ext_size; if (m->m_flags & M_EOR) { - last = STAILQ_NEXT(m, m_stailq); - lastlen = 0; flags |= MSG_EOR; + next = STAILQ_NEXT(m, m_stailq); break; } } else { datalen += space; - last = m; - lastlen = space; + partlen = space; + if (!peek) { + m->m_len -= partlen; + m->m_data += partlen; + } + next = part = m; break; } + next = STAILQ_NEXT(m, m_stailq); } - UIPC_STREAM_SBCHECK(sb); if (!peek) { - if (last == NULL) + STAILQ_FIRST(&sb->uxst_mbq) = next; +#ifdef INVARIANTS + if (next == NULL) STAILQ_INIT(&sb->uxst_mbq); - else { - STAILQ_FIRST(&sb->uxst_mbq) = last; - MPASS(last->m_len > lastlen); - last->m_len -= lastlen; - last->m_data += lastlen; - } +#endif MPASS(sb->sb_acc >= datalen); sb->sb_acc -= datalen; sb->sb_ccc -= datalen; @@ -1629,33 +1623,34 @@ } } - for (m = first; m != last; m = next) { + for (m = first; datalen > 0; m = next) { + void *data; + u_int len; + next = STAILQ_NEXT(m, m_stailq); - error = uiomove(mtod(m, char *), m->m_len, uio); + if (m == part) { + data = peek ? + mtod(m, char *) : mtod(m, char *) - partlen; + len = partlen; + } else { + data = mtod(m, char *); + len = m->m_len; + } + error = uiomove(data, len, uio); if (__predict_false(error)) { - SOCK_IO_RECV_UNLOCK(so); if (!peek) - for (; m != last; m = next) { + for (; m != part && datalen > 0; m = next) { next = STAILQ_NEXT(m, m_stailq); + MPASS(datalen >= m->m_len); + datalen -= m->m_len; m_free(m); } - return (error); - } - if (!peek) - m_free(m); - } - if (last != NULL && lastlen > 0) { - if (!peek) { - MPASS(!(m->m_flags & M_PKTHDR)); - MPASS(last->m_data - M_START(last) >= lastlen); - error = uiomove(mtod(last, char *) - lastlen, - lastlen, uio); - } else - error = uiomove(mtod(last, char *), lastlen, uio); - if (__predict_false(error)) { SOCK_IO_RECV_UNLOCK(so); return (error); } + datalen -= len; + if (!peek && m != part) + m_free(m); } if (waitall && !(flags & MSG_EOR) && uio->uio_resid > 0) goto restart; diff --git a/tests/sys/kern/unix_seqpacket_test.c b/tests/sys/kern/unix_seqpacket_test.c --- a/tests/sys/kern/unix_seqpacket_test.c +++ b/tests/sys/kern/unix_seqpacket_test.c @@ -1314,6 +1314,67 @@ free(params.records); } +/* See bug 290658. */ +#define PEEK_RACE_SIZE 10 +#define PEEK_RACE_TRIES 10000 +static void * +peek_race_writer(void *args) +{ + struct timespec ts = {}; + u_short seed[3]; + char buf[PEEK_RACE_SIZE]; + int fd = *(int *)args; + + arc4random_buf(seed, sizeof(seed)); + for (u_int i = 0; i < PEEK_RACE_TRIES; i++) { + ATF_REQUIRE_EQ(PEEK_RACE_SIZE, + send(fd, buf, sizeof(buf), MSG_EOR)); + ts.tv_nsec = nrand48(seed) % 20; + (void)clock_nanosleep(CLOCK_MONOTONIC_FAST, 0, &ts, NULL); + } + + return (NULL); +} + +static void * +peek_race_peeker(void *args) +{ + char buf[PEEK_RACE_SIZE * 10]; + int fd = *(int *)args; + + for (u_int i = 0; i < PEEK_RACE_TRIES; i++) { + ssize_t rcvd; + + while ((rcvd = recv(fd, buf, sizeof(buf), + MSG_PEEK | MSG_DONTWAIT)) == -1) + ATF_REQUIRE(errno == EAGAIN); + ATF_REQUIRE(rcvd == PEEK_RACE_SIZE); + + ATF_REQUIRE_EQ(PEEK_RACE_SIZE, + recv(fd, buf, sizeof(buf), 0)); + } + + return (NULL); +} + +ATF_TC_WITHOUT_HEAD(peek_race); +ATF_TC_BODY(peek_race, tc) +{ + pthread_t peeker, writer; + int sv[2]; + + do_socketpair(sv); + + ATF_REQUIRE_EQ(0, pthread_create(&writer, NULL, peek_race_writer, + &sv[0])); + ATF_REQUIRE_EQ(0, pthread_create(&peeker, NULL, peek_race_peeker, + &sv[1])); + ATF_REQUIRE_EQ(0, pthread_join(writer, NULL)); + ATF_REQUIRE_EQ(0, pthread_join(peeker, NULL)); + close(sv[0]); + close(sv[1]); +} + /* * Main. */ @@ -1370,6 +1431,7 @@ ATF_TP_ADD_TC(tp, pipe_128k_8k); ATF_TP_ADD_TC(tp, pipe_128k_128k); ATF_TP_ADD_TC(tp, random_eor_and_waitall); + ATF_TP_ADD_TC(tp, peek_race); return atf_no_error(); }