diff --git a/usr.sbin/bhyve/iov.h b/usr.sbin/bhyve/iov.h --- a/usr.sbin/bhyve/iov.h +++ b/usr.sbin/bhyve/iov.h @@ -31,12 +31,12 @@ #ifndef _IOV_H_ #define _IOV_H_ -void seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, - int *niov2, size_t seek); -void truncate_iov(struct iovec *iov, int *niov, size_t length); -size_t count_iov(const struct iovec *iov, int niov); -ssize_t iov_to_buf(const struct iovec *iov, int niov, void **buf); -ssize_t buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, - int niov, size_t seek); +#include + +void split_iov(struct iovec *, int *, struct iovec **, int *, size_t); +size_t count_iov(const struct iovec *, int); +bool check_iov_len(const struct iovec *, int, size_t); +ssize_t iov_to_buf(const struct iovec *, int, void **); +ssize_t buf_to_iov(const void *, size_t, const struct iovec *, int); #endif /* _IOV_H_ */ diff --git a/usr.sbin/bhyve/iov.c b/usr.sbin/bhyve/iov.c --- a/usr.sbin/bhyve/iov.c +++ b/usr.sbin/bhyve/iov.c @@ -32,38 +32,64 @@ #include #include +#include #include #include #include "iov.h" +/* + * Given an array of iovecs iov1[niov1] and an offset, truncate iov1 at offset + * and return the address and count of the remaining iovecs in iov2 and niov2. + */ void -seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, int *niov2, - size_t seek) +split_iov(struct iovec *iov1, int *niov1, struct iovec **iov2, int *niov2, + size_t offset) { size_t remainder = 0; - size_t left = seek; int i, j; - for (i = 0; i < niov1; i++) { - size_t toseek = MIN(left, iov1[i].iov_len); - left -= toseek; - - if (toseek == iov1[i].iov_len) + /* Seek to the requested offset and truncate the final iovec. */ + for (i = 0; i < *niov1; i++) { + if (offset > iov1[i].iov_len) { + /* + * We're seeking past this iovec. Adjust the offset and + * move on. 
+ */ + offset -= iov1[i].iov_len; continue; - - if (left == 0) { - remainder = toseek; - break; } - } - for (j = i; j < niov1; j++) { - iov2[j - i].iov_base = (char *)iov1[j].iov_base + remainder; - iov2[j - i].iov_len = iov1[j].iov_len - remainder; - remainder = 0; + /* + * We're seeking within this iovec. Calculate the remainder, + * truncate at offset, and update iov2, niov2, iov1, and niov1. + */ + remainder = iov1[i].iov_len - offset; + iov1[i].iov_len = offset; + + *iov2 = &iov1[i + 1]; /* iov2 starts after the final iov1 */ + *niov2 = *niov1 - i - 1; /* remaining iovecs in iov2 */ + *niov1 = i + 1; /* iovecs counted so far */ + break; } - *niov2 = j - i; + /* + * Check for the (unlikely, ideally) case where there is a remainder + * from the final iovec, and deal with it if necessary. + */ + if (remainder > 0) { + /* + * Make room for the remainder iovec by shifting the array *iov2 + * up by one (note: (*iov2)[j], not *iov2[j]) and counting it in + * niov2. The caller must provide the spare slot for it. + */ + for (j = (*niov2)++; j > 0; j--) + (*iov2)[j] = (*iov2)[j - 1]; + + /* Fill in the iovec covering the remainder. 
*/ + iov2[0]->iov_len = remainder; + iov2[0]->iov_base = + (char *)iov1[*niov1 - 1].iov_base + offset; + } } size_t @@ -78,22 +104,19 @@ return (total); } -void -truncate_iov(struct iovec *iov, int *niov, size_t length) +bool +check_iov_len(const struct iovec *iov, int niov, size_t len) { - size_t done = 0; + size_t total = 0; int i; - for (i = 0; i < *niov; i++) { - size_t toseek = MIN(length - done, iov[i].iov_len); - done += toseek; - - if (toseek <= iov[i].iov_len) { - iov[i].iov_len = toseek; - *niov = i + 1; - return; - } + for (i = 0; i < niov; i++) { + total += iov[i].iov_len; + if (total >= len) + return (true); } + + return (false); } ssize_t @@ -116,31 +139,16 @@ } ssize_t -buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, int niov, - size_t seek) +buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, int niov) { - struct iovec *diov; size_t off = 0, len; int i; - if (seek > 0) { - int ndiov; - - diov = malloc(sizeof(struct iovec) * niov); - seek_iov(iov, niov, diov, &ndiov, seek); - iov = diov; - niov = ndiov; - } - for (i = 0; i < niov && off < buflen; i++) { len = MIN(iov[i].iov_len, buflen - off); memcpy(iov[i].iov_base, (const uint8_t *)buf + off, len); off += len; } - if (seek > 0) - free(diov); - return ((ssize_t)off); } - diff --git a/usr.sbin/bhyve/net_backends.c b/usr.sbin/bhyve/net_backends.c --- a/usr.sbin/bhyve/net_backends.c +++ b/usr.sbin/bhyve/net_backends.c @@ -197,7 +197,7 @@ * we read it from there. */ ret = buf_to_iov(priv->bbuf, priv->bbuflen, - iov, iovcnt, 0); + iov, iovcnt); /* Mark the bounce buffer as empty. */ priv->bbuflen = 0; diff --git a/usr.sbin/bhyve/pci_virtio_scsi.c b/usr.sbin/bhyve/pci_virtio_scsi.c --- a/usr.sbin/bhyve/pci_virtio_scsi.c +++ b/usr.sbin/bhyve/pci_virtio_scsi.c @@ -3,6 +3,7 @@ * * Copyright (c) 2016 Jakub Klama . * Copyright (c) 2018 Marcelo Araujo . + * Copyright 2025 Hans Rosenfeld * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -28,6 +29,7 @@ * SUCH DAMAGE. */ + #include #include #include @@ -64,17 +66,17 @@ #include "virtio.h" #include "iov.h" -#define VTSCSI_RINGSZ 64 +#define VTSCSI_RINGSZ 64 #define VTSCSI_REQUESTQ 1 #define VTSCSI_THR_PER_Q 16 #define VTSCSI_MAXQ (VTSCSI_REQUESTQ + 2) #define VTSCSI_MAXSEG 64 #define VTSCSI_IN_HEADER_LEN(_sc) \ - (sizeof(struct pci_vtscsi_req_cmd_rd) + _sc->vss_config.cdb_size) + (sizeof (struct pci_vtscsi_req_cmd_rd) + _sc->vss_config.cdb_size) -#define VTSCSI_OUT_HEADER_LEN(_sc) \ - (sizeof(struct pci_vtscsi_req_cmd_wr) + _sc->vss_config.sense_size) +#define VTSCSI_OUT_HEADER_LEN(_sc) \ + (sizeof (struct pci_vtscsi_req_cmd_wr) + _sc->vss_config.sense_size) #define VIRTIO_SCSI_MAX_CHANNEL 0 #define VIRTIO_SCSI_MAX_TARGET 0 @@ -101,45 +103,57 @@ uint32_t max_lun; } __attribute__((packed)); +STAILQ_HEAD(pci_vtscsi_req_queue, pci_vtscsi_request); + struct pci_vtscsi_queue { - struct pci_vtscsi_softc * vsq_sc; - struct vqueue_info * vsq_vq; - pthread_mutex_t vsq_mtx; - pthread_mutex_t vsq_qmtx; - pthread_cond_t vsq_cv; - STAILQ_HEAD(, pci_vtscsi_request) vsq_requests; - LIST_HEAD(, pci_vtscsi_worker) vsq_workers; + struct pci_vtscsi_softc *vsq_sc; + struct vqueue_info *vsq_vq; + pthread_mutex_t vsq_rmtx; + pthread_mutex_t vsq_fmtx; + pthread_mutex_t vsq_qmtx; + pthread_cond_t vsq_cv; + struct pci_vtscsi_req_queue vsq_requests; + struct pci_vtscsi_req_queue vsq_free_requests; + LIST_HEAD(, pci_vtscsi_worker) vsq_workers; }; struct pci_vtscsi_worker { - struct pci_vtscsi_queue * vsw_queue; - pthread_t vsw_thread; - bool vsw_exiting; - LIST_ENTRY(pci_vtscsi_worker) vsw_link; + struct pci_vtscsi_queue *vsw_queue; + pthread_t vsw_thread; + bool vsw_exiting; + LIST_ENTRY(pci_vtscsi_worker) vsw_link; }; struct pci_vtscsi_request { - struct pci_vtscsi_queue * vsr_queue; - struct iovec vsr_iov_in[VTSCSI_MAXSEG]; - int vsr_niov_in; - struct iovec vsr_iov_out[VTSCSI_MAXSEG]; - int 
vsr_niov_out; - uint32_t vsr_idx; - STAILQ_ENTRY(pci_vtscsi_request) vsr_link; + struct pci_vtscsi_queue *vsr_queue; + struct iovec vsr_iov[VTSCSI_MAXSEG + 2]; + struct iovec *vsr_iov_in; + struct iovec *vsr_iov_out; + struct iovec *vsr_data_iov_in; + struct iovec *vsr_data_iov_out; + struct pci_vtscsi_req_cmd_rd *vsr_cmd_rd; + struct pci_vtscsi_req_cmd_wr *vsr_cmd_wr; + union ctl_io *vsr_ctl_io; + int vsr_niov_in; + int vsr_niov_out; + int vsr_data_niov_in; + int vsr_data_niov_out; + uint32_t vsr_idx; + STAILQ_ENTRY(pci_vtscsi_request) vsr_link; }; /* * Per-device softc */ struct pci_vtscsi_softc { - struct virtio_softc vss_vs; - struct vqueue_info vss_vq[VTSCSI_MAXQ]; - struct pci_vtscsi_queue vss_queues[VTSCSI_REQUESTQ]; - pthread_mutex_t vss_mtx; - int vss_iid; - int vss_ctl_fd; - uint32_t vss_features; - struct pci_vtscsi_config vss_config; + struct virtio_softc vss_vs; + struct vqueue_info vss_vq[VTSCSI_MAXQ]; + struct pci_vtscsi_queue vss_queues[VTSCSI_REQUESTQ]; + pthread_mutex_t vss_mtx; + int vss_iid; + int vss_ctl_fd; + uint32_t vss_features; + struct pci_vtscsi_config vss_config; }; #define VIRTIO_SCSI_T_TMF 0 @@ -150,7 +164,9 @@ #define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4 #define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5 #define VIRTIO_SCSI_T_TMF_QUERY_TASK 6 -#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7 +#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7 + +#define VIRTIO_SCSI_T_TMF_MAX_FUNC VIRTIO_SCSI_T_TMF_QUERY_TASK_SET /* command-specific response values */ #define VIRTIO_SCSI_S_FUNCTION_COMPLETE 0 @@ -158,11 +174,11 @@ #define VIRTIO_SCSI_S_FUNCTION_REJECTED 11 struct pci_vtscsi_ctrl_tmf { - uint32_t type; - uint32_t subtype; - uint8_t lun[8]; - uint64_t id; - uint8_t response; + const uint32_t type; + const uint32_t subtype; + const uint8_t lun[8]; + const uint64_t id; + uint8_t response; } __attribute__((packed)); #define VIRTIO_SCSI_T_AN_QUERY 1 @@ -174,15 +190,15 @@ #define VIRTIO_SCSI_EVT_ASYNC_DEVICE_BUSY 64 struct pci_vtscsi_ctrl_an { - uint32_t type; 
- uint8_t lun[8]; - uint32_t event_requested; - uint32_t event_actual; - uint8_t response; + const uint32_t type; + const uint8_t lun[8]; + const uint32_t event_requested; + uint32_t event_actual; + uint8_t response; } __attribute__((packed)); /* command-specific response values */ -#define VIRTIO_SCSI_S_OK 0 +#define VIRTIO_SCSI_S_OK 0 #define VIRTIO_SCSI_S_OVERRUN 1 #define VIRTIO_SCSI_S_ABORTED 2 #define VIRTIO_SCSI_S_BAD_TARGET 3 @@ -201,27 +217,27 @@ #define VIRTIO_SCSI_S_ACA 3 struct pci_vtscsi_event { - uint32_t event; - uint8_t lun[8]; - uint32_t reason; + uint32_t event; + uint8_t lun[8]; + uint32_t reason; } __attribute__((packed)); struct pci_vtscsi_req_cmd_rd { - uint8_t lun[8]; - uint64_t id; - uint8_t task_attr; - uint8_t prio; - uint8_t crn; - uint8_t cdb[]; + const uint8_t lun[8]; + const uint64_t id; + const uint8_t task_attr; + const uint8_t prio; + const uint8_t crn; + const uint8_t cdb[]; } __attribute__((packed)); struct pci_vtscsi_req_cmd_wr { - uint32_t sense_len; - uint32_t residual; - uint16_t status_qualifier; - uint8_t status; - uint8_t response; - uint8_t sense[]; + uint32_t sense_len; + uint32_t residual; + uint16_t status_qualifier; + uint8_t status; + uint8_t response; + uint8_t sense[]; } __attribute__((packed)); static void *pci_vtscsi_proc(void *); @@ -229,14 +245,28 @@ static void pci_vtscsi_neg_features(void *, uint64_t); static int pci_vtscsi_cfgread(void *, int, int, uint32_t *); static int pci_vtscsi_cfgwrite(void *, int, int, uint32_t); -static inline int pci_vtscsi_get_lun(uint8_t *); -static int pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t); -static int pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *, - struct pci_vtscsi_ctrl_tmf *); -static int pci_vtscsi_an_handle(struct pci_vtscsi_softc *, - struct pci_vtscsi_ctrl_an *); -static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *, struct iovec *, - int, struct iovec *, int); +static inline bool pci_vtscsi_check_lun(const uint8_t *); +static 
inline int pci_vtscsi_get_lun(const uint8_t *); + +static void pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *, void *); +static void pci_vtscsi_an_handle(struct pci_vtscsi_softc *, void *); +static void pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, + size_t); + +static struct pci_vtscsi_request *pci_vtscsi_alloc_request( + struct pci_vtscsi_softc *); +static void pci_vtscsi_free_request(struct pci_vtscsi_request *); +static struct pci_vtscsi_request *pci_vtscsi_get_request( + struct pci_vtscsi_req_queue *); +static void pci_vtscsi_put_request(struct pci_vtscsi_req_queue *, + struct pci_vtscsi_request *); +static void pci_vtscsi_queue_request(struct pci_vtscsi_softc *, + struct vqueue_info *); +static void pci_vtscsi_return_request(struct pci_vtscsi_queue *, + struct pci_vtscsi_request *, int); + +static int pci_vtscsi_request_handle(struct pci_vtscsi_softc *, + struct pci_vtscsi_request *); static void pci_vtscsi_controlq_notify(void *, struct vqueue_info *); static void pci_vtscsi_eventq_notify(void *, struct vqueue_info *); static void pci_vtscsi_requestq_notify(void *, struct vqueue_info *); @@ -247,12 +277,12 @@ static struct virtio_consts vtscsi_vi_consts = { .vc_name = "vtscsi", .vc_nvq = VTSCSI_MAXQ, - .vc_cfgsize = sizeof(struct pci_vtscsi_config), + .vc_cfgsize = sizeof (struct pci_vtscsi_config), .vc_reset = pci_vtscsi_reset, .vc_cfgread = pci_vtscsi_cfgread, .vc_cfgwrite = pci_vtscsi_cfgwrite, .vc_apply_features = pci_vtscsi_neg_features, - .vc_hv_caps = 0, + .vc_hv_caps = VIRTIO_RING_F_INDIRECT_DESC, }; static void * @@ -260,36 +290,33 @@ { struct pci_vtscsi_worker *worker = (struct pci_vtscsi_worker *)arg; struct pci_vtscsi_queue *q = worker->vsw_queue; - struct pci_vtscsi_request *req; + struct pci_vtscsi_softc *sc = q->vsq_sc; int iolen; for (;;) { - pthread_mutex_lock(&q->vsq_mtx); + struct pci_vtscsi_request *req; + + pthread_mutex_lock(&q->vsq_rmtx); - while (STAILQ_EMPTY(&q->vsq_requests) - && !worker->vsw_exiting) - 
pthread_cond_wait(&q->vsq_cv, &q->vsq_mtx); + while (STAILQ_EMPTY(&q->vsq_requests) && !worker->vsw_exiting) + pthread_cond_wait(&q->vsq_cv, &q->vsq_rmtx); if (worker->vsw_exiting) break; - req = STAILQ_FIRST(&q->vsq_requests); - STAILQ_REMOVE_HEAD(&q->vsq_requests, vsr_link); + req = pci_vtscsi_get_request(&q->vsq_requests); + pthread_mutex_unlock(&q->vsq_rmtx); - pthread_mutex_unlock(&q->vsq_mtx); - iolen = pci_vtscsi_request_handle(q, req->vsr_iov_in, - req->vsr_niov_in, req->vsr_iov_out, req->vsr_niov_out); + DPRINTF("I/O request lun %d, data_niov_in %d, data_niov_out %d", + pci_vtscsi_get_lun(req->vsr_cmd_rd->lun), + req->vsr_data_niov_in, req->vsr_data_niov_out); - pthread_mutex_lock(&q->vsq_qmtx); - vq_relchain(q->vsq_vq, req->vsr_idx, iolen); - vq_endchains(q->vsq_vq, 0); - pthread_mutex_unlock(&q->vsq_qmtx); + iolen = pci_vtscsi_request_handle(sc, req); - DPRINTF("request completed", req->vsr_idx); - free(req); + pci_vtscsi_return_request(q, req, iolen); } - pthread_mutex_unlock(&q->vsq_mtx); + pthread_mutex_unlock(&q->vsq_rmtx); return (NULL); } @@ -304,13 +331,13 @@ vi_reset_dev(&sc->vss_vs); /* initialize config structure */ - sc->vss_config = (struct pci_vtscsi_config){ + sc->vss_config = (struct pci_vtscsi_config) { .num_queues = VTSCSI_REQUESTQ, /* Leave room for the request and the response. */ .seg_max = VTSCSI_MAXSEG - 2, .max_sectors = 2, .cmd_per_lun = 1, - .event_info_size = sizeof(struct pci_vtscsi_event), + .event_info_size = sizeof (struct pci_vtscsi_event), .sense_size = 96, .cdb_size = 32, .max_channel = VIRTIO_SCSI_MAX_CHANNEL, @@ -345,57 +372,86 @@ return (0); } +/* + * Check that the given LUN address conforms to the virtio spec, does not + * address a target other than 0, and especially does not address the + * REPORT_LUNS well-known logical unit. 
+ */ +static inline bool +pci_vtscsi_check_lun(const uint8_t *lun) +{ + /* + * The virtio spec says that we SHOULD implement the REPORT_LUNS well- + * known logical unit, but we currently don't. + */ + if (lun[0] == 0xC1) + return (false); + + /* A well-formed LUN address begins with 0x01. */ + if (lun[0] != 0x01) + return (false); + + /* Next comes the target. We currently only support target 0. */ + if (lun[1] != 0x00) + return (false); + + /* Next two bytes contain the LUN. We only support single-level LUNs. */ + if ((lun[2] & 0xc0) != 0x40) + return (false); + + /* The remaining four bytes must be zero. */ + if (lun[4] != 0 || lun[5] != 0 || lun[6] != 0 || lun[7] != 0) + return (false); + + return (true); +} + static inline int -pci_vtscsi_get_lun(uint8_t *lun) +pci_vtscsi_get_lun(const uint8_t *lun) { + assert(lun[0] == 0x01); + assert(lun[1] == 0x00); + assert((lun[2] & 0xc0) == 0x40); return (((lun[2] << 8) | lun[3]) & 0x3fff); } -static int -pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf, - size_t bufsize) +static void +pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc, void *buf) { - struct pci_vtscsi_ctrl_tmf *tmf; - struct pci_vtscsi_ctrl_an *an; - uint32_t type; - - if (bufsize < sizeof(uint32_t)) { - WPRINTF("ignoring truncated control request"); - return (0); - } + struct pci_vtscsi_ctrl_tmf *tmf = buf; + union ctl_io *io; + int err; - type = *(uint32_t *)buf; + if (pci_vtscsi_check_lun(tmf->lun) == false) { + WPRINTF("TMF request to invalid LUN %.2hhx%.2hhx-%.2hhx%.2hhx-" + "%.2hhx%.2hhx-%.2hhx%.2hhx", tmf->lun[0], tmf->lun[1], + tmf->lun[2], tmf->lun[3], tmf->lun[4], tmf->lun[5], + tmf->lun[6], tmf->lun[7]); - if (type == VIRTIO_SCSI_T_TMF) { - if (bufsize != sizeof(*tmf)) { - WPRINTF("ignoring tmf request with size %zu", bufsize); - return (0); - } - tmf = (struct pci_vtscsi_ctrl_tmf *)buf; - return (pci_vtscsi_tmf_handle(sc, tmf)); + tmf->response = VIRTIO_SCSI_S_BAD_TARGET; + return; } - if (type == VIRTIO_SCSI_T_AN_QUERY) { - 
if (bufsize != sizeof(*an)) { - WPRINTF("ignoring AN request with size %zu", bufsize); - return (0); - } - an = (struct pci_vtscsi_ctrl_an *)buf; - return (pci_vtscsi_an_handle(sc, an)); + if (tmf->subtype > VIRTIO_SCSI_T_TMF_MAX_FUNC) { + WPRINTF("pci_vtscsi_tmf_handle: invalid subtype %u", + tmf->subtype); + tmf->response = VIRTIO_SCSI_S_FUNCTION_REJECTED; + return; } - return (0); -} - -static int -pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc, - struct pci_vtscsi_ctrl_tmf *tmf) -{ - union ctl_io *io; - int err; + DPRINTF("TMF request lun %d, subtype %d, id %lu", + pci_vtscsi_get_lun(tmf->lun), tmf->subtype, tmf->id); io = ctl_scsi_alloc_io(sc->vss_iid); + if (io == NULL) { + WPRINTF("failed to allocate ctl_io: err=%d (%s)", + errno, strerror(errno)); + + tmf->response = VIRTIO_SCSI_S_FAILURE; + return; + } + ctl_scsi_zero_io(io); io->io_hdr.io_type = CTL_IO_TASK; @@ -453,73 +509,300 @@ tmf->response = io->taskio.task_status; ctl_scsi_free_io(io); - return (1); } -static int -pci_vtscsi_an_handle(struct pci_vtscsi_softc *sc __unused, - struct pci_vtscsi_ctrl_an *an __unused) +static void +pci_vtscsi_an_handle(struct pci_vtscsi_softc *sc __unused, void *buf) { - return (0); + struct pci_vtscsi_ctrl_an *an = buf; + + if (pci_vtscsi_check_lun(an->lun) == false) { + WPRINTF("AN request to invalid LUN %.2hhx%.2hhx-%.2hhx%.2hhx-" + "%.2hhx%.2hhx-%.2hhx%.2hhx", an->lun[0], an->lun[1], + an->lun[2], an->lun[3], an->lun[4], an->lun[5], an->lun[6], + an->lun[7]); + an->response = VIRTIO_SCSI_S_BAD_TARGET; + return; + } + + DPRINTF("AN request lun %d, event requested %x", + pci_vtscsi_get_lun(an->lun), an->event_requested); + + an->response = VIRTIO_SCSI_S_FAILURE; } -static int -pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, - int niov_in, struct iovec *iov_out, int niov_out) +static void +pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf, + size_t bufsize) { - struct pci_vtscsi_softc *sc = q->vsq_sc; - struct 
pci_vtscsi_req_cmd_rd *cmd_rd = NULL; - struct pci_vtscsi_req_cmd_wr *cmd_wr; - struct iovec data_iov_in[VTSCSI_MAXSEG], data_iov_out[VTSCSI_MAXSEG]; - union ctl_io *io; - int data_niov_in, data_niov_out; - void *ext_data_ptr = NULL; - uint32_t ext_data_len = 0, ext_sg_entries = 0; - int err, nxferred; + uint32_t type; + + if (bufsize < sizeof (uint32_t)) { + WPRINTF("ignoring truncated control request"); + return; + } + + type = *(uint32_t *)buf; + + if (type == VIRTIO_SCSI_T_TMF) { + if (bufsize != sizeof (struct pci_vtscsi_ctrl_tmf)) { + WPRINTF("ignoring TMF request with size %zu", bufsize); + return; + } + + pci_vtscsi_tmf_handle(sc, buf); + } else if (type == VIRTIO_SCSI_T_AN_QUERY) { + if (bufsize != sizeof (struct pci_vtscsi_ctrl_an)) { + WPRINTF("ignoring AN request with size %zu", bufsize); + return; + } + + pci_vtscsi_an_handle(sc, buf); + } else { + WPRINTF("ignoring unknown control request type = %u", type); + } +} + +static struct pci_vtscsi_request * +pci_vtscsi_alloc_request(struct pci_vtscsi_softc *sc) +{ + struct pci_vtscsi_request *req; + + req = calloc(1, sizeof (struct pci_vtscsi_request)); + if (req == NULL) + goto alloc_fail; + + req->vsr_cmd_rd = calloc(1, VTSCSI_IN_HEADER_LEN(sc)); + if (req->vsr_cmd_rd == NULL) + goto alloc_fail; + req->vsr_cmd_wr = calloc(1, VTSCSI_OUT_HEADER_LEN(sc)); + if (req->vsr_cmd_wr == NULL) + goto alloc_fail; + + req->vsr_ctl_io = ctl_scsi_alloc_io(sc->vss_iid); + if (req->vsr_ctl_io == NULL) + goto alloc_fail; + ctl_scsi_zero_io(req->vsr_ctl_io); + + return (req); + +alloc_fail: + EPRINTLN("failed to allocate request: %s", strerror(errno)); + + if (req != NULL) + pci_vtscsi_free_request(req); + + return (NULL); +} - if (count_iov(iov_out, niov_out) < VTSCSI_OUT_HEADER_LEN(sc)) { +static void +pci_vtscsi_free_request(struct pci_vtscsi_request *req) +{ + if (req->vsr_ctl_io != NULL) + free(req->vsr_ctl_io); + if (req->vsr_cmd_rd != NULL) + free(req->vsr_cmd_rd); + if (req->vsr_cmd_wr != NULL) + 
free(req->vsr_cmd_wr); + + free(req); +} + +static struct pci_vtscsi_request * +pci_vtscsi_get_request(struct pci_vtscsi_req_queue *req_queue) +{ + struct pci_vtscsi_request *req; + + assert(!STAILQ_EMPTY(req_queue)); + + req = STAILQ_FIRST(req_queue); + STAILQ_REMOVE_HEAD(req_queue, vsr_link); + + return (req); +} + +static void +pci_vtscsi_put_request(struct pci_vtscsi_req_queue *req_queue, + struct pci_vtscsi_request *req) +{ + STAILQ_INSERT_TAIL(req_queue, req, vsr_link); +} + +static void +pci_vtscsi_queue_request(struct pci_vtscsi_softc *sc, struct vqueue_info *vq) +{ + struct pci_vtscsi_queue *q = &sc->vss_queues[vq->vq_num - 2]; + struct pci_vtscsi_request *req; + struct vi_req vireq; + int n; + + pthread_mutex_lock(&q->vsq_fmtx); + req = pci_vtscsi_get_request(&q->vsq_free_requests); + assert(req != NULL); + pthread_mutex_unlock(&q->vsq_fmtx); + + n = vq_getchain(vq, req->vsr_iov, VTSCSI_MAXSEG, &vireq); + assert(n >= 1 && n <= VTSCSI_MAXSEG); + + req->vsr_idx = vireq.idx; + req->vsr_queue = q; + req->vsr_iov_in = &req->vsr_iov[0]; + req->vsr_niov_in = vireq.readable; + req->vsr_iov_out = &req->vsr_iov[vireq.readable]; + req->vsr_niov_out = vireq.writable; + + /* + * Make sure we got at least enough space for the VirtIO-SCSI + * command headers. If not, return this request immediately. 
+ */ + if (check_iov_len(req->vsr_iov_out, req->vsr_niov_out, + VTSCSI_OUT_HEADER_LEN(q->vsq_sc)) == false) { WPRINTF("ignoring request with insufficient output"); - return (0); + req->vsr_cmd_wr->response = VIRTIO_SCSI_S_FAILURE; + pci_vtscsi_return_request(q, req, 1); + return; } - if (count_iov(iov_in, niov_in) < VTSCSI_IN_HEADER_LEN(sc)) { + + if (check_iov_len(req->vsr_iov_in, req->vsr_niov_in, + VTSCSI_IN_HEADER_LEN(q->vsq_sc)) == false) { WPRINTF("ignoring request with incomplete header"); - return (0); + req->vsr_cmd_wr->response = VIRTIO_SCSI_S_FAILURE; + pci_vtscsi_return_request(q, req, 1); + return; } - seek_iov(iov_in, niov_in, data_iov_in, &data_niov_in, - VTSCSI_IN_HEADER_LEN(sc)); - seek_iov(iov_out, niov_out, data_iov_out, &data_niov_out, - VTSCSI_OUT_HEADER_LEN(sc)); + /* + * We have to split the iovec array into a header and data portion each + * for input and output. + * + * We need to start with the output section (at the end) in case the + * iovec covering the final part of the output header needs splitting, + * in which case split_iov() will move all remaining iovecs up by one + * to make room for a new iovec covering the first part of the output + * data portion. + */ + split_iov(req->vsr_iov_out, &req->vsr_niov_out, &req->vsr_data_iov_out, + &req->vsr_data_niov_out, VTSCSI_OUT_HEADER_LEN(q->vsq_sc)); + + /* + * Similarly, to not overwrite the first iovec of the output section, + * the 2nd call to split_iov() to split the input section must actually + * cover the entire iovec array (both input and the already split output + * sections). + */ + req->vsr_niov_in += req->vsr_niov_out + req->vsr_data_niov_out; + + split_iov(req->vsr_iov_in, &req->vsr_niov_in, &req->vsr_data_iov_in, + &req->vsr_data_niov_in, VTSCSI_IN_HEADER_LEN(q->vsq_sc)); + + /* + * And of course we now have to adjust data_niov_in accordingly. 
+ */ + req->vsr_data_niov_in -= req->vsr_niov_out + req->vsr_data_niov_out; + + /* + * iov_to_buf() realloc()s the buffer given as 3rd argument to the + * total size of all iovecs it will be copying. Since we've just + * truncated it in split_iov(), we know that the size will be + * VTSCSI_IN_HEADER_LEN(q->vsq_sc). + * + * Since we pre-allocated req->vsr_cmd_rd to this size, the realloc() + * should never fail. + * + * This will have to change if we begin allowing config space writes + * to change sense size. + */ + assert(iov_to_buf(req->vsr_iov_in, req->vsr_niov_in, + (void **)&req->vsr_cmd_rd) == VTSCSI_IN_HEADER_LEN(q->vsq_sc)); + + /* Make sure this request addresses a valid LUN. */ + if (pci_vtscsi_check_lun(req->vsr_cmd_rd->lun) == false) { + WPRINTF("I/O request to invalid LUN " + "%.2hhx%.2hhx-%.2hhx%.2hhx-%.2hhx%.2hhx-%.2hhx%.2hhx", + req->vsr_cmd_rd->lun[0], req->vsr_cmd_rd->lun[1], + req->vsr_cmd_rd->lun[2], req->vsr_cmd_rd->lun[3], + req->vsr_cmd_rd->lun[4], req->vsr_cmd_rd->lun[5], + req->vsr_cmd_rd->lun[6], req->vsr_cmd_rd->lun[7]); + req->vsr_cmd_wr->response = VIRTIO_SCSI_S_BAD_TARGET; + pci_vtscsi_return_request(q, req, 1); + return; + } - truncate_iov(iov_in, &niov_in, VTSCSI_IN_HEADER_LEN(sc)); - truncate_iov(iov_out, &niov_out, VTSCSI_OUT_HEADER_LEN(sc)); - iov_to_buf(iov_in, niov_in, (void **)&cmd_rd); + pthread_mutex_lock(&q->vsq_rmtx); + pci_vtscsi_put_request(&q->vsq_requests, req); + pthread_cond_signal(&q->vsq_cv); + pthread_mutex_unlock(&q->vsq_rmtx); - cmd_wr = calloc(1, VTSCSI_OUT_HEADER_LEN(sc)); - io = ctl_scsi_alloc_io(sc->vss_iid); - ctl_scsi_zero_io(io); + DPRINTF("request enqueued", vireq.idx); +} + +static void +pci_vtscsi_return_request(struct pci_vtscsi_queue *q, + struct pci_vtscsi_request *req, int iolen) +{ + void *cmd_rd = req->vsr_cmd_rd; + void *cmd_wr = req->vsr_cmd_wr; + void *ctl_io = req->vsr_ctl_io; + int idx = req->vsr_idx; + + DPRINTF("request completed, response %d", idx, + req->vsr_cmd_wr->response); + + iolen 
+= buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(q->vsq_sc), + req->vsr_iov_out, req->vsr_niov_out); + + ctl_scsi_zero_io(req->vsr_ctl_io); + + memset(cmd_rd, 0, VTSCSI_IN_HEADER_LEN(q->vsq_sc)); + memset(cmd_wr, 0, VTSCSI_OUT_HEADER_LEN(q->vsq_sc)); + memset(req, 0, sizeof (struct pci_vtscsi_request)); + + req->vsr_cmd_rd = cmd_rd; + req->vsr_cmd_wr = cmd_wr; + req->vsr_ctl_io = ctl_io; + + pthread_mutex_lock(&q->vsq_fmtx); + pci_vtscsi_put_request(&q->vsq_free_requests, req); + pthread_mutex_unlock(&q->vsq_fmtx); + + pthread_mutex_lock(&q->vsq_qmtx); + vq_relchain(q->vsq_vq, idx, iolen); + vq_endchains(q->vsq_vq, 0); + pthread_mutex_unlock(&q->vsq_qmtx); +} + +static int +pci_vtscsi_request_handle(struct pci_vtscsi_softc *sc, + struct pci_vtscsi_request *req) +{ + union ctl_io *io = req->vsr_ctl_io; + void *ext_data_ptr = NULL; + uint32_t ext_data_len = 0, ext_sg_entries = 0; + int err, nxferred; io->io_hdr.nexus.initid = sc->vss_iid; - io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(cmd_rd->lun); + io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(req->vsr_cmd_rd->lun); io->io_hdr.io_type = CTL_IO_SCSI; - if (data_niov_in > 0) { - ext_data_ptr = (void *)data_iov_in; - ext_sg_entries = data_niov_in; - ext_data_len = count_iov(data_iov_in, data_niov_in); + if (req->vsr_data_niov_in > 0) { + ext_data_ptr = (void *)req->vsr_data_iov_in; + ext_sg_entries = req->vsr_data_niov_in; + ext_data_len = count_iov(req->vsr_data_iov_in, + req->vsr_data_niov_in); io->io_hdr.flags |= CTL_FLAG_DATA_OUT; - } else if (data_niov_out > 0) { - ext_data_ptr = (void *)data_iov_out; - ext_sg_entries = data_niov_out; - ext_data_len = count_iov(data_iov_out, data_niov_out); + } else if (req->vsr_data_niov_out > 0) { + ext_data_ptr = (void *)req->vsr_data_iov_out; + ext_sg_entries = req->vsr_data_niov_out; + ext_data_len = count_iov(req->vsr_data_iov_out, + req->vsr_data_niov_out); io->io_hdr.flags |= CTL_FLAG_DATA_IN; } io->scsiio.sense_len = sc->vss_config.sense_size; - io->scsiio.tag_num = 
cmd_rd->id; + io->scsiio.tag_num = req->vsr_cmd_rd->id; io->io_hdr.flags |= CTL_FLAG_USER_TAG; - switch (cmd_rd->task_attr) { + switch (req->vsr_cmd_rd->task_attr) { case VIRTIO_SCSI_S_ORDERED: io->scsiio.tag_type = CTL_TAG_ORDERED; break; @@ -539,7 +822,7 @@ io->scsiio.ext_data_len = ext_data_len; io->scsiio.ext_data_filled = 0; io->scsiio.cdb_len = sc->vss_config.cdb_size; - memcpy(io->scsiio.cdb, cmd_rd->cdb, sc->vss_config.cdb_size); + memcpy(io->scsiio.cdb, req->vsr_cmd_rd->cdb, sc->vss_config.cdb_size); if (pci_vtscsi_debug) { struct sbuf *sb = sbuf_new_auto(); @@ -552,22 +835,20 @@ err = ioctl(sc->vss_ctl_fd, CTL_IO, io); if (err != 0) { WPRINTF("CTL_IO: err=%d (%s)", errno, strerror(errno)); - cmd_wr->response = VIRTIO_SCSI_S_FAILURE; - } else { - cmd_wr->sense_len = MIN(io->scsiio.sense_len, - sc->vss_config.sense_size); - cmd_wr->residual = ext_data_len - io->scsiio.ext_data_filled; - cmd_wr->status = io->scsiio.scsi_status; - cmd_wr->response = VIRTIO_SCSI_S_OK; - memcpy(&cmd_wr->sense, &io->scsiio.sense_data, - cmd_wr->sense_len); + req->vsr_cmd_wr->response = VIRTIO_SCSI_S_FAILURE; + goto out; } - buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out, 0); - nxferred = VTSCSI_OUT_HEADER_LEN(sc) + io->scsiio.ext_data_filled; - free(cmd_rd); - free(cmd_wr); - ctl_scsi_free_io(io); + req->vsr_cmd_wr->sense_len = + MIN(io->scsiio.sense_len, sc->vss_config.sense_size); + req->vsr_cmd_wr->residual = ext_data_len - io->scsiio.ext_data_filled; + req->vsr_cmd_wr->status = io->scsiio.scsi_status; + req->vsr_cmd_wr->response = VIRTIO_SCSI_S_OK; + memcpy(&req->vsr_cmd_wr->sense, &io->scsiio.sense_data, + req->vsr_cmd_wr->sense_len); + +out: + nxferred = io->scsiio.ext_data_filled; return (nxferred); } @@ -579,7 +860,7 @@ struct vi_req req; void *buf = NULL; size_t bufsize; - int iolen, n; + int n; sc = vsc; @@ -588,14 +869,13 @@ assert(n >= 1 && n <= VTSCSI_MAXSEG); bufsize = iov_to_buf(iov, n, &buf); - iolen = pci_vtscsi_control_handle(sc, buf, bufsize); 
- buf_to_iov((uint8_t *)buf + bufsize - iolen, iolen, iov, n, - bufsize - iolen); + pci_vtscsi_control_handle(sc, buf, bufsize); + buf_to_iov((uint8_t *)buf, bufsize, iov, n); /* * Release this chain and handle more */ - vq_relchain(vq, req.idx, iolen); + vq_relchain(vq, req.idx, bufsize); } vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ free(buf); @@ -610,36 +890,8 @@ static void pci_vtscsi_requestq_notify(void *vsc, struct vqueue_info *vq) { - struct pci_vtscsi_softc *sc; - struct pci_vtscsi_queue *q; - struct pci_vtscsi_request *req; - struct iovec iov[VTSCSI_MAXSEG]; - struct vi_req vireq; - int n; - - sc = vsc; - q = &sc->vss_queues[vq->vq_num - 2]; - while (vq_has_descs(vq)) { - n = vq_getchain(vq, iov, VTSCSI_MAXSEG, &vireq); - assert(n >= 1 && n <= VTSCSI_MAXSEG); - - req = calloc(1, sizeof(struct pci_vtscsi_request)); - req->vsr_idx = vireq.idx; - req->vsr_queue = q; - req->vsr_niov_in = vireq.readable; - req->vsr_niov_out = vireq.writable; - memcpy(req->vsr_iov_in, iov, - req->vsr_niov_in * sizeof(struct iovec)); - memcpy(req->vsr_iov_out, iov + vireq.readable, - req->vsr_niov_out * sizeof(struct iovec)); - - pthread_mutex_lock(&q->vsq_mtx); - STAILQ_INSERT_TAIL(&q->vsq_requests, req, vsr_link); - pthread_cond_signal(&q->vsq_cv); - pthread_mutex_unlock(&q->vsq_mtx); - - DPRINTF("request enqueued", vireq.idx); + pci_vtscsi_queue_request(vsc, vq); } } @@ -647,27 +899,42 @@ pci_vtscsi_init_queue(struct pci_vtscsi_softc *sc, struct pci_vtscsi_queue *queue, int num) { - struct pci_vtscsi_worker *worker; char tname[MAXCOMLEN + 1]; int i; queue->vsq_sc = sc; queue->vsq_vq = &sc->vss_vq[num + 2]; - pthread_mutex_init(&queue->vsq_mtx, NULL); + pthread_mutex_init(&queue->vsq_rmtx, NULL); + pthread_mutex_init(&queue->vsq_fmtx, NULL); pthread_mutex_init(&queue->vsq_qmtx, NULL); pthread_cond_init(&queue->vsq_cv, NULL); STAILQ_INIT(&queue->vsq_requests); + STAILQ_INIT(&queue->vsq_free_requests); LIST_INIT(&queue->vsq_workers); + for (i = 0; i < 
VTSCSI_RINGSZ; i++) { + struct pci_vtscsi_request *req; + + req = pci_vtscsi_alloc_request(sc); + if (req == NULL) + return (-1); + + pci_vtscsi_put_request(&queue->vsq_free_requests, req); + } + for (i = 0; i < VTSCSI_THR_PER_Q; i++) { - worker = calloc(1, sizeof(struct pci_vtscsi_worker)); + struct pci_vtscsi_worker *worker; + worker = calloc(1, sizeof (struct pci_vtscsi_worker)); + if (worker == NULL) + return (-1); + worker->vsw_queue = queue; pthread_create(&worker->vsw_thread, NULL, &pci_vtscsi_proc, (void *)worker); - snprintf(tname, sizeof(tname), "vtscsi:%d-%d", num, i); + snprintf(tname, sizeof (tname), "vtscsi:%d-%d", num, i); pthread_set_name_np(worker->vsw_thread, tname); LIST_INSERT_HEAD(&queue->vsq_workers, worker, vsw_link); } @@ -675,23 +942,38 @@ return (0); } +/* + * The following forms are accepted for legacy config options: + * B:D:F,virtio-scsi, + * B:D:F,virtio-scsi,,,... + * B:D:F,virtio-scsi,,... + * B:D:F,virtio-scsi, + */ static int pci_vtscsi_legacy_config(nvlist_t *nvl, const char *opts) { - char *cp, *devname; + size_t n; if (opts == NULL) return (0); - cp = strchr(opts, ','); - if (cp == NULL) { - set_config_value_node(nvl, "dev", opts); - return (0); + n = strcspn(opts, ",="); + + if (opts[n] == ',' || opts[n] == '\0') { + char *tmp = strndup(opts, n); + + set_config_value_node(nvl, "dev", tmp); + free(tmp); + + opts += n; + if (opts[0] == ',' && opts[1] != '\0') + opts++; } - devname = strndup(opts, cp - opts); - set_config_value_node(nvl, "dev", devname); - free(devname); - return (pci_parse_legacy_config(nvl, cp + 1)); + + if (opts[0] == '\0') + return (0); + + return (pci_parse_legacy_config(nvl, opts)); } static int @@ -699,9 +981,13 @@ { struct pci_vtscsi_softc *sc; const char *devname, *value; + int err; int i; - sc = calloc(1, sizeof(struct pci_vtscsi_softc)); + sc = calloc(1, sizeof (struct pci_vtscsi_softc)); + if (sc == NULL) + return (-1); + value = get_config_value_node(nvl, "iid"); if (value != NULL) sc->vss_iid = 
strtoul(value, NULL, 10); @@ -722,7 +1008,7 @@ if (sc->vss_ctl_fd < 0) { WPRINTF("cannot open %s: %s", devname, strerror(errno)); free(sc); - return (1); + return (-1); } pthread_mutex_init(&sc->vss_mtx, NULL); @@ -730,6 +1016,19 @@ vi_softc_linkup(&sc->vss_vs, &vtscsi_vi_consts, sc, pi, sc->vss_vq); sc->vss_vs.vs_mtx = &sc->vss_mtx; + /* + * Perform a "reset" before we set up our queues. + * + * This will write the default config into vss_config, which is used + * by the rest of the driver to get the request header size. Note that + * if we ever allow the guest to override sense size through config + * space writes, pre-allocation of I/O requests will have to change + * accordingly. + */ + pthread_mutex_lock(&sc->vss_mtx); + pci_vtscsi_reset(sc); + pthread_mutex_unlock(&sc->vss_mtx); + /* controlq */ sc->vss_vq[0].vq_qsize = VTSCSI_RINGSZ; sc->vss_vq[0].vq_notify = pci_vtscsi_controlq_notify; @@ -742,7 +1041,10 @@ for (i = 2; i < VTSCSI_MAXQ; i++) { sc->vss_vq[i].vq_qsize = VTSCSI_RINGSZ; sc->vss_vq[i].vq_notify = pci_vtscsi_requestq_notify; - pci_vtscsi_init_queue(sc, &sc->vss_queues[i - 2], i - 2); + + err = pci_vtscsi_init_queue(sc, &sc->vss_queues[i - 2], i - 2); + if (err != 0) + return (err); } /* initialize config space */ @@ -752,8 +1054,10 @@ pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_SCSI); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); - if (vi_intr_init(&sc->vss_vs, 1, fbsdrun_virtio_msix())) - return (1); + err = vi_intr_init(&sc->vss_vs, 1, fbsdrun_virtio_msix()); + if (err != 0) + return (err); + vi_set_io_bar(&sc->vss_vs, 0); return (0);