Page MenuHomeFreeBSD

D37753.id115033.diff
No OneTemporary

D37753.id115033.diff

diff --git a/share/man/man5/Makefile b/share/man/man5/Makefile
--- a/share/man/man5/Makefile
+++ b/share/man/man5/Makefile
@@ -70,6 +70,7 @@
style.Makefile.5 \
style.mdoc.5 \
sysctl.conf.5 \
+ tarfs.5 \
tmpfs.5 \
unionfs.5
diff --git a/share/man/man5/tarfs.5 b/share/man/man5/tarfs.5
new file mode 100644
--- /dev/null
+++ b/share/man/man5/tarfs.5
@@ -0,0 +1,101 @@
+.\"-
+.\" Copyright (c) 2022 Klara, Inc.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd December 1, 2022
+.Dt TARFS 5
+.Os
+.Sh NAME
+.Nm tarfs
+.Nd tarball filesystem
+.Sh SYNOPSIS
+To compile this driver into the kernel, place the following line in
+your kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "options TARFS"
+.Ed
+.Pp
+Alternatively, to load the driver as a module at boot time, place the
+following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+tarfs_load="YES"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+driver implements a read-only filesystem backed by a
+.Xr tar 5
+file.
+Currently, only POSIX archives, optionally compressed with
+.Xr zstd 1 ,
+are supported.
+.Pp
+The preferred I/O size for
+.Nm
+filesystems can be adjusted using the
+.Va vfs.tarfs.ioshift
+sysctl setting and tunable.
+Setting it to 0 will reset it to its default value.
+Note that changes to this setting only apply to filesystems mounted
+after the change.
+.Sh DIAGNOSTICS
+If enabled by the
+.Dv TARFS_DEBUG
+kernel option, the
+.Va vfs.tarfs.debug
+sysctl setting can be used to control debugging output from the
+.Nm
+driver.
+Debugging output for individual sections of the driver can be enabled
+by adding together the relevant values from the table below.
+.Bl -column Value Description
+.It 0x01 Ta Memory allocations
+.It 0x02 Ta Checksum calculations
+.It 0x04 Ta Filesystem operations (vfsops)
+.It 0x08 Ta Path lookups
+.It 0x10 Ta File operations (vnops)
+.It 0x20 Ta General I/O
+.It 0x40 Ta Decompression
+.It 0x80 Ta Decompression index
+.It 0x100 Ta Sparse file mapping
+.El
+.Sh SEE ALSO
+.Xr tar 1 ,
+.Xr zstd 1 ,
+.Xr fstab 5 ,
+.Xr tar 5 ,
+.Xr mount 8 ,
+.Xr sysctl 8
+.Sh HISTORY
+.An -nosplit
+The
+.Nm
+driver was developed by
+.An Stephen J. Kiernan Aq Mt stevek@FreeBSD.org
+and
+.An Dag-Erling Smørgrav Aq Mt des@FreeBSD.org
+for Juniper Networks and Klara Systems.
+This manual page was written by
+.An Dag-Erling Smørgrav Aq Mt des@FreeBSD.org
+for Juniper Networks and Klara Systems.
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3614,6 +3614,10 @@
fs/smbfs/smbfs_subr.c optional smbfs
fs/smbfs/smbfs_vfsops.c optional smbfs
fs/smbfs/smbfs_vnops.c optional smbfs
+fs/tarfs/tarfs_io.c optional tarfs compile-with "${NORMAL_C} -I$S/contrib/zstd/lib/freebsd"
+fs/tarfs/tarfs_subr.c optional tarfs
+fs/tarfs/tarfs_vfsops.c optional tarfs
+fs/tarfs/tarfs_vnops.c optional tarfs
fs/udf/osta.c optional udf
fs/udf/udf_iconv.c optional udf_iconv
fs/udf/udf_vfsops.c optional udf
diff --git a/sys/conf/options b/sys/conf/options
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -265,6 +265,7 @@
PROCFS opt_dontuse.h
PSEUDOFS opt_dontuse.h
SMBFS opt_dontuse.h
+TARFS opt_dontuse.h
TMPFS opt_dontuse.h
UDF opt_dontuse.h
UNIONFS opt_dontuse.h
@@ -273,6 +274,9 @@
# Pseudofs debugging
PSEUDOFS_TRACE opt_pseudofs.h
+# Tarfs debugging
+TARFS_DEBUG opt_tarfs.h
+
# In-kernel GSS-API
KGSSAPI opt_kgssapi.h
KGSSAPI_DEBUG opt_kgssapi.h
diff --git a/sys/fs/tarfs/tarfs.h b/sys/fs/tarfs/tarfs.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/tarfs/tarfs.h
@@ -0,0 +1,254 @@
+/*-
+ * Copyright (c) 2013 Juniper Networks, Inc.
+ * Copyright (c) 2022 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _FS_TARFS_TARFS_H_
+#define _FS_TARFS_TARFS_H_
+
+#ifndef _KERNEL
+#error Should only be included by kernel
+#endif
+
+MALLOC_DECLARE(M_TARFSMNT);
+MALLOC_DECLARE(M_TARFSNODE);
+MALLOC_DECLARE(M_TARFSNAME);
+
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_vfs_tarfs);
+#endif
+
+struct componentname;
+struct mount;
+struct vnode;
+
+/*
+ * Internal representation of a tarfs file system node.
+ *
+ * The trailing anonymous union holds the type-specific part of the node;
+ * the member comments (VDIR, VLNK, VBLK or VCHR, VREG) name the node type
+ * each arm is meant for.
+ */
+struct tarfs_node {
+ TAILQ_ENTRY(tarfs_node) entries; /* presumably linkage on tarfs_mount.allnodes -- confirm */
+ TAILQ_ENTRY(tarfs_node) dirents; /* presumably linkage on the parent's dir.dirhead -- confirm */
+
+ struct mtx lock; /* taken by TARFS_NODE_LOCK / TARFS_NODE_UNLOCK */
+
+ struct vnode *vnode;
+ struct tarfs_mount *tmp;
+ enum vtype type;
+ ino_t ino;
+ off_t offset;
+ size_t size;
+ size_t physize;
+ char *name;
+ size_t namelen;
+
+ /* Node attributes */
+ uid_t uid;
+ gid_t gid;
+ mode_t mode;
+ unsigned int flags;
+ nlink_t nlink;
+ struct timespec atime;
+ struct timespec mtime;
+ struct timespec ctime;
+ struct timespec birthtime;
+ unsigned long gen;
+
+ /* Block map */
+ size_t nblk; /* presumably the number of entries in blk -- confirm */
+ struct tarfs_blk *blk;
+
+ struct tarfs_node *parent;
+ union {
+ /* VDIR */
+ struct {
+ TAILQ_HEAD(, tarfs_node) dirhead;
+ off_t lastcookie;
+ struct tarfs_node *lastnode;
+ } dir;
+
+ /* VLNK */
+ struct {
+ char *name;
+ size_t namelen;
+ } link;
+
+ /* VBLK or VCHR */
+ dev_t rdev;
+
+ /* VREG */
+ struct tarfs_node *other;
+ };
+};
+
+/*
+ * Entry in sparse file block map.  Each entry relates a run of l units
+ * starting at input (physical) offset i to output (logical) offset o.
+ * NOTE(review): units are presumably bytes -- confirm against the code
+ * that builds the map.
+ */
+struct tarfs_blk {
+ off_t i; /* input (physical) offset */
+ off_t o; /* output (logical) offset */
+ size_t l; /* length */
+};
+
+/*
+ * Decompression buffer.  The payload array is embedded in the structure,
+ * so each tarfs_zbuf is a little over TARFS_ZBUF_SIZE (1 MiB) in size.
+ */
+#define TARFS_ZBUF_SIZE 1048576
+struct tarfs_zbuf {
+ u_char buf[TARFS_ZBUF_SIZE];
+ size_t off; /* offset of contents */
+ size_t len; /* length of contents */
+};
+
+/*
+ * Internal representation of a tarfs mount point.
+ */
+struct tarfs_mount {
+ TAILQ_HEAD(, tarfs_node) allnodes;
+ struct mtx allnode_lock; /* taken by TARFS_ALLNODES_LOCK / TARFS_ALLNODES_UNLOCK */
+
+ struct g_consumer *cp;
+ struct cdev *dev;
+ struct tarfs_node *root;
+ struct vnode *vp; /* vnode of the tar file; the I/O layer reads from it */
+ struct mount *vfs; /* mount the zio vnode is created on */
+ ino_t ino;
+ struct unrhdr *ino_unr;
+ size_t iosize; /* block size used by the zio read and strategy routines */
+ size_t nblocks;
+ size_t nfiles;
+ time_t mtime; /* default mtime for directories */
+
+ struct tarfs_zio *zio; /* decompression state; NULL unless set up by the I/O layer */
+ struct vnode *znode; /* vnode exposing the decompressed stream, or NULL */
+};
+
+/*
+ * State of the decompression layer for one mount.  The index records, for
+ * each known compression frame boundary, the matching input (compressed)
+ * and output (decompressed) offsets so that reads can restart decompression
+ * at a frame boundary.
+ */
+struct tarfs_zio {
+ struct tarfs_mount *tmp;
+
+ /* decompression state */
+#ifdef ZSTDIO
+ struct tarfs_zstd *zstd; /* decompression state (zstd) */
+#endif
+ off_t ipos; /* current input position */
+ off_t opos; /* current output position */
+
+ /* index of compression frames */
+ unsigned int curidx; /* current index position */
+ unsigned int nidx; /* number of index entries */
+ unsigned int szidx; /* index capacity */
+ struct tarfs_idx { off_t i, o; } *idx;
+};
+
+/*
+ * File identifier carrying a node's inode number and generation.
+ * NOTE(review): presumably overlaid on the generic struct fid for file
+ * handle support -- confirm against the vfsops/vnops code.
+ */
+struct tarfs_fid {
+ u_short len; /* length of data in bytes */
+ u_short data0; /* force alignment */
+ ino_t ino;
+ unsigned long gen;
+};
+
+/*
+ * Locking helpers.  The NODE macros take a struct tarfs_node pointer and
+ * operate on its lock member; the ALLNODES macros take a struct
+ * tarfs_mount pointer and operate on its allnode_lock member.
+ *
+ * The ALLNODES macros previously named their parameter tnp while the
+ * expansion referred to tmp, so the argument was ignored and whatever
+ * variable named tmp happened to be in the caller's scope was used.
+ */
+#define TARFS_NODE_LOCK(tnp) \
+	mtx_lock(&(tnp)->lock)
+#define TARFS_NODE_UNLOCK(tnp) \
+	mtx_unlock(&(tnp)->lock)
+#define TARFS_ALLNODES_LOCK(tmp) \
+	mtx_lock(&(tmp)->allnode_lock)
+#define TARFS_ALLNODES_UNLOCK(tmp) \
+	mtx_unlock(&(tmp)->allnode_lock)
+
+/*
+ * Data and metadata within tar files are aligned on 512-byte boundaries,
+ * to match the block size of the magnetic tapes they were originally
+ * intended for.
+ *
+ * TARFS_BLOCKSIZE is fully parenthesized so that it behaves as a single
+ * primary expression wherever it is used (for instance as the operand of
+ * sizeof or next to a postfix operator).
+ */
+#define TARFS_BSHIFT	9
+#define TARFS_BLOCKSIZE	((size_t)(1U << TARFS_BSHIFT))
+/* offset of l within its block */
+#define TARFS_BLKOFF(l)	((l) % TARFS_BLOCKSIZE)
+/* number of the block containing offset l */
+#define TARFS_BLKNUM(l)	((l) >> TARFS_BSHIFT)
+/* number of blocks needed to hold sz bytes, rounding up */
+#define TARFS_SZ2BLKS(sz)	(((sz) + TARFS_BLOCKSIZE - 1) / TARFS_BLOCKSIZE)
+
+/*
+ * Our preferred I/O size, expressed as a shift.  The permitted range runs
+ * from the tar block shift up to PAGE_SHIFT, and the default is PAGE_SHIFT.
+ */
+extern unsigned int tarfs_ioshift;
+#define TARFS_IOSHIFT_MIN TARFS_BSHIFT
+#define TARFS_IOSHIFT_DEFAULT PAGE_SHIFT
+#define TARFS_IOSHIFT_MAX PAGE_SHIFT
+
+/*
+ * Reserved inode numbers.  TARFS_ZIOINO is reported as the file ID of the
+ * decompression (zio) vnode.
+ * NOTE(review): TARFS_ROOTINO is presumably the root directory's inode and
+ * TARFS_MININO the first number handed to archive members -- confirm.
+ */
+#define TARFS_ROOTINO ((ino_t)3)
+#define TARFS_ZIOINO ((ino_t)4)
+#define TARFS_MININO ((ino_t)65535)
+
+/* Directory cookies; presumably consumed by the readdir code -- confirm. */
+#define TARFS_COOKIE_DOT 0
+#define TARFS_COOKIE_DOTDOT 1
+#define TARFS_COOKIE_EOF OFF_MAX
+
+/* Name associated with the zio node; the length excludes the NUL. */
+#define TARFS_ZIO_NAME ".tar"
+#define TARFS_ZIO_NAMELEN (sizeof(TARFS_ZIO_NAME) - 1)
+
+extern struct vop_vector tarfs_vnodeops;
+
+/*
+ * Returns the tarfs-private mount data attached to a mount point.  Both
+ * the mount point and its private data must be non-NULL.
+ */
+static inline struct tarfs_mount *
+MP_TO_TARFS_MOUNT(struct mount *mp)
+{
+	struct tarfs_mount *priv;
+
+	MPASS(mp != NULL && mp->mnt_data != NULL);
+	priv = mp->mnt_data;
+	return (priv);
+}
+
+/*
+ * Returns the tarfs node attached to a vnode.  Both the vnode and its
+ * private data must be non-NULL.
+ */
+static inline struct tarfs_node *
+VP_TO_TARFS_NODE(struct vnode *vp)
+{
+	struct tarfs_node *node;
+
+	MPASS(vp != NULL && vp->v_data != NULL);
+	node = vp->v_data;
+	return (node);
+}
+
+/*
+ * Node management, lookup and file access.
+ * NOTE(review): the definitions of these functions are not part of this
+ * header; see the tarfs source files for their exact contracts.
+ */
+int tarfs_alloc_node(struct tarfs_mount *tmp, const char *name,
+ size_t namelen, enum vtype type, off_t off, size_t sz,
+ time_t mtime, uid_t uid, gid_t gid, mode_t mode,
+ unsigned int flags, const char *linkname, dev_t rdev,
+ struct tarfs_node *parent, struct tarfs_node **node);
+int tarfs_load_blockmap(struct tarfs_node *tnp, size_t realsize);
+void tarfs_dump_tree(struct tarfs_node *tnp);
+void tarfs_free_node(struct tarfs_node *tnp);
+struct tarfs_node *
+ tarfs_lookup_dir(struct tarfs_node *tnp, off_t cookie);
+struct tarfs_node *
+ tarfs_lookup_node(struct tarfs_node *tnp, struct tarfs_node *f,
+ struct componentname *cnp);
+void tarfs_print_node(struct tarfs_node *tnp);
+int tarfs_read_file(struct tarfs_node *tnp, size_t len, struct uio *uiop);
+
+/*
+ * I/O layer (tarfs_io.c).  tarfs_io_init(), tarfs_io_fini() and
+ * tarfs_io_read() return 0 or a positive errno value; tarfs_io_read_buf()
+ * returns a byte count, 0 on EOF, or a negative errno value.
+ */
+int tarfs_io_init(struct tarfs_mount *tmp);
+int tarfs_io_fini(struct tarfs_mount *tmp);
+int tarfs_io_read(struct tarfs_mount *tmp, bool raw,
+ struct uio *uiop);
+ssize_t tarfs_io_read_buf(struct tarfs_mount *tmp, bool raw,
+ void *buf, off_t off, size_t len);
+unsigned int
+ tarfs_strtofflags(const char *str, char **end);
+
+#endif /* _FS_TARFS_TARFS_H_ */
diff --git a/sys/fs/tarfs/tarfs_dbg.h b/sys/fs/tarfs/tarfs_dbg.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/tarfs/tarfs_dbg.h
@@ -0,0 +1,63 @@
+/*-
+ * Copyright (c) 2013 Juniper Networks, Inc.
+ * Copyright (c) 2022 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _FS_TARFS_TARFS_DBG_H_
+#define _FS_TARFS_TARFS_DBG_H_
+
+#ifndef _KERNEL
+#error Should only be included by kernel
+#endif
+
+#ifdef TARFS_DEBUG
+/* Bit mask of enabled debug categories (TARFS_DEBUG_* values). */
+extern int tarfs_debug;
+
+#define TARFS_DEBUG_ALLOC 0x01 /* memory allocations */
+#define TARFS_DEBUG_CHECKSUM 0x02 /* checksum calculations */
+#define TARFS_DEBUG_FS 0x04 /* filesystem operations (vfsops) */
+#define TARFS_DEBUG_LOOKUP 0x08 /* path lookups */
+#define TARFS_DEBUG_VNODE 0x10 /* file operations (vnops) */
+#define TARFS_DEBUG_IO 0x20 /* general I/O */
+#define TARFS_DEBUG_ZIO 0x40 /* decompression */
+#define TARFS_DEBUG_ZIDX 0x80 /* decompression index */
+#define TARFS_DEBUG_MAP 0x100 /* sparse file mapping */
+
+/*
+ * TARFS_DPF(category, fmt, ...) prints a debug message if the bit for the
+ * given category (the suffix of a TARFS_DEBUG_* name) is set in
+ * tarfs_debug.
+ */
+#define TARFS_DPF(category, fmt, ...) \
+ do { \
+ if ((tarfs_debug & TARFS_DEBUG_##category) != 0) \
+ printf(fmt, ## __VA_ARGS__); \
+ } while (0)
+/*
+ * TARFS_DPF_IFF() is like TARFS_DPF() but additionally requires cond to be
+ * true; cond is evaluated before the category bit is tested.
+ */
+#define TARFS_DPF_IFF(category, cond, fmt, ...) \
+ do { \
+ if ((cond) \
+ && (tarfs_debug & TARFS_DEBUG_##category) != 0) \
+ printf(fmt, ## __VA_ARGS__); \
+ } while (0)
+#else
+/* Without TARFS_DEBUG both macros expand to nothing. */
+#define TARFS_DPF(category, fmt, ...)
+#define TARFS_DPF_IFF(category, cond, fmt, ...)
+#endif
+
+#endif /* _FS_TARFS_TARFS_DBG_H_ */
diff --git a/sys/fs/tarfs/tarfs_io.c b/sys/fs/tarfs/tarfs_io.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/tarfs/tarfs_io.c
@@ -0,0 +1,663 @@
+/*-
+ * Copyright (c) 2013 Juniper Networks, Inc.
+ * Copyright (c) 2022 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_tarfs.h"
+#include "opt_zstdio.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/sysctl.h>
+#include <sys/uio.h>
+#include <sys/vnode.h>
+
+#ifdef ZSTDIO
+#define ZSTD_STATIC_LINKING_ONLY
+#include <contrib/zstd/lib/zstd.h>
+#endif
+
+#include <fs/tarfs/tarfs.h>
+#include <fs/tarfs/tarfs_dbg.h>
+
+#ifdef TARFS_DEBUG
+/*
+ * Debug-only statistics for the decompression layer, exported below
+ * vfs.tarfs.zio.
+ */
+SYSCTL_NODE(_vfs_tarfs, OID_AUTO, zio, CTLFLAG_RD, 0,
+    "Tar filesystem decompression layer");
+COUNTER_U64_DEFINE_EARLY(tarfs_zio_inflated);
+SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, inflated, CTLFLAG_RD,
+    &tarfs_zio_inflated, "Amount of compressed data inflated.");
+COUNTER_U64_DEFINE_EARLY(tarfs_zio_consumed);
+SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, consumed, CTLFLAG_RD,
+    &tarfs_zio_consumed, "Amount of compressed data consumed.");
+
+/*
+ * Handler for vfs.tarfs.zio.reset.  Reading always yields 0; writing any
+ * value clears both counters.
+ */
+static int
+tarfs_sysctl_handle_zio_reset(SYSCTL_HANDLER_ARGS)
+{
+	unsigned int value = 0;
+	int error;
+
+	error = SYSCTL_OUT(req, &value, sizeof(value));
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	/* a new value was supplied: consume it, then reset */
+	error = SYSCTL_IN(req, &value, sizeof(value));
+	if (error != 0)
+		return (error);
+	counter_u64_zero(tarfs_zio_inflated);
+	counter_u64_zero(tarfs_zio_consumed);
+	return (0);
+}
+
+SYSCTL_PROC(_vfs_tarfs_zio, OID_AUTO, reset,
+    CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW,
+    NULL, 0, tarfs_sysctl_handle_zio_reset, "IU",
+    "Reset compression counters.");
+#endif
+
+/* Malloc types used by the decompression layer. */
+MALLOC_DEFINE(M_TARFSZSTATE, "tarfs zstate", "tarfs decompression state");
+MALLOC_DEFINE(M_TARFSZBUF, "tarfs zbuf", "tarfs decompression buffers");
+
+/*
+ * Leading bytes used to recognize compressed input.  Each macro expands to
+ * an array compound literal, so sizeof() applied to it yields the length
+ * of the signature.
+ */
+#define XZ_MAGIC (uint8_t[]){ 0xfd, 0x37, 0x7a, 0x58, 0x5a }
+#define ZLIB_MAGIC (uint8_t[]){ 0x1f, 0x8b, 0x08 }
+#define ZSTD_MAGIC (uint8_t[]){ 0x28, 0xb5, 0x2f, 0xfd }
+
+#ifdef ZSTDIO
+/* zstd-specific decompression state, reached through tarfs_zio.zstd. */
+struct tarfs_zstd {
+ ZSTD_DStream *zds;
+};
+#endif
+
+/* XXX review use of curthread / uio_td / td_cred */
+
+/*
+ * Performs the read described by uiop against the tar file.  When raw is
+ * true, or when no decompression vnode exists, the read goes straight to
+ * the underlying file under a range lock; otherwise it goes through the
+ * decompression vnode and yields decompressed data.  Returns 0 on success
+ * and a positive errno value on failure.
+ */
+int
+tarfs_io_read(struct tarfs_mount *tmp, bool raw, struct uio *uiop)
+{
+	struct vnode *vp;
+	void *cookie = NULL;
+	const off_t start = uiop->uio_offset;
+	const size_t count = uiop->uio_resid;
+	const bool direct = raw || tmp->znode == NULL;
+	int error;
+
+	vp = direct ? tmp->vp : tmp->znode;
+	error = vn_lock(vp, LK_EXCLUSIVE);
+	if (error == 0) {
+		/* only the underlying file is range-locked */
+		if (direct)
+			cookie = vn_rangelock_rlock(vp, start, start + count);
+		error = VOP_READ(vp, uiop, IO_DIRECT,
+		    uiop->uio_td->td_ucred);
+		if (direct)
+			vn_rangelock_unlock(vp, cookie);
+		VOP_UNLOCK(vp);
+	}
+	TARFS_DPF(IO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
+	    (size_t)start, count, error, uiop->uio_resid);
+	return (error);
+}
+
+/*
+ * Convenience wrapper around tarfs_io_read() which reads up to len bytes
+ * at offset off into a kernel buffer.  As with tarfs_io_read(), raw
+ * selects between the original file and the decompressed stream.  Returns
+ * the number of bytes read on success, 0 on EOF (or when len is 0), and a
+ * negative errno value on failure.
+ */
+ssize_t
+tarfs_io_read_buf(struct tarfs_mount *tmp, bool raw,
+    void *buf, off_t off, size_t len)
+{
+	struct iovec iov;
+	struct uio io;
+	ssize_t nread;
+	int error;
+
+	if (len == 0) {
+		TARFS_DPF(IO, "%s(%zu, %zu) null\n", __func__,
+		    (size_t)off, len);
+		return (0);
+	}
+
+	/* describe the caller's buffer as a single-segment kernel uio */
+	iov.iov_base = buf;
+	iov.iov_len = len;
+	io.uio_iov = &iov;
+	io.uio_iovcnt = 1;
+	io.uio_offset = off;
+	io.uio_resid = len;
+	io.uio_segflg = UIO_SYSSPACE;
+	io.uio_rw = UIO_READ;
+	io.uio_td = curthread;
+
+	error = tarfs_io_read(tmp, raw, &io);
+	if (error != 0) {
+		TARFS_DPF(IO, "%s(%zu, %zu) error %d\n", __func__,
+		    (size_t)off, len, error);
+		return (-error);
+	}
+
+	nread = len - io.uio_resid;
+	/* len is known to be non-zero here, so nothing read means EOF */
+	if (nread == 0) {
+		TARFS_DPF(IO, "%s(%zu, %zu) eof\n", __func__,
+		    (size_t)off, len);
+	} else {
+		TARFS_DPF(IO, "%s(%zu, %zu) read %zd | %*D\n", __func__,
+		    (size_t)off, len, nread,
+		    (int)(nread > 8 ? 8 : nread), (uint8_t *)buf, " ");
+	}
+	return (nread);
+}
+
+#ifdef ZSTDIO
+/*
+ * Allocation hook handed to zstd: allocates from the M_TARFSZSTATE malloc
+ * type and sleeps until memory is available.  The opaque cookie is unused.
+ */
+static void *
+tarfs_zstate_alloc(void *opaque, size_t size)
+{
+	void *mem;
+
+	(void)opaque;
+	mem = malloc(size, M_TARFSZSTATE, M_WAITOK);
+	return (mem);
+}
+
+/*
+ * Deallocation hook handed to zstd, the counterpart of
+ * tarfs_zstate_alloc().  The opaque cookie is unused.
+ */
+static void
+tarfs_zstate_free(void *opaque, void *address)
+{
+
+	(void)opaque;
+	free(address, M_TARFSZSTATE);
+}
+
+/* Custom allocator descriptor passed to ZSTD_createDStream_advanced(). */
+static ZSTD_customMem tarfs_zstd_mem = {
+	.customAlloc = tarfs_zstate_alloc,
+	.customFree = tarfs_zstate_free,
+	.opaque = NULL,
+};
+#endif
+
+/*
+ * Advances the frame index by one position.  If that moves past the last
+ * recorded entry, a new entry holding input offset i and output offset o
+ * is appended, doubling the capacity of the index first if it is full.
+ * Either way, the entry at the new position is asserted to match (i, o).
+ */
+static void
+tarfs_zio_update_index(struct tarfs_zio *zio, off_t i, off_t o)
+{
+
+	zio->curidx++;
+	if (zio->curidx >= zio->nidx) {
+		/* first time past this frame boundary: record it */
+		zio->nidx++;
+		if (zio->nidx > zio->szidx) {
+			zio->szidx *= 2;
+			zio->idx = realloc(zio->idx,
+			    zio->szidx * sizeof(*zio->idx),
+			    M_TARFSZSTATE, M_ZERO | M_WAITOK);
+			TARFS_DPF(ALLOC, "%s: resized zio index\n", __func__);
+		}
+		zio->idx[zio->curidx].i = i;
+		zio->idx[zio->curidx].o = o;
+		TARFS_DPF(ZIDX, "%s: index %u = i %zu o %zu\n", __func__,
+		    zio->curidx, (size_t)i, (size_t)o);
+	}
+	MPASS(zio->idx[zio->curidx].i == i);
+	MPASS(zio->idx[zio->curidx].o == o);
+}
+
+/*
+ * VOP_ACCESS for zio node.  Only a request for exactly VREAD is
+ * considered; it is answered by the access check on the underlying tar
+ * file.  Every other access mode is refused with EPERM.
+ */
+static int
+tarfs_zaccess(struct vop_access_args *ap)
+{
+	struct tarfs_zio *zio = ap->a_vp->v_data;
+	accmode_t mode = ap->a_accmode;
+	int error;
+
+	if (mode != VREAD)
+		error = EPERM;
+	else
+		error = VOP_ACCESS(zio->tmp->vp, mode, ap->a_cred, ap->a_td);
+	TARFS_DPF(ZIO, "%s(%d) = %d\n", __func__, mode, error);
+	return (error);
+}
+
+/*
+ * VOP_GETATTR for zio node.  Ownership, mode, timestamps (except the birth
+ * time) and space usage are copied from the underlying tar file.  The
+ * size is the output offset of the last index entry, and the birth time
+ * is taken from the root node.
+ */
+static int
+tarfs_zgetattr(struct vop_getattr_args *ap)
+{
+	struct vattr lower;
+	struct vnode *zvp = ap->a_vp;
+	struct vattr *out = ap->a_vap;
+	struct tarfs_zio *zio = zvp->v_data;
+	struct tarfs_mount *tmp = zio->tmp;
+	struct mount *mp;
+	int error;
+
+	VATTR_NULL(out);
+	error = VOP_GETATTR(tmp->vp, &lower, ap->a_cred);
+	if (error != 0)
+		goto done;
+	mp = zvp->v_mount;
+
+	/* identity of the zio node itself */
+	out->va_type = VREG;
+	out->va_nlink = 1;
+	out->va_fsid = mp->mnt_stat.f_fsid.val[0];
+	out->va_fileid = TARFS_ZIOINO;
+	out->va_size = zio->idx[zio->nidx - 1].o;
+	out->va_blocksize = mp->mnt_stat.f_iosize;
+	out->va_birthtime = tmp->root->birthtime;
+
+	/* everything else is inherited from the tar file */
+	out->va_mode = lower.va_mode;
+	out->va_uid = lower.va_uid;
+	out->va_gid = lower.va_gid;
+	out->va_atime = lower.va_atime;
+	out->va_mtime = lower.va_mtime;
+	out->va_ctime = lower.va_ctime;
+	out->va_bytes = lower.va_bytes;
+done:
+	TARFS_DPF(ZIO, "%s() = %d\n", __func__, error);
+	return (error);
+}
+
+/*
+ * VOP_READ for zio node.  Issues a single bread() on the zio vnode for
+ * the block containing the requested offset, then copies out of the
+ * returned buffer, starting at the offset within that block.  The copy is
+ * clipped to the end of the buffer, so a single call may transfer less
+ * than uio_resid.  Returns 0 or the error from bread() / uiomove().
+ */
+static int
+tarfs_zread(struct vop_read_args *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ struct tarfs_zio *zio = vp->v_data;
+ struct tarfs_mount *tmp = zio->tmp;
+ struct uio *uiop = ap->a_uio;
+ struct buf *bp;
+ off_t off = uiop->uio_offset;
+ size_t len = uiop->uio_resid;
+ int error;
+
+ /*
+ * NOTE(review): the third argument below evaluates to the number of
+ * iosize-sized blocks spanned by the request.  Confirm that this is
+ * the unit bread() expects for its size argument.
+ */
+ error = bread(vp, off / tmp->iosize,
+ (off + len + tmp->iosize - 1) / tmp->iosize - off / tmp->iosize,
+ uiop->uio_td->td_ucred, &bp);
+ if (error == 0) {
+ /* do not copy past the end of the buffer we were given */
+ if (off % tmp->iosize + len > bp->b_bufsize)
+ len = bp->b_bufsize - off % tmp->iosize;
+ error = uiomove(bp->b_data + off % tmp->iosize, len, uiop);
+ brelse(bp);
+ }
+ TARFS_DPF(ZIO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
+ (size_t)off, len, error, uiop->uio_resid);
+ return (error);
+}
+
+/*
+ * VOP_RECLAIM for zio node.  Detaches the private data pointer from the
+ * vnode, destroys its VM object and purges it from the name cache.  The
+ * zio state itself is not freed here.
+ */
+static int
+tarfs_zreclaim(struct vop_reclaim_args *ap)
+{
+	struct vnode *zvp;
+
+	zvp = ap->a_vp;
+	TARFS_DPF(ZIO, "%s(%p)\n", __func__, zvp);
+	zvp->v_data = NULL;
+	vnode_destroy_vobject(zvp);
+	cache_purge(zvp);
+	return (0);
+}
+
+#ifdef ZSTDIO
+/*
+ * VOP_STRATEGY for zio node, zstd edition.
+ *
+ * Fills bp with decompressed data starting at logical offset
+ * b_blkno * iosize.  The frame index is used to find the closest known
+ * frame boundary at or before that offset; the stream is restarted there
+ * if necessary and any output preceding the requested offset is
+ * decompressed and discarded.  Compressed input is read from the
+ * underlying tar file in MAXBSIZE chunks.
+ *
+ * Always returns 0; the outcome is reported through bp (B_DONE, b_error,
+ * BIO_ERROR, b_resid).  On error the stream and index position are reset
+ * to the first index entry.
+ */
+static int
+tarfs_zstrategy_zstd(struct tarfs_zio *zio, struct buf *bp)
+{
+ void *buf = NULL, *rl = NULL;
+ struct uio auio;
+ struct iovec aiov;
+ struct tarfs_mount *tmp = zio->tmp;
+ struct tarfs_zstd *zstd = zio->zstd;
+ struct vattr va;
+ ZSTD_inBuffer zib;
+ ZSTD_outBuffer zob;
+ off_t ipos, opos;
+ size_t ilen, olen;
+ size_t zerror;
+ off_t off = bp->b_blkno * tmp->iosize;
+ size_t len = bp->b_bufsize;
+ size_t bsize;
+ int error;
+ bool reset = false;
+
+ TARFS_DPF(ZIO, "%s: bufsize %ld bcount %ld resid %ld\n", __func__,
+ bp->b_bufsize, bp->b_bcount, bp->b_resid);
+
+ /* lock tarball */
+ /*
+ * NOTE(review): if this fails, b_resid has not yet been set to len
+ * when it is used at fail_unlocked.
+ */
+ error = vn_lock(tmp->vp, LK_EXCLUSIVE);
+ if (error != 0) {
+ goto fail_unlocked;
+ }
+
+ /* check size */
+ error = VOP_GETATTR(tmp->vp, &va, bp->b_rcred);
+ if (error != 0) {
+ goto fail;
+ }
+ /* do we have to rewind? */
+ if (off < zio->opos) {
+ while (zio->curidx > 0 && off < zio->idx[zio->curidx].o)
+ zio->curidx--;
+ reset = true;
+ }
+ /* advance to the nearest index entry */
+ if (off > zio->opos) {
+ // XXX maybe do a binary search instead
+ while (zio->curidx < zio->nidx - 1 &&
+ off >= zio->idx[zio->curidx + 1].o) {
+ zio->curidx++;
+ reset = true;
+ }
+ }
+ /* reset the decompression stream if needed */
+ if (reset) {
+ zio->ipos = zio->idx[zio->curidx].i;
+ zio->opos = zio->idx[zio->curidx].o;
+ ZSTD_resetDStream(zstd->zds);
+ TARFS_DPF(ZIDX, "%s: skipping to index %u = i %zu o %zu\n", __func__,
+ zio->curidx, (size_t)zio->ipos, (size_t)zio->opos);
+ } else {
+ TARFS_DPF(ZIDX, "%s: continuing at i %zu o %zu\n", __func__,
+ (size_t)zio->ipos, (size_t)zio->opos);
+ }
+ /* the input position must lie within the tar file */
+ if (zio->ipos >= va.va_size) {
+ error = EIO;
+ goto fail;
+ }
+ MPASS(zio->opos <= off);
+ bsize = MAXBSIZE; // XXX should probably use ZSTD_CStreamOutSize()
+ buf = malloc(bsize, M_TEMP, M_WAITOK);
+ zib.src = NULL;
+ zib.size = 0;
+ zib.pos = 0;
+ zob.dst = bp->b_data;
+ zob.size = bp->b_bufsize;
+ zob.pos = 0;
+ bp->b_resid = len;
+ error = 0;
+ rl = vn_rangelock_rlock(tmp->vp, zio->ipos, OFF_MAX);
+ while (bp->b_resid > 0) {
+ if (zib.pos == zib.size) {
+ /* request data from the underlying file */
+ aiov.iov_base = buf;
+ aiov.iov_len = bsize;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = zio->ipos;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_resid = aiov.iov_len;
+ auio.uio_td = curthread;
+ error = VOP_READ(tmp->vp, &auio, IO_DIRECT, bp->b_rcred);
+ if (error != 0)
+ goto fail;
+ TARFS_DPF(ZIO, "%s: req %zu+%zu got %zu+%zu\n", __func__,
+ (size_t)zio->ipos, bsize,
+ (size_t)zio->ipos, bsize - auio.uio_resid);
+ zib.src = buf;
+ zib.size = bsize - auio.uio_resid;
+ zib.pos = 0;
+ }
+ MPASS(zib.pos <= zib.size);
+ /*
+ * NOTE(review): error is 0 when this branch is taken, so running
+ * out of input returns the buffer without BIO_ERROR and with a
+ * non-zero b_resid.  Confirm that a short read is intended.
+ */
+ if (zib.pos == zib.size) {
+ TARFS_DPF(ZIO, "%s: end of file after i %zu o %zu\n", __func__,
+ (size_t)zio->ipos, (size_t)zio->opos);
+ goto fail;
+ }
+ if (zio->opos < off) {
+ /* to be discarded */
+ /* output lands at the start of b_data and is overwritten later */
+ zob.size = min(off - zio->opos, bp->b_bufsize);
+ zob.pos = 0;
+ } else {
+ /* wanted data: resume where the previous pass stopped */
+ zob.size = bp->b_bufsize;
+ zob.pos = zio->opos - off;
+ if (zob.size > zob.pos + bp->b_resid)
+ zob.size = zob.pos + bp->b_resid;
+ }
+ ipos = zib.pos;
+ opos = zob.pos;
+ /* decompress as much as possible */
+ zerror = ZSTD_decompressStream(zstd->zds, &zob, &zib);
+ /* account for the input consumed and output produced by this call */
+ zio->ipos += ilen = zib.pos - ipos;
+ zio->opos += olen = zob.pos - opos;
+ if (zio->opos > off)
+ bp->b_resid -= olen;
+ if (ZSTD_isError(zerror)) {
+ TARFS_DPF(ZIO, "%s: inflate failed after i %zu o %zu: %s\n", __func__,
+ (size_t)zio->ipos, (size_t)zio->opos, ZSTD_getErrorName(zerror));
+ error = EIO;
+ goto fail;
+ }
+ if (zerror == 0 && olen == 0) {
+ TARFS_DPF(ZIO, "%s: end of stream after i %zu o %zu\n", __func__,
+ (size_t)zio->ipos, (size_t)zio->opos);
+ break;
+ }
+ if (zerror == 0) {
+ /* a frame was completed: remember where the next one starts */
+ TARFS_DPF(ZIO, "%s: end of frame after i %zu o %zu\n", __func__,
+ (size_t)zio->ipos, (size_t)zio->opos);
+ tarfs_zio_update_index(zio, zio->ipos, zio->opos);
+ }
+ TARFS_DPF(ZIO, "%s: inflated %zu\n", __func__, olen);
+#ifdef TARFS_DEBUG
+ counter_u64_add(tarfs_zio_inflated, olen);
+#endif
+ }
+ /* reached on success as well as on failure */
+fail:
+ if (rl != NULL)
+ vn_rangelock_unlock(tmp->vp, rl);
+ VOP_UNLOCK(tmp->vp);
+fail_unlocked:
+ if (buf != NULL)
+ free(buf, M_TEMP);
+ TARFS_DPF(ZIO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
+ (size_t)off, len, error, bp->b_resid);
+#ifdef TARFS_DEBUG
+ counter_u64_add(tarfs_zio_consumed, len - bp->b_resid);
+#endif
+ bp->b_flags |= B_DONE;
+ bp->b_error = error;
+ if (error != 0) {
+ /* start over from the first index entry next time */
+ bp->b_ioflags |= BIO_ERROR;
+ zio->curidx = 0;
+ zio->ipos = zio->idx[0].i;
+ zio->opos = zio->idx[0].o;
+ ZSTD_resetDStream(zstd->zds);
+ }
+ return (0);
+}
+#endif
+
+/*
+ * VOP_STRATEGY for zio node.  Hands the buffer to the zstd implementation
+ * when zstd state is present; otherwise completes the buffer with EFTYPE.
+ * Always returns 0, with the status reported through the buffer.
+ */
+static int
+tarfs_zstrategy(struct vop_strategy_args *ap)
+{
+	struct buf *bp = ap->a_bp;
+	struct tarfs_zio *zio = ap->a_vp->v_data;
+
+#ifdef ZSTDIO
+	if (zio->zstd != NULL)
+		return (tarfs_zstrategy_zstd(zio, bp));
+#endif
+	/* no decompressor available for this node */
+	bp->b_flags |= B_DONE;
+	bp->b_ioflags |= BIO_ERROR;
+	bp->b_error = EFTYPE;
+	return (0);
+}
+
+/*
+ * Vnode operations for the zio node.  Only access, getattr, read, reclaim
+ * and strategy are provided here; everything else falls through to
+ * default_vnodeops.
+ */
+static struct vop_vector tarfs_znodeops = {
+ .vop_default = &default_vnodeops,
+
+ .vop_access = tarfs_zaccess,
+ .vop_getattr = tarfs_zgetattr,
+ .vop_read = tarfs_zread,
+ .vop_reclaim = tarfs_zreclaim,
+ .vop_strategy = tarfs_zstrategy,
+};
+VFS_VOP_VECTOR_REGISTER(tarfs_znodeops);
+
+/*
+ * Initializes the decompression layer.  Allocates the zio state and a
+ * frame index with room for 128 entries, seeds the index with a single
+ * entry (i, o) which also becomes the current input / output position,
+ * and creates the regular-file vnode through which decompressed data is
+ * read.  The state and vnode are recorded in tmp->zio and tmp->znode.
+ * Returns the new zio state.
+ */
+static struct tarfs_zio *
+tarfs_zio_init(struct tarfs_mount *tmp, off_t i, off_t o)
+{
+ struct tarfs_zio *zio;
+ struct vnode *zvp;
+
+ zio = malloc(sizeof(*zio), M_TARFSZSTATE, M_ZERO | M_WAITOK);
+ TARFS_DPF(ALLOC, "%s: allocated zio\n", __func__);
+ zio->tmp = tmp;
+ zio->szidx = 128;
+ zio->idx = malloc(zio->szidx * sizeof(*zio->idx), M_TARFSZSTATE,
+ M_ZERO | M_WAITOK);
+ zio->curidx = 0;
+ zio->nidx = 1;
+ zio->idx[zio->curidx].i = zio->ipos = i;
+ zio->idx[zio->curidx].o = zio->opos = o;
+ tmp->zio = zio;
+ TARFS_DPF(ALLOC, "%s: allocated zio index\n", __func__);
+ /* NOTE(review): the return value of getnewvnode() is not checked. */
+ getnewvnode("tarfs", tmp->vfs, &tarfs_znodeops, &zvp);
+ zvp->v_data = zio;
+ zvp->v_type = VREG;
+ zvp->v_mount = tmp->vfs;
+ tmp->znode = zvp;
+ TARFS_DPF(ZIO, "%s: created zio node\n", __func__);
+ return (zio);
+}
+
+/*
+ * Initializes the I/O layer, including decompression if the signature of
+ * a supported compression format is detected.  The first tmp->iosize
+ * bytes of the tar file are read raw and compared against the known
+ * signatures: xz and zlib are rejected with EOPNOTSUPP, zstd sets up the
+ * decompression layer when ZSTDIO is compiled in (EOPNOTSUPP otherwise),
+ * and anything else is treated as uncompressed.  Returns 0 on success and
+ * a positive errno value on failure.
+ */
+int
+tarfs_io_init(struct tarfs_mount *tmp)
+{
+	uint8_t *block;
+#ifdef ZSTDIO
+	struct tarfs_zio *zio = NULL;
+#endif
+	ssize_t res;
+	int error = 0;
+
+	/*
+	 * The buffer is zeroed, so a short read leaves zeroes behind the
+	 * data and the signature comparisons below never look at
+	 * uninitialized memory.
+	 */
+	block = malloc(tmp->iosize, M_TEMP, M_ZERO | M_WAITOK);
+	res = tarfs_io_read_buf(tmp, true, block, 0, tmp->iosize);
+	if (res < 0) {
+		/* release the probe buffer on the error path too */
+		error = -res;
+		goto bad;
+	}
+	if (memcmp(block, XZ_MAGIC, sizeof(XZ_MAGIC)) == 0) {
+		printf("xz compression not supported\n");
+		error = EOPNOTSUPP;
+		goto bad;
+	} else if (memcmp(block, ZLIB_MAGIC, sizeof(ZLIB_MAGIC)) == 0) {
+		printf("zlib compression not supported\n");
+		error = EOPNOTSUPP;
+		goto bad;
+	} else if (memcmp(block, ZSTD_MAGIC, sizeof(ZSTD_MAGIC)) == 0) {
+#ifdef ZSTDIO
+		zio = tarfs_zio_init(tmp, 0, 0);
+		zio->zstd = malloc(sizeof(*zio->zstd), M_TARFSZSTATE, M_WAITOK);
+		zio->zstd->zds = ZSTD_createDStream_advanced(tarfs_zstd_mem);
+		(void)ZSTD_initDStream(zio->zstd->zds);
+#else
+		printf("zstd compression not supported\n");
+		error = EOPNOTSUPP;
+		goto bad;
+#endif
+	}
+bad:
+	free(block, M_TEMP);
+	return (error);
+}
+
+/*
+ * Tears down the decompression layer: disposes of the zio vnode, then
+ * frees the zstd state (if any), the frame index and the zio structure
+ * itself, and clears tmp->zio.  Returns 0 on success.  If the zio vnode
+ * cannot be locked, the error from vn_lock() is returned and nothing is
+ * released.
+ */
+static int
+tarfs_zio_fini(struct tarfs_mount *tmp)
+{
+	struct tarfs_zio *zio = tmp->zio;
+	int error = 0;
+
+	if (tmp->znode != NULL) {
+		error = vn_lock(tmp->znode, LK_EXCLUSIVE);
+		if (error != 0) {
+			/* newline added to match all other debug messages */
+			TARFS_DPF(ALLOC, "%s: failed to lock znode\n",
+			    __func__);
+			return (error);
+		}
+		tmp->znode->v_mount = NULL;
+		vgone(tmp->znode);
+		vput(tmp->znode);
+		tmp->znode = NULL;
+	}
+#ifdef ZSTDIO
+	if (zio->zstd != NULL) {
+		TARFS_DPF(ALLOC, "%s: freeing zstd state\n", __func__);
+		ZSTD_freeDStream(zio->zstd->zds);
+		free(zio->zstd, M_TARFSZSTATE);
+	}
+#endif
+	if (zio->idx != NULL) {
+		TARFS_DPF(ALLOC, "%s: freeing index\n", __func__);
+		free(zio->idx, M_TARFSZSTATE);
+	}
+	TARFS_DPF(ALLOC, "%s: freeing zio\n", __func__);
+	free(zio, M_TARFSZSTATE);
+	tmp->zio = NULL;
+	return (error);
+}
+
+/*
+ * Tears down the I/O layer.  There is nothing to do unless a
+ * decompression layer was set up, in which case it is torn down and its
+ * result returned.  Returns 0 on success and an errno value otherwise.
+ */
+int
+tarfs_io_fini(struct tarfs_mount *tmp)
+{
+
+	if (tmp->zio == NULL)
+		return (0);
+	return (tarfs_zio_fini(tmp));
+}
diff --git a/sys/fs/tarfs/tarfs_subr.c b/sys/fs/tarfs/tarfs_subr.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/tarfs/tarfs_subr.c
@@ -0,0 +1,604 @@
+/*-
+ * Copyright (c) 2013 Juniper Networks, Inc.
+ * Copyright (c) 2022 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_tarfs.h"
+
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/fcntl.h>
+#include <sys/libkern.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+
+#include <vm/vm_param.h>
+
+#include <fs/tarfs/tarfs.h>
+#include <fs/tarfs/tarfs_dbg.h>
+
+/*
+ * malloc(9) types used in this file: M_TARFSNAME for node and symlink
+ * target names, M_TARFSBLK for block maps (and the temporary buffer used
+ * while parsing one).
+ */
+MALLOC_DEFINE(M_TARFSNAME, "tarfs name", "tarfs file names");
+MALLOC_DEFINE(M_TARFSBLK, "tarfs blk", "tarfs block maps");
+
+/* Parent node for the vfs.tarfs sysctl variables declared below. */
+SYSCTL_NODE(_vfs, OID_AUTO, tarfs, CTLFLAG_RW, 0, "Tar filesystem");
+
+/*
+ * Preferred I/O size as a power of two; exported as vfs.tarfs.ioshift
+ * and clamped by tarfs_sysctl_handle_ioshift() when written.
+ */
+unsigned int tarfs_ioshift = TARFS_IOSHIFT_DEFAULT;
+
+/*
+ * Sysctl handler for an I/O shift variable (arg1 points to the unsigned
+ * int being exported).  Always reports the current value.  When a new
+ * value is supplied, 0 selects TARFS_IOSHIFT_DEFAULT and the result is
+ * then clamped to [TARFS_IOSHIFT_MIN, TARFS_IOSHIFT_MAX] before being
+ * stored.  Returns 0 or the error from SYSCTL_OUT() / SYSCTL_IN().
+ */
+static int
+tarfs_sysctl_handle_ioshift(SYSCTL_HANDLER_ARGS)
+{
+	unsigned int *shiftp = (unsigned int *)arg1;
+	unsigned int shift;
+	int error;
+
+	/* report the current value first */
+	shift = *shiftp;
+	error = SYSCTL_OUT(req, &shift, sizeof(shift));
+	if (error != 0)
+		return (error);
+
+	/* read-only request: nothing more to do */
+	if (req->newptr == NULL)
+		return (0);
+
+	error = SYSCTL_IN(req, &shift, sizeof(shift));
+	if (error != 0)
+		return (error);
+
+	/* zero means "use the default"; then clamp to the valid range */
+	if (shift == 0)
+		shift = TARFS_IOSHIFT_DEFAULT;
+	if (shift < TARFS_IOSHIFT_MIN)
+		shift = TARFS_IOSHIFT_MIN;
+	if (shift > TARFS_IOSHIFT_MAX)
+		shift = TARFS_IOSHIFT_MAX;
+	*shiftp = shift;
+	return (0);
+}
+
+/*
+ * vfs.tarfs.ioshift: read/write tunable backed by tarfs_ioshift.  Writes
+ * go through tarfs_sysctl_handle_ioshift(), which substitutes the default
+ * for 0 and clamps the value to the supported range.
+ */
+SYSCTL_PROC(_vfs_tarfs, OID_AUTO, ioshift,
+ CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW | CTLFLAG_TUN,
+ &tarfs_ioshift, 0, tarfs_sysctl_handle_ioshift, "IU",
+ "Tar filesystem preferred I/O size (log 2)");
+
+#ifdef TARFS_DEBUG
+/*
+ * vfs.tarfs.debug: mask of debug categories; only present in kernels
+ * built with TARFS_DEBUG.
+ */
+int tarfs_debug;
+SYSCTL_INT(_vfs_tarfs, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_TUN,
+ &tarfs_debug, 0, "Tar filesystem debug mask");
+#endif /* TARFS_DEBUG */
+
+/*
+ * Prints the name of every entry below directory node tnp, one per line,
+ * indented by four spaces per level (indent is the current depth), and
+ * recurses into subdirectories.  Does nothing if tnp is not a directory.
+ */
+static void
+tarfs_dump_tree_internal(struct tarfs_node *tnp, int indent)
+{
+	struct tarfs_node *child;
+
+	if (tnp->type != VDIR)
+		return;
+
+	TAILQ_FOREACH(child, &tnp->dir.dirhead, dirents) {
+		printf("%*s%s\n", indent * 4, "",
+		    (child->name != NULL) ? child->name : "<<root>>");
+		/* a no-op for anything that is not a directory */
+		tarfs_dump_tree_internal(child, indent + 1);
+	}
+}
+
+/*
+ * Prints the name of tnp followed by an indented listing of everything
+ * below it.  A node without a name is shown as "<<root>>".  A NULL tnp
+ * is silently ignored.
+ */
+void
+tarfs_dump_tree(struct tarfs_node *tnp)
+{
+
+	if (tnp == NULL)
+		return;
+
+	printf("%s\n", (tnp->name != NULL) ? tnp->name : "<<root>>");
+	tarfs_dump_tree_internal(tnp, 1);
+}
+
+/*
+ * Prints the fields of a tarfs_node, one per line, followed by the
+ * type-specific fields for directories and device nodes.  A NULL tnp is
+ * silently ignored.
+ */
+void
+tarfs_print_node(struct tarfs_node *tnp)
+{
+	const char *name;
+
+	if (tnp == NULL)
+		return;
+
+	/* a node without a name is shown as the root */
+	name = (tnp->name == NULL) ? "<<root>>" : tnp->name;
+
+	printf("%s: node %p\n", __func__, tnp);
+	printf("\tvnode %p\n", tnp->vnode);
+	printf("\ttmp %p\n", tnp->tmp);
+	printf("\ttype %d\n", tnp->type);
+	printf("\tino %lu\n", tnp->ino);
+	printf("\tsize %zu\n", tnp->size);
+	printf("\tname %s\n", name);
+	printf("\tnamelen %zu\n", tnp->namelen);
+	printf("\tuid %d\n", tnp->uid);
+	printf("\tgid %d\n", tnp->gid);
+	printf("\tmode o%o\n", tnp->mode);
+	printf("\tflags %u\n", tnp->flags);
+	printf("\tnlink %lu\n", tnp->nlink);
+	printf("\tatime %d\n", (int)tnp->atime.tv_sec);
+	printf("\tmtime %d\n", (int)tnp->mtime.tv_sec);
+	printf("\tctime %d\n", (int)tnp->ctime.tv_sec);
+	printf("\tbirthtime %d\n", (int)tnp->birthtime.tv_sec);
+	printf("\tgen %lu\n", tnp->gen);
+	printf("\tparent %p\n", tnp->parent);
+
+	if (tnp->type == VDIR) {
+		printf("\tdir.lastcookie %jd\n", tnp->dir.lastcookie);
+		printf("\tdir.lastnode %p\n", tnp->dir.lastnode);
+	} else if (tnp->type == VBLK || tnp->type == VCHR) {
+		printf("\trdev %lu\n", tnp->rdev);
+	}
+}
+
+/*
+ * Searches directory node tnp for an entry whose name matches cnp.  If f
+ * is not NULL, only that particular entry is considered.
+ *
+ * If the matching entry is a regular file with a non-NULL "other"
+ * pointer (a hard link), the node it points to is returned instead.
+ *
+ * Returns the node found, or NULL if there is no match.
+ */
+struct tarfs_node *
+tarfs_lookup_node(struct tarfs_node *tnp, struct tarfs_node *f,
+    struct componentname *cnp)
+{
+	struct tarfs_node *entry;
+
+	TARFS_DPF(LOOKUP, "%s: name: %.*s\n", __func__, (int)cnp->cn_namelen,
+	    cnp->cn_nameptr);
+
+	/* entry is left NULL if the loop runs off the end of the list */
+	TAILQ_FOREACH(entry, &tnp->dir.dirhead, dirents) {
+		if (f != NULL && entry != f)
+			continue;
+
+		if (entry->namelen == cnp->cn_namelen &&
+		    bcmp(entry->name, cnp->cn_nameptr,
+		    entry->namelen) == 0)
+			break;
+	}
+
+	if (entry == NULL) {
+		TARFS_DPF(LOOKUP, "%s: no match found\n", __func__);
+		return (NULL);
+	}
+
+	if (entry->type == VREG && entry->other != NULL) {
+		/*
+		 * Unconditional message: TARFS_DPF_IFF expects a condition
+		 * as its second argument, which was missing here.
+		 */
+		TARFS_DPF(LOOKUP, "%s: following hard link %p\n",
+		    __func__, entry);
+		entry = entry->other;
+	}
+	TARFS_DPF(LOOKUP, "%s: found tarfs_node %p\n", __func__, entry);
+	return (entry);
+}
+
+/*
+ * Finds the entry of directory node tnp whose inode number equals
+ * cookie.  The directory's cached (lastcookie, lastnode) pair is
+ * consulted first; otherwise the entry list is scanned from the start.
+ * Returns the entry, or NULL if no entry has that inode number.
+ */
+struct tarfs_node *
+tarfs_lookup_dir(struct tarfs_node *tnp, off_t cookie)
+{
+	struct tarfs_node *ent;
+
+	TARFS_DPF(LOOKUP, "%s: tarfs_node %p, cookie %jd\n", __func__, tnp,
+	    cookie);
+	TARFS_DPF(LOOKUP, "%s: name: %s\n", __func__,
+	    (tnp->name == NULL) ? "<<root>>" : tnp->name);
+
+	/* fast path: same cookie as the cached one */
+	if (tnp->dir.lastnode != NULL &&
+	    cookie == tnp->dir.lastcookie) {
+		TARFS_DPF(LOOKUP, "%s: Using cached entry: tarfs_node %p, "
+		    "cookie %jd\n", __func__, tnp->dir.lastnode,
+		    tnp->dir.lastcookie);
+		return (tnp->dir.lastnode);
+	}
+
+	TAILQ_FOREACH(ent, &tnp->dir.dirhead, dirents) {
+		TARFS_DPF(LOOKUP, "%s: tarfs_node %p, current %p, ino %lu\n",
+		    __func__, tnp, ent, ent->ino);
+		TARFS_DPF_IFF(LOOKUP, ent->name != NULL,
+		    "%s: name: %s\n", __func__, ent->name);
+		if (ent->ino != cookie)
+			continue;
+		TARFS_DPF(LOOKUP, "%s: Found entry: tarfs_node %p, "
+		    "cookie %lu\n", __func__, ent,
+		    ent->ino);
+		return (ent);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Allocates and initializes a tarfs_node and links it into the mount's
+ * list of all nodes and, if parent is not NULL, into parent's directory.
+ *
+ * tmp       mount the node belongs to
+ * name      node name, namelen bytes long (need not be NUL-terminated);
+ *           a copy is made.  namelen == 0 leaves the node nameless.
+ * type      vnode type; only VDIR, VLNK, VREG, VFIFO, VBLK and VCHR are
+ *           accepted, anything else panics
+ * off, sz   offset and size of the file data; for VLNK, sz is also the
+ *           length of linkname
+ * mtime, uid, gid, mode
+ *           attributes copied into the node
+ * flags     accepted but not used by this function
+ * linkname  symlink target (VLNK only), sz bytes long; a copy is made
+ * rdev      device number (VBLK / VCHR only)
+ * parent    directory to insert the node into, or NULL for the root
+ * retnode   receives the new node
+ *
+ * All allocations use M_WAITOK and are not checked for failure.  Always
+ * returns 0.
+ */
+int
+tarfs_alloc_node(struct tarfs_mount *tmp, const char *name, size_t namelen,
+ enum vtype type, off_t off, size_t sz, time_t mtime, uid_t uid, gid_t gid,
+ mode_t mode, unsigned int flags, const char *linkname, dev_t rdev,
+ struct tarfs_node *parent, struct tarfs_node **retnode)
+{
+ struct tarfs_node *tnp;
+
+ TARFS_DPF(ALLOC, "%s(%.*s)\n", __func__, (int)namelen, name);
+
+ tnp = malloc(sizeof(struct tarfs_node), M_TARFSNODE, M_WAITOK | M_ZERO);
+ mtx_init(&tnp->lock, "tarfs node lock", NULL, MTX_DEF);
+ tnp->gen = arc4random();
+ tnp->tmp = tmp;
+ if (namelen > 0) {
+ /* keep a private, NUL-terminated copy of the name */
+ tnp->name = malloc(namelen + 1, M_TARFSNAME, M_WAITOK);
+ tnp->namelen = namelen;
+ memcpy(tnp->name, name, namelen);
+ tnp->name[namelen] = '\0';
+ }
+ tnp->type = type;
+ tnp->uid = uid;
+ tnp->gid = gid;
+ tnp->mode = mode;
+ tnp->nlink = 1;
+ /* atime and birthtime are the current time; ctime mirrors mtime */
+ vfs_timestamp(&tnp->atime);
+ tnp->mtime.tv_sec = mtime;
+ tnp->birthtime = tnp->atime;
+ tnp->ctime = tnp->mtime;
+ if (parent != NULL) {
+ /* NOTE(review): the result of alloc_unr() is not checked here */
+ tnp->ino = alloc_unr(tmp->ino_unr);
+ }
+ tnp->offset = off;
+ tnp->size = tnp->physize = sz;
+ switch (type) {
+ case VDIR:
+ MPASS(parent != tnp);
+ /* only one parentless directory (the root) may be created */
+ MPASS(parent != NULL || tmp->root == NULL);
+ TAILQ_INIT(&tnp->dir.dirhead);
+ tnp->nlink++;
+ if (parent == NULL) {
+ tnp->ino = TARFS_ROOTINO;
+ }
+ tnp->physize = 0;
+ break;
+ case VLNK:
+ /* private, NUL-terminated copy of the link target */
+ tnp->link.name = malloc(sz + 1, M_TARFSNAME,
+ M_WAITOK);
+ tnp->link.namelen = sz;
+ memcpy(tnp->link.name, linkname, sz);
+ tnp->link.name[sz] = '\0';
+ break;
+ case VREG:
+ /* create dummy block map */
+ /* a single extent covering the whole file */
+ tnp->nblk = 1;
+ tnp->blk = malloc(sizeof(*tnp->blk), M_TARFSBLK, M_WAITOK);
+ tnp->blk[0].i = 0;
+ tnp->blk[0].o = 0;
+ tnp->blk[0].l = tnp->physize;
+ break;
+ case VFIFO:
+ /* Nothing extra to do */
+ break;
+ case VBLK:
+ case VCHR:
+ tnp->rdev = rdev;
+ tnp->physize = 0;
+ break;
+ default:
+ panic("%s: type %d not allowed", __func__, type);
+ }
+ if (parent != NULL) {
+ MPASS(parent->type == VDIR);
+ TARFS_NODE_LOCK(parent);
+ TAILQ_INSERT_TAIL(&parent->dir.dirhead, tnp, dirents);
+ /* a directory's size grows by one node structure per entry */
+ parent->size += sizeof(struct tarfs_node);
+ tnp->parent = parent;
+ if (type == VDIR) {
+ parent->nlink++;
+ }
+ TARFS_NODE_UNLOCK(parent);
+ } else {
+ /* a parentless node is its own parent */
+ tnp->parent = tnp;
+ }
+ MPASS(tnp->ino != 0);
+
+ TARFS_ALLNODES_LOCK(tmp);
+ TAILQ_INSERT_TAIL(&tmp->allnodes, tnp, entries);
+ TARFS_ALLNODES_UNLOCK(tmp);
+
+ *retnode = tnp;
+ tmp->nfiles++;
+ return (0);
+}
+
+/*
+ * True if ch is an ASCII decimal digit.  Note that ch is evaluated twice.
+ * NOTE(review): no use of this macro is visible in this part of the file.
+ */
+#define is09(ch) ((ch) >= '0' && (ch) <= '9')
+
+/*
+ * Replaces the block map of node tnp with the one stored at the start of
+ * the file's data.
+ *
+ * The map is text: a decimal entry count on the first line followed, for
+ * each entry, by one line giving the logical offset and one line giving
+ * the length.  It occupies a whole number of TARFS_BLOCKSIZE blocks; the
+ * data of the first entry starts right after the map and each further
+ * entry's data follows the previous one.
+ *
+ * realsize is the logical size of the file.  On success the node's
+ * block map and size are updated and 0 is returned.  Returns EINVAL if
+ * the map is malformed or inconsistent, EIO on a short read, or the
+ * error reported by tarfs_io_read_buf().  The node is left untouched on
+ * failure.
+ */
+int
+tarfs_load_blockmap(struct tarfs_node *tnp, size_t realsize)
+{
+	struct tarfs_blk *blk = NULL;
+	char *map = NULL;
+	size_t nmap = 0, nblk = 0;
+	char *p, *q;
+	ssize_t res;
+	unsigned int i;
+	long n;
+	int error = EINVAL;
+
+	/*
+	 * Load the entire map into memory.  We don't know how big it is,
+	 * but as soon as we start reading it we will know how many
+	 * entries it contains, and then we can count newlines.
+	 */
+	do {
+		nmap++;
+		if (tnp->size < nmap * TARFS_BLOCKSIZE) {
+			TARFS_DPF(MAP, "%s: map too large\n", __func__);
+			goto bad;
+		}
+		/* grow the map */
+		map = realloc(map, nmap * TARFS_BLOCKSIZE + 1, M_TARFSBLK,
+		    M_ZERO | M_WAITOK);
+		/* read an additional block */
+		res = tarfs_io_read_buf(tnp->tmp, false,
+		    map + (nmap - 1) * TARFS_BLOCKSIZE,
+		    tnp->offset + (nmap - 1) * TARFS_BLOCKSIZE,
+		    TARFS_BLOCKSIZE);
+		/* leave through the cleanup path so the map is not leaked */
+		if (res < 0) {
+			error = -res;
+			goto bad;
+		} else if (res < TARFS_BLOCKSIZE) {
+			error = EIO;
+			goto bad;
+		}
+		map[nmap * TARFS_BLOCKSIZE] = '\0'; /* sentinel */
+		if (nblk == 0) {
+			/* first line: number of entries */
+			n = strtol(p = map, &q, 10);
+			if (q == p || *q != '\n' || n < 1)
+				goto syntax;
+			nblk = n;
+		}
+		for (n = 0, p = map; *p != '\0'; ++p) {
+			if (*p == '\n') {
+				++n;
+			}
+		}
+		TARFS_DPF(MAP, "%s: %ld newlines in map\n", __func__, n);
+		/* one line for the count plus two per entry */
+	} while (n < nblk * 2 + 1);
+	TARFS_DPF(MAP, "%s: block map length %zu\n", __func__, nblk);
+	blk = malloc(sizeof(*blk) * nblk, M_TARFSBLK, M_WAITOK | M_ZERO);
+	/* skip the count line, which is known to end in a newline */
+	p = strchr(map, '\n') + 1;
+	for (i = 0; i < nblk; i++) {
+		/* data is packed back to back, starting right after the map */
+		if (i == 0)
+			blk[i].i = nmap * TARFS_BLOCKSIZE;
+		else
+			blk[i].i = blk[i - 1].i + blk[i - 1].l;
+		n = strtol(p, &q, 10);
+		if (q == p || *q != '\n' || n < 0)
+			goto syntax;
+		p = q + 1;
+		blk[i].o = n;
+		n = strtol(p, &q, 10);
+		if (q == p || *q != '\n' || n < 0)
+			goto syntax;
+		p = q + 1;
+		blk[i].l = n;
+		TARFS_DPF(MAP, "%s: %3d %12zu %12zu %12zu\n", __func__,
+		    i, blk[i].i, blk[i].o, blk[i].l);
+		/*
+		 * Check block alignment if the block is of non-zero
+		 * length (a zero-length block indicates the end of a
+		 * trailing hole).  Checking i indirectly checks the
+		 * previous block's l.  It's ok for the final block to
+		 * have an uneven length.
+		 */
+		if (blk[i].l == 0) {
+			TARFS_DPF(MAP, "%s: zero-length block\n", __func__);
+		} else if (blk[i].i % TARFS_BLOCKSIZE != 0 ||
+		    blk[i].o % TARFS_BLOCKSIZE != 0) {
+			TARFS_DPF(MAP, "%s: misaligned map entry\n", __func__);
+			goto bad;
+		}
+		/*
+		 * Check that this block starts after the end of the
+		 * previous one.
+		 */
+		if (i > 0 && blk[i].o < blk[i - 1].o + blk[i - 1].l) {
+			TARFS_DPF(MAP, "%s: overlapping map entries\n", __func__);
+			goto bad;
+		}
+		/*
+		 * Check that the block is within the file, both
+		 * physically and logically.
+		 */
+		if (blk[i].i + blk[i].l > tnp->physize ||
+		    blk[i].o + blk[i].l > realsize) {
+			TARFS_DPF(MAP, "%s: map overflow\n", __func__);
+			goto bad;
+		}
+	}
+	free(map, M_TARFSBLK);
+
+	/* store in node */
+	free(tnp->blk, M_TARFSBLK);
+	tnp->nblk = nblk;
+	tnp->blk = blk;
+	tnp->size = realsize;
+	return (0);
+syntax:
+	TARFS_DPF(MAP, "%s: syntax error in block map\n", __func__);
+bad:
+	/* error is EINVAL unless a read failure set it */
+	free(map, M_TARFSBLK);
+	free(blk, M_TARFSBLK);
+	return (error);
+}
+
+/*
+ * Releases a tarfs_node and the memory it owns: the symlink target (for
+ * VLNK nodes), the name, the block map, and the inode number if it was
+ * dynamically assigned (>= TARFS_MININO).  Decrements the mount's file
+ * count.  The node is not unlinked from any list here.
+ */
+void
+tarfs_free_node(struct tarfs_node *tnp)
+{
+	struct tarfs_mount *tmp;
+
+	MPASS(tnp != NULL);
+	tmp = tnp->tmp;
+
+	if (tnp->type == VLNK && tnp->link.name != NULL)
+		free(tnp->link.name, M_TARFSNAME);
+	if (tnp->name != NULL)
+		free(tnp->name, M_TARFSNAME);
+	if (tnp->blk != NULL)
+		free(tnp->blk, M_TARFSBLK);
+	if (tnp->ino >= TARFS_MININO)
+		free_unr(tmp->ino_unr, tnp->ino);
+	free(tnp, M_TARFSNODE);
+	tmp->nfiles--;
+}
+
+/*
+ * Reads up to len bytes from the file represented by tnp into uiop,
+ * starting at uiop->uio_offset.
+ *
+ * The node's block map is walked in order.  Ranges not covered by any
+ * map entry (holes) are satisfied with zeroes from zero_region; ranges
+ * covered by an entry are read from the underlying tar file through
+ * tarfs_io_read().  uiop's offset and residual count are advanced by the
+ * number of bytes actually transferred.
+ *
+ * Returns 0 on success or the error from uiomove() / tarfs_io_read().
+ */
+int
+tarfs_read_file(struct tarfs_node *tnp, size_t len, struct uio *uiop)
+{
+	struct uio auio;
+	size_t resid = len;
+	size_t copylen;
+	unsigned int i;
+	int error;
+
+	TARFS_DPF(VNODE, "%s(%s, %zu, %zu)\n", __func__,
+	    tnp->name, uiop->uio_offset, resid);
+	for (i = 0; i < tnp->nblk && resid > 0; ++i) {
+		if (uiop->uio_offset > tnp->blk[i].o + tnp->blk[i].l) {
+			/* skip this block */
+			continue;
+		}
+		while (resid > 0 &&
+		    uiop->uio_offset < tnp->blk[i].o) {
+			/* move out some zeroes... */
+			copylen = tnp->blk[i].o - uiop->uio_offset;
+			if (copylen > resid)
+				copylen = resid;
+			if (copylen > ZERO_REGION_SIZE)
+				copylen = ZERO_REGION_SIZE;
+			/* work on a copy so the source offset can be set */
+			auio = *uiop;
+			auio.uio_offset = 0;
+			auio.uio_resid = copylen;
+			error = uiomove(__DECONST(void *, zero_region),
+			    copylen, &auio);
+			if (error != 0)
+				return (error);
+			TARFS_DPF(MAP, "%s(%s) = zero %zu\n", __func__,
+			    tnp->name, copylen - auio.uio_resid);
+			uiop->uio_offset += copylen - auio.uio_resid;
+			uiop->uio_resid -= copylen - auio.uio_resid;
+			resid -= copylen - auio.uio_resid;
+		}
+		/*
+		 * NOTE(review): if tarfs_io_read() can return 0 without
+		 * transferring anything, this loop never terminates.
+		 */
+		while (resid > 0 &&
+		    uiop->uio_offset < tnp->blk[i].o + tnp->blk[i].l) {
+			/*
+			 * Now actual data.  Only what is left of this
+			 * block may be read: the offset can already be
+			 * partway into it, either because the request
+			 * started there or after a short read.
+			 */
+			copylen = tnp->blk[i].o + tnp->blk[i].l -
+			    uiop->uio_offset;
+			if (copylen > resid)
+				copylen = resid;
+			auio = *uiop;
+			/* translate the logical offset to a tar file offset */
+			auio.uio_offset = tnp->offset + tnp->blk[i].i +
+			    uiop->uio_offset - tnp->blk[i].o;
+			auio.uio_resid = copylen;
+			error = tarfs_io_read(tnp->tmp, false, &auio);
+			if (error != 0)
+				return (error);
+			TARFS_DPF(MAP, "%s(%s) = data %zu\n", __func__,
+			    tnp->name, copylen - auio.uio_resid);
+			uiop->uio_offset += copylen - auio.uio_resid;
+			uiop->uio_resid -= copylen - auio.uio_resid;
+			resid -= copylen - auio.uio_resid;
+		}
+	}
+	TARFS_DPF(VNODE, "%s(%s) = %zu\n", __func__,
+	    tnp->name, len - resid);
+	return (0);
+}
+
+/*
+ * XXX ugly file flag parser which could easily be a finite state machine
+ * driven by a small precomputed table.
+ *
+ * Note that unlike strtofflags(3), we make no attempt to handle negated
+ * flags, since they shouldn't appear in tar files.
+ */
+/*
+ * Maps the file flag names recognized by tarfs_strtofflags() to their
+ * flag bits.  The table ends with an entry whose name is NULL.
+ */
+static const struct tarfs_flag {
+	const char *name;
+	unsigned int flag;
+} tarfs_flags[] = {
+	{ .name = "nodump",	.flag = UF_NODUMP },
+	{ .name = "uchg",	.flag = UF_IMMUTABLE },
+	{ .name = "uappnd",	.flag = UF_APPEND },
+	{ .name = "opaque",	.flag = UF_OPAQUE },
+	{ .name = "uunlnk",	.flag = UF_NOUNLINK },
+	{ .name = "arch",	.flag = SF_ARCHIVED },
+	{ .name = "schg",	.flag = SF_IMMUTABLE },
+	{ .name = "sappnd",	.flag = SF_APPEND },
+	{ .name = "sunlnk",	.flag = SF_NOUNLINK },
+	{ .name = NULL,		.flag = 0 },
+};
+
+/*
+ * Converts a comma-separated list of file flag names (as listed in
+ * tarfs_flags) into the corresponding flag bits.
+ *
+ * Parsing stops at the end of the string, at the first character that
+ * is neither a lowercase letter nor a comma, or at the first name that
+ * is not in the table (which includes an empty name).  The flags
+ * recognized up to that point are returned.
+ *
+ * If end is not NULL, *end is set to where parsing stopped, so a caller
+ * can tell complete success (*end points at the terminating NUL) from a
+ * partial parse.
+ */
+unsigned int
+tarfs_strtofflags(const char *str, char **end)
+{
+	const struct tarfs_flag *tf;
+	const char *p, *q;
+	unsigned int ret;
+
+	ret = 0;
+	for (p = q = str; *q != '\0'; p = q + 1) {
+		/* find the end of the current name */
+		for (q = p; *q != '\0' && *q != ','; ++q) {
+			if (*q < 'a' || *q > 'z') {
+				goto end;
+			}
+		}
+		/* look it up; names must match in full */
+		for (tf = tarfs_flags; tf->name != NULL; tf++) {
+			if (strncmp(tf->name, p, q - p) == 0 &&
+			    tf->name[q - p] == '\0') {
+				TARFS_DPF(ALLOC, "%s: %.*s = 0x%06x\n", __func__,
+				    (int)(q - p), p, tf->flag);
+				ret |= tf->flag;
+				break;
+			}
+		}
+		if (tf->name == NULL) {
+			TARFS_DPF(ALLOC, "%s: %.*s = 0x??????\n",
+			    __func__, (int)(q - p), p);
+			goto end;
+		}
+	}
+end:
+	/* test the out-parameter itself, not what it happens to point to */
+	if (end != NULL) {
+		*end = __DECONST(char *, q);
+	}
+	return (ret);
+}
diff --git a/sys/fs/tarfs/tarfs_vfsops.c b/sys/fs/tarfs/tarfs_vfsops.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/tarfs/tarfs_vfsops.c
@@ -0,0 +1,1187 @@
+/*-
+ * Copyright (c) 2013 Juniper Networks, Inc.
+ * Copyright (c) 2022 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* XXX GNU tar format is not supported by this driver */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_tarfs.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/libkern.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/sbuf.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/vnode.h>
+
+#include <vm/vm_param.h>
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
+#include <fs/tarfs/tarfs.h>
+#include <fs/tarfs/tarfs_dbg.h>
+
+/*
+ * The end-of-archive check compares a whole TARFS_BLOCKSIZE block
+ * against zero_region, so zero_region must be at least that large.
+ */
+CTASSERT(ZERO_REGION_SIZE > TARFS_BLOCKSIZE);
+
+/*
+ * Layout of a tar header block.  All members are character arrays, so
+ * the structure can be overlaid directly on a block read from the
+ * archive.  The fields add up to 500 bytes.  Numeric fields are decoded
+ * with tarfs_str2int64().
+ */
+struct ustar_header {
+ char name[100]; /* File name */
+ char mode[8]; /* Mode flags */
+ char uid[8]; /* User id */
+ char gid[8]; /* Group id */
+ char size[12]; /* Size */
+ char mtime[12]; /* Modified time */
+ char checksum[8]; /* Checksum */
+ char typeflag[1]; /* Type */
+ char linkname[100]; /* "old format" stops here */
+ char magic[6]; /* POSIX UStar "ustar\0" indicator */
+ char version[2]; /* POSIX UStar version "00" */
+ char uname[32]; /* User name */
+ char gname[32]; /* Group name */
+ char major[8]; /* Device major number */
+ char minor[8]; /* Device minor number */
+ char prefix[155]; /* Path prefix */
+};
+
+/* Block number stored by tarfs_alloc_one() once the end of archive is hit */
+#define TAR_EOF ((off_t)-1)
+
+/* Values of the typeflag header field */
+#define TAR_TYPE_FILE '0'
+#define TAR_TYPE_HARDLINK '1'
+#define TAR_TYPE_SYMLINK '2'
+#define TAR_TYPE_CHAR '3'
+#define TAR_TYPE_BLOCK '4'
+#define TAR_TYPE_DIRECTORY '5'
+#define TAR_TYPE_FIFO '6'
+#define TAR_TYPE_CONTIG '7'
+#define TAR_TYPE_GLOBAL_EXTHDR 'g'
+#define TAR_TYPE_EXTHDR 'x'
+#define TAR_TYPE_GNU_SPARSE 'S'
+
+/*
+ * Expected contents of the magic and version header fields.  These are
+ * array compound literals so that sizeof() gives the number of bytes to
+ * compare with memcmp().
+ */
+#define USTAR_MAGIC (uint8_t []){ 'u', 's', 't', 'a', 'r', 0 }
+#define USTAR_VERSION (uint8_t []){ '0', '0' }
+#define GNUTAR_MAGIC (uint8_t []){ 'u', 's', 't', 'a', 'r', ' ' }
+#define GNUTAR_VERSION (uint8_t []){ ' ', '\x0' }
+
+/* Mode given to directories that are created implicitly (r-xr-xr-x) */
+#define DEFDIRMODE (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
+
+/* malloc(9) types for the per-mount structure and for tarfs nodes */
+MALLOC_DEFINE(M_TARFSMNT, "tarfs mount", "tarfs mount structures");
+MALLOC_DEFINE(M_TARFSNODE, "tarfs node", "tarfs node structures");
+
+/* Forward declarations of the VFS operations implemented in this file */
+static vfs_mount_t tarfs_mount;
+static vfs_unmount_t tarfs_unmount;
+static vfs_root_t tarfs_root;
+static vfs_statfs_t tarfs_statfs;
+static vfs_fhtovp_t tarfs_fhtovp;
+
+/*
+ * NULL-terminated list of mount option names.
+ * NOTE(review): presumably the set of options accepted at mount time;
+ * confirm against tarfs_mount().
+ */
+static const char *tarfs_opts[] = {
+ "from", "gid", "mode", "uid", "verify",
+ NULL
+};
+
+/*
+ * Reads a signed octal number from the len-byte field at strp and
+ * returns its value.
+ *
+ * Leading spaces and tabs are skipped, an optional '-' is accepted, and
+ * conversion stops at the first character that is not an octal digit or
+ * at the end of the field.  A field holding only blanks yields 0.  A
+ * magnitude too large for an int64_t is clamped to INT64_MAX.
+ *
+ * XXX Does not report errors.
+ */
+static int64_t
+tarfs_str2octal(const char *strp, size_t len)
+{
+	int64_t val;
+	size_t idx;
+	int sign, digit;
+
+	/*
+	 * Skip leading spaces or tabs.
+	 * XXX why?  POSIX requires numeric fields to be 0-padded.
+	 */
+	for (idx = 0; idx < len; idx++)
+		if (strp[idx] != ' ' && strp[idx] != '\t')
+			break;
+
+	if (idx == len)
+		return (0);
+
+	if (strp[idx] == '-') {
+		sign = -1;
+		idx++;
+	} else
+		sign = 1;
+
+	val = 0;
+	for (; idx < len; idx++) {
+		if (strp[idx] < '0' || strp[idx] > '7')
+			break;
+		digit = strp[idx] - '0';
+
+		/*
+		 * Truncate on overflow.  The test comes before the value
+		 * is updated so that only a result that really does not
+		 * fit is clamped.
+		 */
+		if (val > (INT64_MAX - digit) / 8) {
+			val = INT64_MAX;
+			break;
+		}
+		val = val * 8 + digit;
+	}
+
+	return (sign > 0) ? val : -val;
+}
+
+/*
+ * Reads a len-byte extended numeric value from strp.  The first byte has
+ * bit 7 set to indicate the format; the remaining 7 bits + the (len - 1)
+ * bytes that follow form a big-endian signed two's complement binary
+ * number.  Returns the value, clamped to INT64_MAX / INT64_MIN if it
+ * does not fit in an int64_t.  XXX Does not report errors.
+ */
+static int64_t
+tarfs_str2base256(const char *strp, size_t len)
+{
+	int64_t val;
+	size_t idx;
+
+	KASSERT(strp[0] & 0x80, ("not an extended numeric value"));
+
+	/*
+	 * Sign-extend the low seven bits of the first byte: bit 6 is the
+	 * sign bit and carries a weight of -64.
+	 */
+	val = (strp[0] & 0x3f) - (strp[0] & 0x40);
+
+	/* Read subsequent bytes */
+	for (idx = 1; idx < len; idx++) {
+		/*
+		 * Truncate on overflow and underflow.  The test comes
+		 * before the value is scaled, so the arithmetic below can
+		 * never overflow, and multiplication is used because left
+		 * shifting a negative value is undefined.
+		 */
+		if (val > INT64_MAX / 256)
+			return (INT64_MAX);
+		if (val < INT64_MIN / 256)
+			return (INT64_MIN);
+		val = val * 256 + (unsigned char)strp[idx];
+	}
+
+	return (val);
+}
+
+/*
+ * Reads a len-byte numeric field from strp.  If bit 7 of the first byte
+ * is set, the field is decoded as an extended numeric value (signed
+ * two's complement); otherwise it is decoded as signed octal.  An empty
+ * field yields 0.
+ *
+ * XXX practically no error checking or handling
+ */
+static int64_t
+tarfs_str2int64(const char *strp, size_t len)
+{
+
+	if (len < 1)
+		return (0);
+
+	return ((strp[0] & 0x80) != 0 ?
+	    tarfs_str2base256(strp, len) :
+	    tarfs_str2octal(strp, len));
+}
+
+/*
+ * Verifies the checksum of a header.  The checksum is the sum of all
+ * bytes of the header structure with the checksum field itself counted
+ * as if it were filled with spaces.  The sum is first computed over
+ * unsigned bytes and, if that does not match, over signed bytes, which
+ * some older formats used.  Returns true if either sum matches the
+ * value stored in the header, false otherwise.
+ */
+static boolean_t
+tarfs_checksum(struct ustar_header *hdrp)
+{
+	const unsigned char *start = (const unsigned char *)hdrp;
+	const unsigned char *field = (const unsigned char *)hdrp->checksum;
+	const unsigned char *after = (const unsigned char *)hdrp->typeflag;
+	const unsigned char *limit = (const unsigned char *)(hdrp + 1);
+	const unsigned char *cp;
+	int64_t expected, sum;
+	size_t n;
+
+	expected = tarfs_str2int64(hdrp->checksum, sizeof(hdrp->checksum));
+	TARFS_DPF(CHECKSUM, "%s: header checksum %lx\n", __func__, expected);
+
+	/* first attempt: bytes taken as unsigned */
+	sum = 0;
+	for (cp = start; cp < field; cp++)
+		sum += *cp;
+	for (n = sizeof(hdrp->checksum); n > 0; n--)
+		sum += 0x20;
+	for (cp = after; cp < limit; cp++)
+		sum += *cp;
+	TARFS_DPF(CHECKSUM, "%s: calc unsigned checksum %lx\n", __func__,
+	    sum);
+	if (sum == expected)
+		return (true);
+
+	/*
+	 * Second attempt: bytes taken as signed, for archives written
+	 * with the broken form of the calculation.
+	 */
+	sum = 0;
+	for (cp = start; cp < field; cp++)
+		sum += *((const signed char *)cp);
+	for (n = sizeof(hdrp->checksum); n > 0; n--)
+		sum += 0x20;
+	for (cp = after; cp < limit; cp++)
+		sum += *((const signed char *)cp);
+	TARFS_DPF(CHECKSUM, "%s: calc signed checksum %lx\n", __func__,
+	    sum);
+
+	return (sum == expected);
+}
+
+
+/*
+ * Looks up a path in the tarfs node tree.
+ *
+ * - If the path exists, stores a pointer to the corresponding tarfs_node
+ *   in retnode and a pointer to its parent in retparent.
+ *
+ * - If the path does not exist, but create_dirs is true, creates ancestor
+ *   directories and returns NULL in retnode and the parent in retparent.
+ *
+ * - If the path does not exist and create_dirs is false, stops at the
+ *   first missing path name component.
+ *
+ * - In all cases, on return, endp and sepp point to the beginning and
+ *   end, respectively, of the last-processed path name component; endp
+ *   is set to NULL if nothing remains to be processed.
+ *
+ * - name need not be NUL-terminated; no more than namelen bytes of it
+ *   are examined.
+ *
+ * - retparent, retnode, endp and sepp may each be NULL if the caller is
+ *   not interested in them.
+ *
+ * - Returns 0 unless an error occurred: EINVAL if the path tries to
+ *   climb above the root with "..", or the error from
+ *   tarfs_alloc_node().  A missing final component is reported through
+ *   a NULL retnode, not through the return value.
+ */
+static int
+tarfs_lookup_path(struct tarfs_mount *tmp, char *name, size_t namelen,
+    char **endp, char **sepp, struct tarfs_node **retparent,
+    struct tarfs_node **retnode, boolean_t create_dirs)
+{
+	struct componentname cn;
+	struct tarfs_node *parent, *tnp;
+	char *sep;
+	size_t len;
+	int error;
+	boolean_t do_lookup;
+
+	MPASS(name != NULL && namelen != 0);
+
+	/*
+	 * do_lookup stays true for as long as every component seen so far
+	 * exists; once one is missing, the rest cannot exist either.
+	 */
+	do_lookup = true;
+	error = 0;
+	parent = tnp = tmp->root;
+	if (tnp == NULL)
+		panic("%s: root node not yet created", __func__);
+
+	bzero(&cn, sizeof(cn));
+
+	TARFS_DPF(LOOKUP, "%s: Full path: %.*s\n", __func__, (int)namelen,
+	    name);
+
+	sep = NULL;
+	for (;;) {
+		/* skip leading slash(es); check the bound before reading */
+		while (namelen > 0 && name[0] == '/')
+			name++, namelen--;
+
+		/* did we reach the end? */
+		if (namelen == 0 || name[0] == '\0') {
+			/*
+			 * If the last component was not found, hand it
+			 * back to the caller so it can be created.
+			 */
+			name = do_lookup ? NULL : cn.cn_nameptr;
+			namelen = do_lookup ? 0 : cn.cn_namelen;
+			break;
+		}
+
+		/* locate the next separator; check the bound before reading */
+		for (sep = name, len = 0;
+		    len < namelen && *sep != '\0' && *sep != '/';
+		    sep++, len++)
+			/* nothing */ ;
+
+		/* check for . and .. */
+		if (name[0] == '.' && len <= 2) {
+			if (len == 1) {
+				/* . */
+				name += len;
+				namelen -= len;
+				continue;
+			} else if (name[1] == '.') {
+				/* .. */
+				if (tnp == tmp->root) {
+					error = EINVAL;
+					break;
+				}
+				tnp = tnp->parent;
+				parent = tnp->parent;
+				name += len;
+				namelen -= len;
+				continue;
+			}
+		}
+
+		/*
+		 * create parent if necessary: cn still describes the
+		 * previous component, which was found to be missing
+		 */
+		if (!do_lookup) {
+			TARFS_DPF(ALLOC, "%s: creating %.*s\n", __func__,
+			    (int)cn.cn_namelen, cn.cn_nameptr);
+			error = tarfs_alloc_node(tmp, cn.cn_nameptr,
+			    cn.cn_namelen, VDIR, -1, 0, tmp->mtime, 0, 0,
+			    DEFDIRMODE, 0, NULL, NODEV, parent, &tnp);
+			if (error != 0)
+				break;
+		}
+
+		parent = tnp;
+		tnp = NULL;
+		cn.cn_nameptr = name;
+		cn.cn_namelen = len;
+		TARFS_DPF(LOOKUP, "%s: Search: %.*s\n", __func__,
+		    (int)cn.cn_namelen, cn.cn_nameptr);
+		if (do_lookup) {
+			tnp = tarfs_lookup_node(parent, NULL, &cn);
+			if (tnp == NULL) {
+				do_lookup = false;
+				if (!create_dirs)
+					break;
+			}
+		}
+		name += cn.cn_namelen;
+		namelen -= cn.cn_namelen;
+	}
+
+	TARFS_DPF(LOOKUP, "%s: Parent %p, node %p\n", __func__, parent, tnp);
+
+	if (retparent)
+		*retparent = parent;
+	if (retnode)
+		*retnode = tnp;
+	if (endp) {
+		if (namelen > 0)
+			*endp = name;
+		else
+			*endp = NULL;
+	}
+	if (sepp)
+		*sepp = sep;
+	return (error);
+}
+
+/*
+ * Frees a tarfs_mount structure and everything it references: every
+ * node on its list of all nodes, the I/O layer, and the inode number
+ * allocator.  Also clears the mnt_data pointer of the associated struct
+ * mount.
+ */
+static void
+tarfs_free_mount(struct tarfs_mount *tmp)
+{
+	struct tarfs_node *tnp;
+
+	MPASS(tmp != NULL);
+
+	TARFS_DPF(ALLOC, "%s: Freeing mount structure %p\n", __func__, tmp);
+
+	TARFS_DPF(ALLOC, "%s: freeing tarfs_node structures\n", __func__);
+	/* pop nodes off the head of the list until none are left */
+	while ((tnp = TAILQ_FIRST(&tmp->allnodes)) != NULL) {
+		TAILQ_REMOVE(&tmp->allnodes, tnp, entries);
+		tarfs_free_node(tnp);
+	}
+
+	/* any error from the I/O layer is deliberately ignored */
+	(void)tarfs_io_fini(tmp);
+
+	TARFS_DPF(ALLOC, "%s: deleting unr header\n", __func__);
+	delete_unrhdr(tmp->ino_unr);
+	tmp->vfs->mnt_data = NULL;
+
+	TARFS_DPF(ALLOC, "%s: freeing structure\n", __func__);
+	free(tmp, M_TARFSMNT);
+}
+
+/*
+ * Processes the tar file header at block offset *blknump and allocates and
+ * populates a tarfs_node structure for the file it describes. Updates
+ * *blknump to point to the next unread tar file block, or TAR_EOF if EOF
+ * is reached. Returns 0 on success or EOF and a positive errno value on
+ * failure.
+ */
+static int
+tarfs_alloc_one(struct tarfs_mount *tmp, off_t *blknump)
+{
+ char block[TARFS_BLOCKSIZE];
+ struct ustar_header *hdrp = (struct ustar_header *)block;
+ struct sbuf *namebuf = NULL;
+ char *exthdr = NULL, *name = NULL, *link = NULL;
+ off_t blknum = *blknump;
+ int endmarker = 0;
+ char *namep, *sep;
+ struct tarfs_node *parent, *tnp;
+ size_t namelen = 0, linklen = 0, realsize = 0, sz;
+ ssize_t res;
+ dev_t rdev;
+ gid_t gid;
+ mode_t mode;
+ time_t mtime;
+ uid_t uid;
+ long major = -1, minor = -1;
+ unsigned int flags = 0;
+ int error;
+ boolean_t sparse = false;
+
+again:
+ /* read next header */
+ res = tarfs_io_read_buf(tmp, false, block,
+ TARFS_BLOCKSIZE * blknum, TARFS_BLOCKSIZE);
+ if (res < 0) {
+ error = -res;
+ goto bad;
+ } else if (res < TARFS_BLOCKSIZE) {
+ goto eof;
+ }
+ blknum++;
+
+ /* check for end marker */
+ if (memcmp(block, zero_region, TARFS_BLOCKSIZE) == 0) {
+ if (endmarker++) {
+ if (exthdr != NULL) {
+ TARFS_DPF(IO, "%s: orphaned extended header at %zu\n",
+ __func__, TARFS_BLOCKSIZE * (blknum - 1));
+ free(exthdr, M_TEMP);
+ }
+ TARFS_DPF(IO, "%s: end of archive at %zu\n", __func__,
+ TARFS_BLOCKSIZE * blknum);
+ tmp->nblocks = blknum;
+ *blknump = TAR_EOF;
+ return (0);
+ }
+ goto again;
+ }
+
+ /* verify magic */
+ if (memcmp(hdrp->magic, USTAR_MAGIC, sizeof(USTAR_MAGIC)) == 0 &&
+ memcmp(hdrp->version, USTAR_VERSION, sizeof(USTAR_VERSION)) == 0) {
+ /* POSIX */
+ } else if (memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0 &&
+ memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0) {
+ TARFS_DPF(ALLOC, "%s: GNU tar format at %zu\n", __func__,
+ TARFS_BLOCKSIZE * (blknum - 1));
+ error = EFTYPE;
+ goto bad;
+ } else {
+ TARFS_DPF(ALLOC, "%s: unsupported TAR format at %zu\n",
+ __func__, TARFS_BLOCKSIZE * (blknum - 1));
+ error = EINVAL;
+ goto bad;
+ }
+
+ /* verify checksum */
+ if (!tarfs_checksum(hdrp)) {
+ TARFS_DPF(ALLOC, "%s: header checksum failed at %zu\n",
+ __func__, TARFS_BLOCKSIZE * (blknum - 1));
+ error = EINVAL;
+ goto bad;
+ }
+
+ /* get standard attributes */
+ mode = tarfs_str2int64(hdrp->mode, sizeof(hdrp->mode));
+ uid = tarfs_str2int64(hdrp->uid, sizeof(hdrp->uid));
+ gid = tarfs_str2int64(hdrp->gid, sizeof(hdrp->gid));
+ sz = tarfs_str2int64(hdrp->size, sizeof(hdrp->size));
+ mtime = tarfs_str2int64(hdrp->mtime, sizeof(hdrp->mtime));
+ rdev = NODEV;
+ TARFS_DPF(ALLOC, "%s: [%c] %zu @%jd %o %d:%d\n", __func__,
+ hdrp->typeflag[0], sz, (intmax_t)mtime, mode, uid, gid);
+
+ /* extended header? */
+ if (hdrp->typeflag[0] == TAR_TYPE_GLOBAL_EXTHDR) {
+ printf("%s: unsupported global extended header at %zd\n",
+ __func__, TARFS_BLOCKSIZE * (blknum - 1));
+ error = EFTYPE;
+ goto bad;
+ }
+ if (hdrp->typeflag[0] == TAR_TYPE_EXTHDR) {
+ if (exthdr != NULL) {
+ TARFS_DPF(IO, "%s: multiple extended headers at %zu\n",
+ __func__, TARFS_BLOCKSIZE * (blknum - 1));
+ error = EFTYPE;
+ goto bad;
+ }
+ /* read the contents of the exthdr */
+ TARFS_DPF(ALLOC, "%s: %zu-byte extended header at %zd\n",
+ __func__, sz, TARFS_BLOCKSIZE * (blknum - 1));
+ exthdr = malloc(sz, M_TEMP, M_WAITOK);
+ res = tarfs_io_read_buf(tmp, false, exthdr,
+ TARFS_BLOCKSIZE * blknum, sz);
+ if (res < 0) {
+ error = -res;
+ goto bad;
+ }
+ if (res < sz) {
+ goto eof;
+ }
+ blknum += TARFS_SZ2BLKS(res);
+ /* XXX TODO: refactor this parser */
+ char *line = exthdr;
+ while (line < exthdr + sz) {
+ char *eol, *key, *value, *sep;
+ size_t len = strtoul(line, &sep, 10);
+ if (len == 0 || sep == line || *sep != ' ') {
+ TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
+ __func__);
+ error = EINVAL;
+ goto bad;
+ }
+ if (line + len > exthdr + sz) {
+ TARFS_DPF(ALLOC, "%s: exthdr overflow\n",
+ __func__);
+ error = EINVAL;
+ goto bad;
+ }
+ eol = line + len - 1;
+ *eol = '\0';
+ line += len;
+ key = sep + 1;
+ sep = strchr(key, '=');
+ if (sep == NULL) {
+ TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
+ __func__);
+ error = EINVAL;
+ goto bad;
+ }
+ *sep = '\0';
+ value = sep + 1;
+ TARFS_DPF(ALLOC, "%s: exthdr %s=%s\n", __func__,
+ key, value);
+ if (strcmp(key, "linkpath") == 0) {
+ link = value;
+ linklen = eol - value;
+ } else if (strcmp(key, "GNU.sparse.major") == 0) {
+ sparse = true;
+ major = strtol(value, &sep, 10);
+ if (sep != eol) {
+ printf("exthdr syntax error\n");
+ error = EINVAL;
+ goto bad;
+ }
+ } else if (strcmp(key, "GNU.sparse.minor") == 0) {
+ sparse = true;
+ minor = strtol(value, &sep, 10);
+ if (sep != eol) {
+ printf("exthdr syntax error\n");
+ error = EINVAL;
+ goto bad;
+ }
+ } else if (strcmp(key, "GNU.sparse.name") == 0) {
+ sparse = true;
+ name = value;
+ namelen = eol - value;
+ if (namelen == 0) {
+ printf("exthdr syntax error\n");
+ error = EINVAL;
+ goto bad;
+ }
+ } else if (strcmp(key, "GNU.sparse.realsize") == 0) {
+ sparse = true;
+ realsize = strtoul(value, &sep, 10);
+ if (sep != eol) {
+ printf("exthdr syntax error\n");
+ error = EINVAL;
+ goto bad;
+ }
+ } else if (strcmp(key, "SCHILY.fflags") == 0) {
+ flags |= tarfs_strtofflags(value, &sep);
+ if (sep != eol) {
+ printf("exthdr syntax error\n");
+ error = EINVAL;
+ goto bad;
+ }
+ }
+ }
+ goto again;
+ }
+
+ /* sparse file consistency checks */
+ if (sparse) {
+ TARFS_DPF(ALLOC, "%s: %s: sparse %ld.%ld (%zu bytes)\n", __func__,
+ name, major, minor, realsize);
+ if (major != 1 || minor != 0 || name == NULL || realsize == 0 ||
+ hdrp->typeflag[0] != TAR_TYPE_FILE) {
+ TARFS_DPF(ALLOC, "%s: invalid sparse format\n", __func__);
+ error = EINVAL;
+ goto bad;
+ }
+ }
+
+ /* file name */
+ if (name == NULL) {
+ if (hdrp->prefix[0] != '\0') {
+ namebuf = sbuf_new_auto();
+ sbuf_printf(namebuf, "%.*s/%.*s",
+ (int)sizeof(hdrp->prefix), hdrp->prefix,
+ (int)sizeof(hdrp->name), hdrp->name);
+ sbuf_finish(namebuf);
+ name = sbuf_data(namebuf);
+ namelen = sbuf_len(namebuf);
+ } else {
+ name = hdrp->name;
+ namelen = strnlen(hdrp->name, sizeof(hdrp->name));
+ }
+ }
+
+ error = tarfs_lookup_path(tmp, name, namelen, &namep,
+ &sep, &parent, &tnp, true);
+ if (error != 0)
+ goto bad;
+ if (tnp != NULL) {
+ if (hdrp->typeflag[0] == TAR_TYPE_DIRECTORY) {
+ /* XXX set attributes? */
+ goto skip;
+ }
+ TARFS_DPF(ALLOC, "%s: duplicate file %.*s\n", __func__,
+ (int)namelen, name);
+ error = EINVAL;
+ goto bad;
+ }
+ switch (hdrp->typeflag[0]) {
+ case TAR_TYPE_DIRECTORY:
+ error = tarfs_alloc_node(tmp, namep, sep - namep, VDIR,
+ 0, 0, mtime, uid, gid, mode, flags, NULL, 0,
+ parent, &tnp);
+ break;
+ case TAR_TYPE_FILE:
+ error = tarfs_alloc_node(tmp, namep, sep - namep, VREG,
+ blknum * TARFS_BLOCKSIZE, sz, mtime, uid, gid, mode,
+ flags, NULL, 0, parent, &tnp);
+ if (error == 0 && sparse) {
+ error = tarfs_load_blockmap(tnp, realsize);
+ }
+ break;
+ case TAR_TYPE_HARDLINK:
+ if (link == NULL) {
+ link = hdrp->linkname;
+ linklen = strnlen(link, sizeof(hdrp->linkname));
+ }
+ error = tarfs_alloc_node(tmp, namep, sep - namep, VREG,
+ 0, 0, 0, 0, 0, 0, 0, NULL, 0, parent, &tnp);
+ if (error != 0) {
+ goto bad;
+ }
+ error = tarfs_lookup_path(tmp, link, linklen, NULL,
+ NULL, NULL, &tnp->other, false);
+ if (tnp->other == NULL ||
+ tnp->other->type != VREG ||
+ tnp->other->other != NULL) {
+ TARFS_DPF(ALLOC, "%s: %.*s: dead hard link to %.*s\n",
+ __func__, (int)namelen, name, (int)linklen, link);
+ error = EINVAL;
+ goto bad;
+ }
+ break;
+ case TAR_TYPE_SYMLINK:
+ if (link == NULL) {
+ link = hdrp->linkname;
+ linklen = strnlen(link, sizeof(hdrp->linkname));
+ }
+ error = tarfs_alloc_node(tmp, namep, sep - namep, VLNK,
+ 0, linklen, mtime, uid, gid, mode, flags, link, 0,
+ parent, &tnp);
+ break;
+ case TAR_TYPE_BLOCK:
+ major = tarfs_str2int64(hdrp->major, sizeof(hdrp->major));
+ minor = tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor));
+ rdev = makedev(major, minor);
+ error = tarfs_alloc_node(tmp, namep, sep - namep, VBLK,
+ 0, 0, mtime, uid, gid, mode, flags, NULL, rdev,
+ parent, &tnp);
+ break;
+ case TAR_TYPE_CHAR:
+ major = tarfs_str2int64(hdrp->major, sizeof(hdrp->major));
+ minor = tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor));
+ rdev = makedev(major, minor);
+ error = tarfs_alloc_node(tmp, namep, sep - namep, VCHR,
+ 0, 0, mtime, uid, gid, mode, flags, NULL, rdev,
+ parent, &tnp);
+ break;
+ default:
+ TARFS_DPF(ALLOC, "%s: unsupported type %c for %.*s\n",
+ __func__, hdrp->typeflag[0], (int)namelen, name);
+ error = EINVAL;
+ break;
+ }
+ if (error != 0)
+ goto bad;
+
+skip:
+ blknum += TARFS_SZ2BLKS(sz);
+ tmp->nblocks = blknum;
+ *blknump = blknum;
+ if (exthdr != NULL) {
+ free(exthdr, M_TEMP);
+ }
+ if (namebuf != NULL) {
+ sbuf_delete(namebuf);
+ }
+ return (0);
+eof:
+ TARFS_DPF(IO, "%s: premature end of file\n", __func__);
+ error = EIO;
+ goto bad;
+bad:
+ if (exthdr != NULL) {
+ free(exthdr, M_TEMP);
+ }
+ if (namebuf != NULL) {
+ sbuf_delete(namebuf);
+ }
+ return (error);
+}
+
+/*
+ * Allocates and populates the metadata structures for the tar file
+ * referenced by vp.  vp must be locked on entry; this function unlocks
+ * it once the source attributes have been read, whether or not that
+ * read succeeded, so the caller always gets it back unlocked.
+ *
+ * On success, a pointer to the tarfs_mount structure is stored in tmpp.
+ * Returns 0 on success or a positive errno value on failure.
+ */
+static int
+tarfs_alloc_mount(struct mount *mp, struct vnode *vp,
+    uid_t root_uid, gid_t root_gid, mode_t root_mode,
+    struct tarfs_mount **tmpp)
+{
+	struct vattr va;
+	struct thread *td = curthread;
+	char *fullpath;
+	struct tarfs_mount *tmp;
+	struct tarfs_node *root;
+	struct g_consumer *cp;
+	struct cdev *dev;
+	off_t blknum;
+	time_t mtime;
+	int error;
+
+	KASSERT(tmpp != NULL, ("tarfs mount return is NULL"));
+	ASSERT_VOP_LOCKED(vp, __func__);
+
+	/* cp, dev and fullpath are never assigned anything else below */
+	tmp = NULL;
+	dev = NULL;
+	cp = NULL;
+	fullpath = NULL;
+
+	TARFS_DPF(ALLOC, "%s: Allocating tarfs mount structure for vp %p\n",
+	    __func__, vp);
+
+	/* Get source metadata */
+	error = VOP_GETATTR(vp, &va, td->td_ucred);
+	/*
+	 * Drop the lock before looking at the result: the caller treats
+	 * vp as unlocked after this function returns, even on failure.
+	 */
+	VOP_UNLOCK(vp);
+	if (error != 0) {
+		return (error);
+	}
+	mtime = va.va_mtime.tv_sec;
+
+	/* Allocate and initialize tarfs mount structure */
+	tmp = (struct tarfs_mount *)malloc(sizeof(struct tarfs_mount),
+	    M_TARFSMNT, M_WAITOK | M_ZERO);
+	TARFS_DPF(ALLOC, "%s: Allocated mount structure\n", __func__);
+	mp->mnt_data = tmp;
+
+	mtx_init(&tmp->allnode_lock, "tarfs allnode lock", NULL,
+	    MTX_DEF);
+	TAILQ_INIT(&tmp->allnodes);
+	tmp->ino_unr = new_unrhdr(TARFS_MININO, INT_MAX, &tmp->allnode_lock);
+	tmp->vp = vp;
+	tmp->vfs = mp;
+	tmp->cp = cp;
+	tmp->dev = dev;
+	tmp->mtime = mtime;
+
+	/*
+	 * XXX The decompression layer passes everything through the
+	 * buffer cache, and the buffer cache wants to know our blocksize,
+	 * but mnt_stat normally isn't populated until after we return, so
+	 * we have to cheat a bit.
+	 */
+	tmp->iosize = 1U << tarfs_ioshift;
+	mp->mnt_stat.f_iosize = tmp->iosize;
+
+	/* Initialize decompression layer */
+	error = tarfs_io_init(tmp);
+	if (error != 0)
+		goto bad;
+
+	/* Create the root directory node with the caller-supplied owner/mode */
+	error = tarfs_alloc_node(tmp, NULL, 0, VDIR, 0, 0, mtime, root_uid,
+	    root_gid, root_mode & ALLPERMS, 0, NULL, NODEV, NULL, &root);
+	if (error == 0 && root == NULL) {
+		/* never report success after tearing the mount down */
+		error = EINVAL;
+	}
+	if (error != 0)
+		goto bad;
+	tmp->root = root;
+
+	/* Walk the archive one entry at a time until the end marker */
+	blknum = 0;
+	do {
+		if ((error = tarfs_alloc_one(tmp, &blknum)) != 0) {
+			goto bad;
+		}
+	} while (blknum != TAR_EOF);
+
+	*tmpp = tmp;
+
+	TARFS_DPF(ALLOC, "%s: pfsmnt_root %p\n", __func__, tmp->root);
+	return (0);
+
+bad:
+	/* NOTE(review): mp->mnt_data still points at tmp here - confirm */
+	if (tmp != NULL)
+		tarfs_free_mount(tmp);
+	if (cp != NULL) {
+		g_topology_lock();
+		g_vfs_close(cp);
+		g_topology_unlock();
+	}
+	free(fullpath, M_TEMP);
+	return (error);
+}
+
+/*
+ * VFS Operations.
+ */
+
+/*
+ * Mounts the tarball named by the "from" mount option on mp.
+ *
+ * MNT_UPDATE is not supported.  The root directory's uid, gid and mode
+ * default to those of the covered vnode; the "uid", "gid" and "mode"
+ * options override them only when the mounting credential's real uid
+ * is 0.  The "verify" option adds O_VERIFY to the open flags.
+ *
+ * Returns 0 on success or a positive errno value on failure.
+ */
+static int
+tarfs_mount(struct mount *mp)
+{
+	struct nameidata nd;
+	struct vattr va;
+	struct tarfs_mount *tmp = NULL;
+	struct thread *td = curthread;
+	struct vnode *vp;
+	char *from;
+	uid_t root_uid;
+	gid_t root_gid;
+	mode_t root_mode;
+	int error, flags, len;
+
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	if (vfs_filteropt(mp->mnt_optnew, tarfs_opts))
+		return (EINVAL);
+
+	/* Attributes of the mount point provide the root node defaults */
+	vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY);
+	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred);
+	VOP_UNLOCK(mp->mnt_vnodecovered);
+	if (error)
+		return (error);
+
+	if (mp->mnt_cred->cr_ruid != 0 ||
+	    vfs_scanopt(mp->mnt_optnew, "gid", "%d", &root_gid) != 1)
+		root_gid = va.va_gid;
+	if (mp->mnt_cred->cr_ruid != 0 ||
+	    vfs_scanopt(mp->mnt_optnew, "uid", "%d", &root_uid) != 1)
+		root_uid = va.va_uid;
+	if (mp->mnt_cred->cr_ruid != 0 ||
+	    vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1)
+		root_mode = va.va_mode;
+
+	/*
+	 * The source path must be a non-empty, NUL-terminated buffer;
+	 * check the length before indexing its last byte.
+	 */
+	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
+	if (error != 0 || len < 1 || from[len - 1] != '\0')
+		return (EINVAL);
+
+	/* Find the source tarball */
+	TARFS_DPF(FS, "%s(%s, uid=%u, gid=%u, mode=%o)\n", __func__,
+	    from, root_uid, root_gid, root_mode);
+	flags = FREAD;
+	if (vfs_flagopt(mp->mnt_optnew, "verify", NULL, 0)) {
+		flags |= O_VERIFY;
+	}
+	NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF, UIO_SYSSPACE, from);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	NDFREE_PNBUF(&nd);
+	vp = nd.ni_vp;
+	TARFS_DPF(FS, "%s: N: hold %u use %u lock 0x%x\n", __func__,
+	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
+	/* vp is now held and locked */
+
+	/* Open the source tarball */
+	error = vn_open_vnode(vp, flags, td->td_ucred, td, NULL);
+	if (error != 0) {
+		TARFS_DPF(FS, "%s: failed to open %s: %d\n", __func__,
+		    from, error);
+		vput(vp);
+		goto bad;
+	}
+	TARFS_DPF(FS, "%s: O: hold %u use %u lock 0x%x\n", __func__,
+	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
+	if (vp->v_type != VREG) {
+		TARFS_DPF(FS, "%s: not a regular file\n", __func__);
+		error = EOPNOTSUPP;
+		goto bad_open_locked;
+	}
+	error = priv_check(td, PRIV_VFS_MOUNT_PERM);
+	if (error != 0) {
+		TARFS_DPF(FS, "%s: not permitted to mount\n", __func__);
+		goto bad_open_locked;
+	}
+	if (flags & O_VERIFY) {
+		mp->mnt_flag |= MNT_VERIFIED;
+	}
+
+	/* Allocate the tarfs mount */
+	error = tarfs_alloc_mount(mp, vp, root_uid, root_gid, root_mode, &tmp);
+	/* vp is now held but unlocked */
+	if (error != 0) {
+		TARFS_DPF(FS, "%s: failed to mount %s: %d\n", __func__,
+		    from, error);
+		goto bad_open_unlocked;
+	}
+	TARFS_DPF(FS, "%s: M: hold %u use %u lock 0x%x\n", __func__,
+	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
+
+	/* Unconditionally mount as read-only */
+	MNT_ILOCK(mp);
+	mp->mnt_flag |= (MNT_LOCAL | MNT_RDONLY);
+	MNT_IUNLOCK(mp);
+
+	vfs_getnewfsid(mp);
+	vfs_mountedfrom(mp, "tarfs");
+	TARFS_DPF(FS, "%s: success\n", __func__);
+
+	return (0);
+
+bad_open_locked:
+	/* vp must be held and locked */
+	TARFS_DPF(FS, "%s: L: hold %u use %u lock 0x%x\n", __func__,
+	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
+	VOP_UNLOCK(vp);
+bad_open_unlocked:
+	/* vp must be held and unlocked */
+	TARFS_DPF(FS, "%s: E: hold %u use %u lock 0x%x\n", __func__,
+	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
+	(void)vn_close(vp, flags, td->td_ucred, td);
+bad:
+	/*
+	 * Every path to this label has already given up its reference
+	 * to vp (vput() or vn_close() above), so vp must not be
+	 * dereferenced here, not even for debug output.
+	 */
+	TARFS_DPF(FS, "%s: X: error %d\n", __func__, error);
+	return (error);
+}
+
+/*
+ * Unmounts a tarfs filesystem: flushes the mount's vnodes (forcibly
+ * when MNT_FORCE is set), closes the backing tarball vnode and frees
+ * the tarfs mount structure.  Returns 0 on success or the errno value
+ * reported by vflush().
+ */
+static int
+tarfs_unmount(struct mount *mp, int mntflags)
+{
+	struct thread *td = curthread;
+	struct tarfs_mount *tmp;
+	struct vnode *vp;
+	int error;
+	int flags = 0;
+
+	TARFS_DPF(FS, "%s: Unmounting %p\n", __func__, mp);
+
+	/* Handle forced unmounts */
+	if (mntflags & MNT_FORCE)
+		flags |= FORCECLOSE;
+
+	/* Finalize all pending I/O */
+	error = vflush(mp, 0, flags, td);
+	if (error != 0)
+		return (error);
+	tmp = MP_TO_TARFS_MOUNT(mp);
+	vp = tmp->vp;
+
+	MPASS(vp != NULL);
+	TARFS_DPF(FS, "%s: U: hold %u use %u lock 0x%x\n", __func__,
+	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
+	vn_close(vp, FREAD, td->td_ucred, td);
+	/*
+	 * The reference taken at mount time is gone once vn_close()
+	 * returns, so vp is not inspected any further.
+	 */
+	TARFS_DPF(FS, "%s: C: source vnode closed\n", __func__);
+	tarfs_free_mount(tmp);
+
+	return (0);
+}
+
+/*
+ * Looks up the root vnode of a tarfs filesystem (inode TARFS_ROOTINO,
+ * requested with LK_EXCLUSIVE), marks it VV_ROOT and stores it in vpp.
+ * Returns 0 on success or a positive errno value on failure, in which
+ * case vpp is left untouched.
+ */
+static int
+tarfs_root(struct mount *mp, int flags, struct vnode **vpp)
+{
+	struct vnode *rootvp;
+	int rc;
+
+	TARFS_DPF(FS, "%s: Getting root vnode\n", __func__);
+
+	rc = VFS_VGET(mp, TARFS_ROOTINO, LK_EXCLUSIVE, &rootvp);
+	if (rc == 0) {
+		rootvp->v_vflag |= VV_ROOT;
+		*vpp = rootvp;
+	}
+	return (rc);
+}
+
+/*
+ * Fills in filesystem statistics for a tarfs mount.  Block and file
+ * totals come from the tarfs mount structure; the free and available
+ * counts are always reported as zero.  Always returns 0.
+ */
+static int
+tarfs_statfs(struct mount *mp, struct statfs *sbp)
+{
+	struct tarfs_mount *tarmnt = MP_TO_TARFS_MOUNT(mp);
+
+	/* sizes */
+	sbp->f_bsize = TARFS_BLOCKSIZE;
+	sbp->f_iosize = tarmnt->iosize;
+
+	/* totals */
+	sbp->f_blocks = tarmnt->nblocks;
+	sbp->f_files = tarmnt->nfiles;
+
+	/* nothing is ever free */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_ffree = 0;
+
+	return (0);
+}
+
+/*
+ * Gets a vnode for the given inode. On success, a pointer to the vnode
+ * is stored in vpp. Returns 0 on success or a positive errno value on
+ * failure.
+ *
+ * The VFS hash is consulted first. On a miss, the compressed-I/O
+ * vnode (TARFS_ZIOINO) is returned directly from the mount structure;
+ * any other inode is found by a linear scan of the mount's node list
+ * and gets a freshly allocated vnode, which is then inserted into the
+ * mount's vnode list and the VFS hash.
+ */
+static int
+tarfs_vget(struct mount *mp, ino_t ino, int lkflags, struct vnode **vpp)
+{
+ struct tarfs_mount *tmp;
+ struct tarfs_node *tnp;
+ struct thread *td;
+ struct vnode *vp;
+ int error;
+
+ TARFS_DPF(FS, "%s: mp %p, ino %lu, lkflags %d\n", __func__, mp, ino,
+ lkflags);
+
+ /* fast path: the vnode may already be in the VFS hash */
+ td = curthread;
+ error = vfs_hash_get(mp, ino, lkflags, td, vpp, NULL, NULL);
+ if (error != 0)
+ return (error);
+
+ if (*vpp != NULL) {
+ TARFS_DPF(FS, "%s: found hashed vnode %p\n", __func__, *vpp);
+ return (error);
+ }
+
+ TARFS_DPF(FS, "%s: no hashed vnode for inode %lu\n", __func__, ino);
+
+ tmp = MP_TO_TARFS_MOUNT(mp);
+
+ /* the I/O vnode lives in the mount structure, not in the node list */
+ if (ino == TARFS_ZIOINO) {
+ error = vn_lock(tmp->znode, lkflags);
+ if (error != 0)
+ return (error);
+ vref(tmp->znode);
+ *vpp = tmp->znode;
+ return (0);
+ }
+
+ /* XXX Should use hash instead? */
+ TAILQ_FOREACH(tnp, &tmp->allnodes, entries) {
+ if (tnp->ino == ino)
+ break;
+ }
+ TARFS_DPF(FS, "%s: search of all nodes found %p\n", __func__, tnp);
+ if (tnp == NULL)
+ return (ENOENT);
+
+ /* allocate a vnode and tie it to the tarfs node in both directions */
+ error = getnewvnode("tarfs", mp, &tarfs_vnodeops, &vp);
+ if (error != 0)
+ goto bad;
+ TARFS_DPF(FS, "%s: allocated vnode\n", __func__);
+ vp->v_data = tnp;
+ vp->v_type = tnp->type;
+ tnp->vnode = vp;
+
+ /* lock the new vnode before it becomes visible on the mount list */
+ lockmgr(vp->v_vnlock, lkflags, NULL);
+ error = insmntque(vp, mp);
+ /*
+ * NOTE(review): on failure tnp->vnode still points at vp and vp is
+ * not released here - confirm what insmntque() leaves to its caller.
+ */
+ if (error != 0)
+ goto bad;
+ TARFS_DPF(FS, "%s: inserting entry into VFS hash\n", __func__);
+ error = vfs_hash_insert(vp, ino, lkflags, td, vpp, NULL, NULL);
+ /*
+ * A non-NULL *vpp presumably means another thread's vnode for this
+ * inode was already hashed and is returned instead - TODO confirm;
+ * tnp->vnode is not updated in that case.
+ */
+ if (error != 0 || *vpp != NULL)
+ return (error);
+
+ *vpp = vp;
+ return (0);
+
+bad:
+ *vpp = NULLVP;
+ return (error);
+}
+
+/*
+ * Translates a file handle into a vnode.  The handle's inode number
+ * must lie in [TARFS_ROOTINO, INT_MAX]; the node it resolves to must
+ * have a non-zero mode, a matching generation number and a positive
+ * link count.  On success the vnode obtained with LK_EXCLUSIVE is
+ * stored in vpp and 0 is returned.  Returns ESTALE for a handle that
+ * fails the checks above, or the error from VFS_VGET().
+ */
+static int
+tarfs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
+{
+	struct tarfs_fid *handle = (struct tarfs_fid *)fhp;
+	struct tarfs_node *node;
+	struct vnode *vp;
+	int rc;
+
+	MP_TO_TARFS_MOUNT(mp);
+	if (handle->ino < TARFS_ROOTINO || handle->ino > INT_MAX)
+		return (ESTALE);
+
+	rc = VFS_VGET(mp, handle->ino, LK_EXCLUSIVE, &vp);
+	if (rc != 0) {
+		*vpp = NULLVP;
+		return (rc);
+	}
+
+	node = VP_TO_TARFS_NODE(vp);
+	if (node->mode != 0 &&
+	    node->gen == handle->gen &&
+	    node->nlink > 0) {
+		/* handle still refers to a live node */
+		*vpp = vp;
+		return (0);
+	}
+
+	/* stale handle: drop the vnode we just obtained */
+	vput(vp);
+	*vpp = NULLVP;
+	return (ESTALE);
+}
+
+/*
+ * VFS operations vector for tarfs; operations not listed here are left
+ * unset in this initializer.
+ */
+static struct vfsops tarfs_vfsops = {
+ .vfs_fhtovp = tarfs_fhtovp,
+ .vfs_mount = tarfs_mount,
+ .vfs_root = tarfs_root,
+ .vfs_statfs = tarfs_statfs,
+ .vfs_unmount = tarfs_unmount,
+ .vfs_vget = tarfs_vget,
+};
+/* Register the filesystem under the name "tarfs", flagged read-only */
+VFS_SET(tarfs_vfsops, tarfs, VFCF_READONLY);
+MODULE_VERSION(tarfs, 1);
+/* presumably pulls in the xz decompression module - TODO confirm */
+MODULE_DEPEND(tarfs, xz, 1, 1, 1);
diff --git a/sys/fs/tarfs/tarfs_vnops.c b/sys/fs/tarfs/tarfs_vnops.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/tarfs/tarfs_vnops.c
@@ -0,0 +1,639 @@
+/*-
+ * Copyright (c) 2013 Juniper Networks, Inc.
+ * Copyright (c) 2022 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_tarfs.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/dirent.h>
+#include <sys/fcntl.h>
+#include <sys/limits.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+
+#include <fs/tarfs/tarfs.h>
+#include <fs/tarfs/tarfs_dbg.h>
+
+/*
+ * Opens a tarfs vnode.  Only regular files and directories may be
+ * opened; every other type yields EOPNOTSUPP.  A VM object sized to
+ * the node is created for the vnode before returning 0.
+ */
+static int
+tarfs_open(struct vop_open_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct tarfs_node *node;
+
+	MPASS(VOP_ISLOCKED(vp));
+	node = VP_TO_TARFS_NODE(vp);
+
+	TARFS_DPF(VNODE, "%s(%p=%s, %o)\n", __func__,
+	    node, node->name, ap->a_mode);
+
+	switch (vp->v_type) {
+	case VREG:
+	case VDIR:
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+
+	vnode_create_vobject(vp, node->size, ap->a_td);
+	return (0);
+}
+
+/*
+ * Closes a tarfs vnode.  There is nothing to do; with TARFS_DEBUG
+ * defined the call is traced.  Always returns 0.
+ */
+static int
+tarfs_close(struct vop_close_args *ap)
+{
+#ifndef TARFS_DEBUG
+	(void)ap;
+#else
+	struct vnode *vp = ap->a_vp;
+	struct tarfs_node *node;
+
+	MPASS(VOP_ISLOCKED(vp));
+	node = VP_TO_TARFS_NODE(vp);
+
+	TARFS_DPF(VNODE, "%s(%p=%s)\n", __func__,
+	    node, node->name);
+#endif
+	return (0);
+}
+
+/*
+ * Checks access permissions on a tarfs vnode.  Write access is always
+ * refused: with EROFS for directories, symlinks and regular files, and
+ * with EPERM for block devices, character devices and FIFOs.  Any other
+ * vnode type yields EINVAL.  All remaining requests are decided by
+ * vaccess() using the node's mode, owner and group.
+ */
+static int
+tarfs_access(struct vop_access_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	accmode_t accmode = ap->a_accmode;
+	struct ucred *cred = ap->a_cred;
+	struct tarfs_node *node;
+	int write_errno;
+
+	MPASS(VOP_ISLOCKED(vp));
+	node = VP_TO_TARFS_NODE(vp);
+
+	TARFS_DPF(VNODE, "%s(%p=%s, %o)\n", __func__,
+	    node, node->name, accmode);
+
+	/* pick the errno a write request would get for this vnode type */
+	switch (vp->v_type) {
+	case VDIR:
+	case VLNK:
+	case VREG:
+		write_errno = EROFS;
+		break;
+	case VBLK:
+	case VCHR:
+	case VFIFO:
+		write_errno = EPERM;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	if ((accmode & VWRITE) != 0)
+		return (write_errno);
+
+	return (vaccess(vp->v_type, node->mode, node->uid,
+	    node->gid, accmode, cred));
+}
+
+/*
+ * Copies the attributes of a tarfs node into the caller's vattr.
+ * The device number is reported only for block and character devices
+ * (NODEV otherwise), va_bytes is the node's physical size rounded up
+ * to a page and va_filerev is always 0.  Always returns 0.
+ */
+static int
+tarfs_getattr(struct vop_getattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *attr = ap->a_vap;
+	struct tarfs_node *node = VP_TO_TARFS_NODE(vp);
+
+	TARFS_DPF(VNODE, "%s(%p=%s)\n", __func__,
+	    node, node->name);
+
+	/* identity */
+	attr->va_type = vp->v_type;
+	attr->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	attr->va_fileid = node->ino;
+	attr->va_gen = node->gen;
+	attr->va_filerev = 0;
+
+	/* ownership and permissions */
+	attr->va_mode = node->mode;
+	attr->va_uid = node->uid;
+	attr->va_gid = node->gid;
+	attr->va_nlink = node->nlink;
+	attr->va_flags = node->flags;
+
+	/* sizes */
+	attr->va_size = node->size;
+	attr->va_bytes = round_page(node->physize);
+	attr->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
+
+	/* timestamps */
+	attr->va_atime = node->atime;
+	attr->va_mtime = node->mtime;
+	attr->va_ctime = node->ctime;
+	attr->va_birthtime = node->birthtime;
+
+	/* device number only makes sense for device nodes */
+	if (vp->v_type == VBLK || vp->v_type == VCHR)
+		attr->va_rdev = node->rdev;
+	else
+		attr->va_rdev = NODEV;
+
+	return (0);
+}
+
+/*
+ * Looks up a single path component (ap->a_cnp) in the directory
+ * ap->a_dvp and stores the resulting vnode in *ap->a_vpp.
+ *
+ * Search permission on the directory is checked first. ".." resolves
+ * to the parent node (ENOENT on the root), "." to the directory
+ * itself, and anything else is looked up among the directory's
+ * entries. Returns 0 on success or a positive errno value on failure.
+ */
+static int
+tarfs_lookup(struct vop_cachedlookup_args *ap)
+{
+ struct tarfs_node *dirnode, *parent, *tnp;
+ struct componentname *cnp;
+ struct vnode *dvp;
+ struct vnode **vpp;
+ int error;
+
+ dvp = ap->a_dvp;
+ vpp = ap->a_vpp;
+ cnp = ap->a_cnp;
+
+ *vpp = NULLVP;
+ dirnode = VP_TO_TARFS_NODE(dvp);
+ parent = dirnode->parent;
+ tnp = NULL;
+
+ TARFS_DPF(LOOKUP, "%s(%p=%s, %.*s)\n", __func__,
+ dirnode, dirnode->name,
+ (int)cnp->cn_namelen, cnp->cn_nameptr);
+
+ /* the caller needs search (execute) permission on the directory */
+ error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, curthread);
+ if (error != 0)
+ return (error);
+
+ if (cnp->cn_flags & ISDOTDOT) {
+ /* Do not allow .. on the root node */
+ if (parent == NULL || parent == dirnode)
+ return (ENOENT);
+
+ /* Allocate a new vnode on the matching entry */
+ error = vn_vget_ino(dvp, parent->ino, cnp->cn_lkflags,
+ vpp);
+ if (error != 0)
+ return (error);
+ } else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
+ /* "." is the directory itself: just take another reference */
+ VREF(dvp);
+ *vpp = dvp;
+#ifdef TARFS_DEBUG
+ /*
+ * Debug builds expose the I/O vnode under TARFS_ZIO_NAME in the root.
+ * NOTE(review): *vpp is assigned inside the condition and stays set
+ * to znode when the name does not match - confirm this is harmless
+ * on the ENOENT return below.
+ */
+ } else if (dirnode == dirnode->tmp->root &&
+ (*vpp = dirnode->tmp->znode) != NULL &&
+ cnp->cn_namelen == TARFS_ZIO_NAMELEN &&
+ memcmp(cnp->cn_nameptr, TARFS_ZIO_NAME, TARFS_ZIO_NAMELEN) == 0) {
+ error = vn_lock(*vpp, cnp->cn_lkflags);
+ if (error != 0)
+ return (error);
+ vref(*vpp);
+#endif
+ } else {
+ /* ordinary entry: search the directory's children by name */
+ tnp = tarfs_lookup_node(dirnode, NULL, cnp);
+ if (tnp == NULL) {
+ TARFS_DPF(LOOKUP, "%s(%p=%s, %.*s): file not found\n", __func__,
+ dirnode, dirnode->name,
+ (int)cnp->cn_namelen, cnp->cn_nameptr);
+ return (ENOENT);
+ }
+
+ /* an intermediate component must be a directory or a symlink */
+ if ((cnp->cn_flags & ISLASTCN) == 0 &&
+ (tnp->type != VDIR && tnp->type != VLNK))
+ return (ENOTDIR);
+
+ error = vn_vget_ino(dvp, tnp->ino, cnp->cn_lkflags, vpp);
+ if (error != 0)
+ return (error);
+ }
+
+#ifdef TARFS_DEBUG
+ if (tnp == NULL)
+ tnp = VP_TO_TARFS_NODE(*vpp);
+ TARFS_DPF(LOOKUP, "%s: found vnode %p, tarfs_node %p\n", __func__,
+ *vpp, tnp);
+#endif /* TARFS_DEBUG */
+
+ /* Store the result in the name cache if MAKEENTRY is specified in flags */
+ if ((cnp->cn_flags & MAKEENTRY) != 0 && cnp->cn_nameiop != CREATE)
+ cache_enter(dvp, *vpp, cnp);
+
+ return (error);
+}
+
+/*
+ * Reads directory entries from a tarfs directory into the caller's uio.
+ *
+ * The uio offset acts as a cookie: TARFS_COOKIE_DOT and
+ * TARFS_COOKIE_DOTDOT select the synthetic "." and ".." entries,
+ * TARFS_COOKIE_EOF means the directory is exhausted, and any other
+ * value is the inode number of the next child to emit.  On return the
+ * offset names the entry the next call should start with.
+ *
+ * Returns 0 on success, ENOTDIR if the vnode is not a directory,
+ * EINVAL if the cookie cannot be resolved or not even one entry fits
+ * in the buffer, or the error reported by uiomove().  When requested,
+ * *eofflag is set and an array of per-entry cookies is allocated for
+ * the caller (NFS).
+ */
+static int
+tarfs_readdir(struct vop_readdir_args *ap)
+{
+	struct dirent cde;
+	struct tarfs_node *current, *tnp;
+	struct vnode *vp;
+	struct uio *uio;
+	int *eofflag;
+	u_long **cookies;
+	int *ncookies;
+	off_t off;
+	u_int idx, ndirents;
+	int error;
+	bool exhausted;
+
+	vp = ap->a_vp;
+	uio = ap->a_uio;
+	eofflag = ap->a_eofflag;
+	cookies = ap->a_cookies;
+	ncookies = ap->a_ncookies;
+
+	if (vp->v_type != VDIR)
+		return (ENOTDIR);
+
+	tnp = VP_TO_TARFS_NODE(vp);
+	off = uio->uio_offset;
+	current = NULL;
+	ndirents = 0;
+	/* cleared when we stop because the buffer is full */
+	exhausted = true;
+
+	TARFS_DPF(VNODE, "%s(%p=%s, %zu, %zd)\n", __func__,
+	    tnp, tnp->name, uio->uio_offset, uio->uio_resid);
+
+	if (uio->uio_offset == TARFS_COOKIE_EOF) {
+		TARFS_DPF(VNODE, "%s: EOF\n", __func__);
+		return (0);
+	}
+
+	if (uio->uio_offset == TARFS_COOKIE_DOT) {
+		TARFS_DPF(VNODE, "%s: Generating . entry\n", __func__);
+		/* fake . entry */
+		cde.d_fileno = tnp->ino;
+		cde.d_type = DT_DIR;
+		cde.d_namlen = 1;
+		cde.d_name[0] = '.';
+		cde.d_name[1] = '\0';
+		cde.d_reclen = GENERIC_DIRSIZ(&cde);
+		if (cde.d_reclen > uio->uio_resid)
+			goto full;
+		error = uiomove(&cde, cde.d_reclen, uio);
+		if (error)
+			return (error);
+		/* next is .. */
+		uio->uio_offset = TARFS_COOKIE_DOTDOT;
+		ndirents++;
+	}
+
+	if (uio->uio_offset == TARFS_COOKIE_DOTDOT) {
+		TARFS_DPF(VNODE, "%s: Generating .. entry\n", __func__);
+		/* fake .. entry */
+		MPASS(tnp->parent != NULL);
+		TARFS_NODE_LOCK(tnp->parent);
+		cde.d_fileno = tnp->parent->ino;
+		TARFS_NODE_UNLOCK(tnp->parent);
+		cde.d_type = DT_DIR;
+		cde.d_namlen = 2;
+		cde.d_name[0] = '.';
+		cde.d_name[1] = '.';
+		cde.d_name[2] = '\0';
+		cde.d_reclen = GENERIC_DIRSIZ(&cde);
+		if (cde.d_reclen > uio->uio_resid)
+			goto full;
+		error = uiomove(&cde, cde.d_reclen, uio);
+		if (error)
+			return (error);
+		/* next is first child */
+		current = TAILQ_FIRST(&tnp->dir.dirhead);
+		if (current == NULL)
+			goto done;
+		uio->uio_offset = current->ino;
+		TARFS_DPF(VNODE, "%s: [%u] setting current node to %p=%s\n",
+		    __func__, ndirents, current, current->name);
+		ndirents++;
+	}
+
+	/* resuming previous call */
+	if (current == NULL) {
+		current = tarfs_lookup_dir(tnp, uio->uio_offset);
+		if (current == NULL) {
+			error = EINVAL;
+			goto done;
+		}
+		uio->uio_offset = current->ino;
+		TARFS_DPF(VNODE, "%s: [%u] setting current node to %p=%s\n",
+		    __func__, ndirents, current, current->name);
+	}
+
+	for (;;) {
+		cde.d_fileno = current->ino;
+		switch (current->type) {
+		case VBLK:
+			cde.d_type = DT_BLK;
+			break;
+		case VCHR:
+			cde.d_type = DT_CHR;
+			break;
+		case VDIR:
+			cde.d_type = DT_DIR;
+			break;
+		case VFIFO:
+			cde.d_type = DT_FIFO;
+			break;
+		case VLNK:
+			cde.d_type = DT_LNK;
+			break;
+		case VREG:
+			cde.d_type = DT_REG;
+			break;
+		default:
+			panic("%s: tarfs_node %p, type %d\n", __func__,
+			    current, current->type);
+		}
+		cde.d_namlen = current->namelen;
+		/* it is the entry's name, not the directory's, that must fit */
+		MPASS(current->namelen < sizeof(cde.d_name));
+		(void)memcpy(cde.d_name, current->name, current->namelen);
+		cde.d_name[current->namelen] = '\0';
+		cde.d_reclen = GENERIC_DIRSIZ(&cde);
+		if (cde.d_reclen > uio->uio_resid)
+			goto full;
+		error = uiomove(&cde, cde.d_reclen, uio);
+		if (error != 0)
+			goto done;
+		ndirents++;
+		/* next sibling */
+		current = TAILQ_NEXT(current, dirents);
+		if (current == NULL)
+			goto done;
+		uio->uio_offset = current->ino;
+		TARFS_DPF(VNODE, "%s: [%u] setting current node to %p=%s\n",
+		    __func__, ndirents, current, current->name);
+	}
+full:
+	/*
+	 * The pending entry did not fit.  uio_offset already names it, so
+	 * this must not be mistaken for end-of-directory below even when
+	 * the pending entry is "." or ".." and current is still NULL.
+	 */
+	exhausted = false;
+	if (cde.d_reclen > uio->uio_resid) {
+		TARFS_DPF(VNODE, "%s: out of space, returning\n",
+		    __func__);
+		error = (ndirents == 0) ? EINVAL : 0;
+	}
+done:
+	TARFS_DPF(VNODE, "%s: %u entries written\n", __func__, ndirents);
+	TARFS_DPF(VNODE, "%s: saving cache information\n", __func__);
+	if (current == NULL) {
+		if (exhausted)
+			uio->uio_offset = TARFS_COOKIE_EOF;
+		tnp->dir.lastcookie = 0;
+		tnp->dir.lastnode = NULL;
+	} else {
+		tnp->dir.lastcookie = current->ino;
+		tnp->dir.lastnode = current;
+	}
+
+	if (eofflag != NULL) {
+		TARFS_DPF(VNODE, "%s: Setting EOF flag\n", __func__);
+		*eofflag = (error == 0 && current == NULL && exhausted);
+	}
+
+	/* Update for NFS */
+	if (error == 0 && cookies != NULL && ncookies != NULL) {
+		TARFS_DPF(VNODE, "%s: Updating NFS cookies\n", __func__);
+		/*
+		 * Replay the walk from the starting offset; cookie idx is
+		 * the offset of the entry that follows entry idx.
+		 */
+		current = NULL;
+		*cookies = malloc(ndirents * sizeof(off_t), M_TEMP, M_WAITOK);
+		*ncookies = ndirents;
+		for (idx = 0; idx < ndirents; idx++) {
+			if (off == TARFS_COOKIE_DOT)
+				off = TARFS_COOKIE_DOTDOT;
+			else {
+				if (off == TARFS_COOKIE_DOTDOT) {
+					current = TAILQ_FIRST(&tnp->dir.dirhead);
+				} else if (current != NULL) {
+					current = TAILQ_NEXT(current, dirents);
+				} else {
+					current = tarfs_lookup_dir(tnp, off);
+					current = TAILQ_NEXT(current, dirents);
+				}
+				if (current == NULL)
+					off = TARFS_COOKIE_EOF;
+				else
+					off = current->ino;
+			}
+
+			TARFS_DPF(VNODE, "%s: [%u] offset %zu\n", __func__,
+			    idx, off);
+			(*cookies)[idx] = off;
+		}
+		MPASS(uio->uio_offset == off);
+	}
+
+	return (error);
+}
+
+/*
+ * Reads from a regular tarfs file into the caller's uio.  Device nodes
+ * yield EOPNOTSUPP, any other non-regular vnode EISDIR, and a negative
+ * offset EINVAL.  Data is transferred in chunks through
+ * tarfs_read_file() until the request is satisfied, the end of the
+ * file is reached, an error occurs or no progress is made.
+ */
+static int
+tarfs_read(struct vop_read_args *ap)
+{
+	struct uio *uiop = ap->a_uio;
+	struct vnode *vp = ap->a_vp;
+	struct tarfs_node *node;
+	size_t chunk;
+	off_t before;
+	int error;
+
+	switch (vp->v_type) {
+	case VREG:
+		break;
+	case VCHR:
+	case VBLK:
+		return (EOPNOTSUPP);
+	default:
+		return (EISDIR);
+	}
+
+	if (uiop->uio_offset < 0)
+		return (EINVAL);
+
+	node = VP_TO_TARFS_NODE(vp);
+	error = 0;
+
+	TARFS_DPF(VNODE, "%s(%p=%s, %zu, %zd)\n", __func__,
+	    node, node->name, uiop->uio_offset, uiop->uio_resid);
+
+	for (;;) {
+		/* remember how much was outstanding to detect a stall */
+		before = uiop->uio_resid;
+		if (before <= 0)
+			break;
+		if (node->size <= uiop->uio_offset)
+			break;
+		chunk = MIN(node->size - uiop->uio_offset, before);
+		if (chunk == 0)
+			break;
+
+		error = tarfs_read_file(node, chunk, uiop);
+		if (error != 0)
+			break;
+		if (before == uiop->uio_resid)
+			break;
+	}
+
+	return (error);
+}
+
+/*
+ * Copies the target of a symbolic link into the caller's uio,
+ * truncated to whichever is smaller: the node's size or the space
+ * left in the uio.  Returns the result of uiomove().
+ */
+static int
+tarfs_readlink(struct vop_readlink_args *ap)
+{
+	struct uio *uiop = ap->a_uio;
+	struct vnode *vp = ap->a_vp;
+	struct tarfs_node *node;
+
+	MPASS(uiop->uio_offset == 0);
+	MPASS(vp->v_type == VLNK);
+
+	node = VP_TO_TARFS_NODE(vp);
+
+	TARFS_DPF(VNODE, "%s(%p=%s)\n", __func__,
+	    node, node->name);
+
+	return (uiomove(node->link.name,
+	    MIN(node->size, uiop->uio_resid), uiop));
+}
+
+/*
+ * Reclaims a tarfs vnode: removes it from the VFS hash, destroys its
+ * VM object, purges it from the name cache and finally severs the
+ * link between the vnode and its tarfs node under the node lock.
+ * Always returns 0.
+ */
+static int
+tarfs_reclaim(struct vop_reclaim_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct tarfs_node *node = VP_TO_TARFS_NODE(vp);
+
+	vfs_hash_remove(vp);
+	vnode_destroy_vobject(vp);
+	cache_purge(vp);
+
+	/* detach node and vnode from each other */
+	TARFS_NODE_LOCK(node);
+	node->vnode = NULLVP;
+	vp->v_data = NULL;
+	TARFS_NODE_UNLOCK(node);
+
+	return (0);
+}
+
+/*
+ * Prints a description of a tarfs vnode: node pointer, link count,
+ * mode, owner, group and size, plus FIFO details for FIFOs.
+ * Always returns 0.
+ */
+static int
+tarfs_print(struct vop_print_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct tarfs_node *node = VP_TO_TARFS_NODE(vp);
+
+	printf("tag tarfs, tarfs_node %p, links %lu\n",
+	    node, node->nlink);
+	printf("\tmode 0%o, owner %d, group %d, size %zd\n",
+	    node->mode, node->uid, node->gid,
+	    node->size);
+
+	if (vp->v_type == VFIFO)
+		fifo_printinfo(vp);
+
+	printf("\n");
+
+	return (0);
+}
+
+/*
+ * Services a buffer read for a tarfs vnode by wrapping the buffer in a
+ * kernel-space uio and reading through tarfs_read_file().  A request
+ * starting beyond the end of the file fails with EIO; one that
+ * straddles the end is clipped to the file length.  Errors are
+ * reported through the buffer (BIO_ERROR / b_error); the buffer is
+ * marked B_DONE and the function itself always returns 0.
+ */
+static int
+tarfs_strategy(struct vop_strategy_args *ap)
+{
+	struct tarfs_node *node = VP_TO_TARFS_NODE(ap->a_vp);
+	struct buf *bp = ap->a_bp;
+	struct iovec vec;
+	struct uio io;
+	off_t start;
+	size_t want;
+	int error;
+
+	MPASS(bp->b_iocmd == BIO_READ);
+	MPASS(bp->b_iooffset >= 0);
+	MPASS(bp->b_bcount > 0);
+	MPASS(bp->b_bufsize >= bp->b_bcount);
+	TARFS_DPF(VNODE, "%s(%p=%s, %zu, %ld/%ld)\n", __func__, node,
+	    node->name, (size_t)bp->b_iooffset, bp->b_bcount, bp->b_bufsize);
+
+	start = bp->b_iooffset;
+	want = bp->b_bcount;
+	vec.iov_base = bp->b_data;
+	vec.iov_len = bp->b_bcount;
+	/* assume nothing was transferred until proven otherwise */
+	bp->b_resid = want;
+
+	if (start > node->size) {
+		/* XXX read beyond EOF - figure out correct handling */
+		error = EIO;
+	} else {
+		if (start + want > node->size) {
+			/* clip to file length */
+			want = node->size - start;
+		}
+		io.uio_iov = &vec;
+		io.uio_iovcnt = 1;
+		io.uio_offset = start;
+		io.uio_resid = want;
+		io.uio_segflg = UIO_SYSSPACE;
+		io.uio_rw = UIO_READ;
+		io.uio_td = curthread;
+		error = tarfs_read_file(node, want, &io);
+		/* subtract what was actually read */
+		bp->b_resid -= want - io.uio_resid;
+	}
+
+	if (error != 0) {
+		bp->b_ioflags |= BIO_ERROR;
+		bp->b_error = error;
+	}
+	bp->b_flags |= B_DONE;
+	return (0);
+}
+
+/*
+ * Builds a file handle for a tarfs vnode from the node's inode and
+ * generation numbers.  Always returns 0.
+ */
+static int
+tarfs_vptofh(struct vop_vptofh_args *ap)
+{
+	struct tarfs_node *node = VP_TO_TARFS_NODE(ap->a_vp);
+	struct tarfs_fid *handle = (struct tarfs_fid *)ap->a_fhp;
+
+	handle->len = sizeof(struct tarfs_fid);
+	handle->gen = node->gen;
+	handle->ino = node->ino;
+
+	return (0);
+}
+
+/*
+ * Vnode operations vector for tarfs. Operations not listed here fall
+ * through to default_vnodeops. Name lookups enter through
+ * vfs_cache_lookup, with tarfs_lookup installed as the cached-lookup
+ * handler.
+ */
+struct vop_vector tarfs_vnodeops = {
+ .vop_default = &default_vnodeops,
+
+ .vop_access = tarfs_access,
+ .vop_cachedlookup = tarfs_lookup,
+ .vop_close = tarfs_close,
+ .vop_getattr = tarfs_getattr,
+ .vop_lookup = vfs_cache_lookup,
+ .vop_open = tarfs_open,
+ .vop_print = tarfs_print,
+ .vop_read = tarfs_read,
+ .vop_readdir = tarfs_readdir,
+ .vop_readlink = tarfs_readlink,
+ .vop_reclaim = tarfs_reclaim,
+ .vop_strategy = tarfs_strategy,
+ .vop_vptofh = tarfs_vptofh,
+};
+VFS_VOP_VECTOR_REGISTER(tarfs_vnodeops);
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -369,6 +369,7 @@
sym \
${_syscons} \
sysvipc \
+ tarfs \
tcp \
${_ti} \
tmpfs \
diff --git a/sys/modules/tarfs/Makefile b/sys/modules/tarfs/Makefile
new file mode 100644
--- /dev/null
+++ b/sys/modules/tarfs/Makefile
@@ -0,0 +1,23 @@
+# $FreeBSD$
+
+# Kernel module build for tarfs; sources live in sys/fs/tarfs.
+.PATH: ${.CURDIR:H:H}/fs/tarfs
+
+KMOD= tarfs
+SRCS= opt_tarfs.h \
+ vnode_if.h \
+ tarfs_io.c \
+ tarfs_subr.c \
+ tarfs_vnops.c \
+ tarfs_vfsops.c
+
+# When KERNBUILDDIR is not defined, set the options on the command line:
+# ZSTDIO always, TARFS_DEBUG only on request.
+.if !defined(KERNBUILDDIR)
+CFLAGS+= -DZSTDIO
+.ifdef TARFS_DEBUG
+CFLAGS+= -DTARFS_DEBUG
+.endif
+.endif
+
+# Option header and include path for the in-tree zstd FreeBSD glue
+SRCS+= opt_zstdio.h
+CFLAGS+= -I${SRCTOP}/sys/contrib/zstd/lib/freebsd
+
+.include <bsd.kmod.mk>

File Metadata

Mime Type
text/plain
Expires
Sat, Nov 30, 12:14 AM (14 h, 28 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
9053873
Default Alt Text
D37753.id115033.diff (95 KB)

Event Timeline