Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F81890511
D37753.id115033.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
95 KB
Referenced Files
None
Subscribers
None
D37753.id115033.diff
View Options
diff --git a/share/man/man5/Makefile b/share/man/man5/Makefile
--- a/share/man/man5/Makefile
+++ b/share/man/man5/Makefile
@@ -70,6 +70,7 @@
style.Makefile.5 \
style.mdoc.5 \
sysctl.conf.5 \
+ tarfs.5 \
tmpfs.5 \
unionfs.5
diff --git a/share/man/man5/tarfs.5 b/share/man/man5/tarfs.5
new file mode 100644
--- /dev/null
+++ b/share/man/man5/tarfs.5
@@ -0,0 +1,101 @@
+.\"-
+.\" Copyright (c) 2022 Klara, Inc.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd December 1, 2022
+.Dt TARFS 5
+.Os
+.Sh NAME
+.Nm tarfs
+.Nd tarball filesystem
+.Sh SYNOPSIS
+To compile this driver into the kernel, place the following line in
+your kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "options TARFS"
+.Ed
+.Pp
+Alternatively, to load the driver as a module at boot time, place the
+following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+tarfs_load="YES"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+driver implements a read-only filesystem backed by a
+.Xr tar 5
+file.
+Currently, only POSIX archives, optionally compressed with
+.Xr zstd 1 ,
+are supported.
+.Pp
+The preferred I/O size for
+.Nm
+filesystems can be adjusted using the
+.Va vfs.tarfs.ioshift
+sysctl setting and tunable.
+Setting it to 0 will reset it to its default value.
+Note that changes to this setting only apply to filesystems mounted
+after the change.
+.Sh DIAGNOSTICS
+If enabled by the
+.Dv TARFS_DEBUG
+kernel option, the
+.Va vfs.tarfs.debug
+sysctl setting can be used to control debugging output from the
+.Nm
+driver.
+Debugging output for individual sections of the driver can be enabled
+by adding together the relevant values from the table below.
+.Bl -column Value Description
+.It 0x01 Ta Memory allocations
+.It 0x02 Ta Checksum calculations
+.It 0x04 Ta Filesystem operations (vfsops)
+.It 0x08 Ta Path lookups
+.It 0x10 Ta File operations (vnops)
+.It 0x20 Ta General I/O
+.It 0x40 Ta Decompression
+.It 0x80 Ta Decompression index
+.It 0x100 Ta Sparse file mapping
+.El
+.Sh SEE ALSO
+.Xr tar 1 ,
+.Xr zstd 1 ,
+.Xr fstab 5 ,
+.Xr tar 5 ,
+.Xr mount 8 ,
+.Xr sysctl 8
+.Sh HISTORY
+.An -nosplit
+The
+.Nm
+driver was developed by
+.An Stephen J. Kiernan Aq Mt stevek@FreeBSD.org
+and
+.An Dag-Erling Smørgrav Aq Mt des@FreeBSD.org
+for Juniper Networks and Klara Systems.
+This manual page was written by
+.An Dag-Erling Smørgrav Aq Mt des@FreeBSD.org
+for Juniper Networks and Klara Systems.
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3614,6 +3614,10 @@
fs/smbfs/smbfs_subr.c optional smbfs
fs/smbfs/smbfs_vfsops.c optional smbfs
fs/smbfs/smbfs_vnops.c optional smbfs
+fs/tarfs/tarfs_io.c optional tarfs compile-with "${NORMAL_C} -I$S/contrib/zstd/lib/freebsd"
+fs/tarfs/tarfs_subr.c optional tarfs
+fs/tarfs/tarfs_vfsops.c optional tarfs
+fs/tarfs/tarfs_vnops.c optional tarfs
fs/udf/osta.c optional udf
fs/udf/udf_iconv.c optional udf_iconv
fs/udf/udf_vfsops.c optional udf
diff --git a/sys/conf/options b/sys/conf/options
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -265,6 +265,7 @@
PROCFS opt_dontuse.h
PSEUDOFS opt_dontuse.h
SMBFS opt_dontuse.h
+TARFS opt_dontuse.h
TMPFS opt_dontuse.h
UDF opt_dontuse.h
UNIONFS opt_dontuse.h
@@ -273,6 +274,9 @@
# Pseudofs debugging
PSEUDOFS_TRACE opt_pseudofs.h
+# Tarfs debugging
+TARFS_DEBUG opt_tarfs.h
+
# In-kernel GSS-API
KGSSAPI opt_kgssapi.h
KGSSAPI_DEBUG opt_kgssapi.h
diff --git a/sys/fs/tarfs/tarfs.h b/sys/fs/tarfs/tarfs.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/tarfs/tarfs.h
@@ -0,0 +1,254 @@
+/*-
+ * Copyright (c) 2013 Juniper Networks, Inc.
+ * Copyright (c) 2022 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _FS_TARFS_TARFS_H_
+#define _FS_TARFS_TARFS_H_
+
+#ifndef _KERNEL
+#error Should only be included by kernel
+#endif
+
+MALLOC_DECLARE(M_TARFSMNT);
+MALLOC_DECLARE(M_TARFSNODE);
+MALLOC_DECLARE(M_TARFSNAME);
+
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_vfs_tarfs);
+#endif
+
+struct componentname;
+struct mount;
+struct vnode;
+
+/*
+ * Internal representation of a tarfs file system node.
+ */
+struct tarfs_node {
+ TAILQ_ENTRY(tarfs_node) entries;
+ TAILQ_ENTRY(tarfs_node) dirents;
+
+ struct mtx lock;
+
+ struct vnode *vnode;
+ struct tarfs_mount *tmp;
+ enum vtype type;
+ ino_t ino;
+ off_t offset;
+ size_t size;
+ size_t physize;
+ char *name;
+ size_t namelen;
+
+ /* Node attributes */
+ uid_t uid;
+ gid_t gid;
+ mode_t mode;
+ unsigned int flags;
+ nlink_t nlink;
+ struct timespec atime;
+ struct timespec mtime;
+ struct timespec ctime;
+ struct timespec birthtime;
+ unsigned long gen;
+
+ /* Block map */
+ size_t nblk;
+ struct tarfs_blk *blk;
+
+ struct tarfs_node *parent;
+ union {
+ /* VDIR */
+ struct {
+ TAILQ_HEAD(, tarfs_node) dirhead;
+ off_t lastcookie;
+ struct tarfs_node *lastnode;
+ } dir;
+
+ /* VLNK */
+ struct {
+ char *name;
+ size_t namelen;
+ } link;
+
+ /* VBLK or VCHR */
+ dev_t rdev;
+
+ /* VREG */
+ struct tarfs_node *other;
+ };
+};
+
+/*
+ * Entry in sparse file block map.
+ */
+struct tarfs_blk {
+ off_t i; /* input (physical) offset */
+ off_t o; /* output (logical) offset */
+ size_t l; /* length */
+};
+
+/*
+ * Decompression buffer.
+ */
+#define TARFS_ZBUF_SIZE 1048576
+struct tarfs_zbuf {
+ u_char buf[TARFS_ZBUF_SIZE];
+ size_t off; /* offset of contents */
+ size_t len; /* length of contents */
+};
+
+/*
+ * Internal representation of a tarfs mount point.
+ *
+ * The I/O layer (tarfs_io.c) reads the archive through vp, or through
+ * znode when a decompression layer has been set up.
+ */
+struct tarfs_mount {
+ TAILQ_HEAD(, tarfs_node) allnodes;
+ struct mtx allnode_lock; /* taken by TARFS_ALLNODES_LOCK() */
+
+ struct g_consumer *cp;
+ struct cdev *dev;
+ struct tarfs_node *root;
+ struct vnode *vp; /* vnode of the tar file itself */
+ struct mount *vfs; /* mount that the zio vnode is attached to */
+ ino_t ino;
+ struct unrhdr *ino_unr;
+ size_t iosize; /* block size used for reads in the I/O layer */
+ size_t nblocks;
+ size_t nfiles;
+ time_t mtime; /* default mtime for directories */
+
+ struct tarfs_zio *zio; /* decompression state, NULL if none */
+ struct vnode *znode; /* vnode of decompressed stream, NULL if none */
+};
+
+/*
+ * State of the decompression layer for one mount.  Besides the current
+ * stream position, it holds an index of (input offset, output offset)
+ * pairs recorded at the end of each compression frame; the stream is
+ * restarted from an index entry when a read does not continue from the
+ * current position.
+ */
+struct tarfs_zio {
+ struct tarfs_mount *tmp; /* mount this state belongs to */
+
+ /* decompression state */
+#ifdef ZSTDIO
+ struct tarfs_zstd *zstd; /* decompression state (zstd) */
+#endif
+ off_t ipos; /* current input position */
+ off_t opos; /* current output position */
+
+ /* index of compression frames */
+ unsigned int curidx; /* current index position */
+ unsigned int nidx; /* number of index entries */
+ unsigned int szidx; /* index capacity */
+ struct tarfs_idx { off_t i, o; } *idx; /* i: input, o: output offset */
+};
+
+struct tarfs_fid {
+ u_short len; /* length of data in bytes */
+ u_short data0; /* force alignment */
+ ino_t ino;
+ unsigned long gen;
+};
+
+/*
+ * Locking helpers.  The NODE variants take a struct tarfs_node pointer
+ * and operate on its lock member; the ALLNODES variants take a struct
+ * tarfs_mount pointer and operate on its allnode_lock member.  Each
+ * macro uses only its own argument, never a variable captured from the
+ * caller's scope.
+ */
+#define TARFS_NODE_LOCK(tnp) \
+	mtx_lock(&(tnp)->lock)
+#define TARFS_NODE_UNLOCK(tnp) \
+	mtx_unlock(&(tnp)->lock)
+#define TARFS_ALLNODES_LOCK(tmp) \
+	mtx_lock(&(tmp)->allnode_lock)
+#define TARFS_ALLNODES_UNLOCK(tmp) \
+	mtx_unlock(&(tmp)->allnode_lock)
+
+/*
+ * Data and metadata within tar files are aligned on 512-byte boundaries,
+ * to match the block size of the magnetic tapes they were originally
+ * intended for.
+ *
+ * TARFS_BSHIFT		log2 of the block size
+ * TARFS_BLOCKSIZE	block size in bytes, as a size_t
+ * TARFS_BLKOFF(l)	offset of byte position l within its block
+ * TARFS_BLKNUM(l)	number of the block containing byte position l
+ * TARFS_SZ2BLKS(sz)	number of blocks needed to hold sz bytes
+ *
+ * TARFS_BLOCKSIZE is fully parenthesized so that it expands to a single
+ * primary expression wherever it is used.
+ */
+#define TARFS_BSHIFT 9
+#define TARFS_BLOCKSIZE ((size_t)(1U << TARFS_BSHIFT))
+#define TARFS_BLKOFF(l) ((l) % TARFS_BLOCKSIZE)
+#define TARFS_BLKNUM(l) ((l) >> TARFS_BSHIFT)
+#define TARFS_SZ2BLKS(sz) (((sz) + TARFS_BLOCKSIZE - 1) / TARFS_BLOCKSIZE)
+
+/*
+ * Our preferred I/O size.
+ */
+extern unsigned int tarfs_ioshift;
+#define TARFS_IOSHIFT_MIN TARFS_BSHIFT
+#define TARFS_IOSHIFT_DEFAULT PAGE_SHIFT
+#define TARFS_IOSHIFT_MAX PAGE_SHIFT
+
+#define TARFS_ROOTINO ((ino_t)3)
+#define TARFS_ZIOINO ((ino_t)4)
+#define TARFS_MININO ((ino_t)65535)
+
+#define TARFS_COOKIE_DOT 0
+#define TARFS_COOKIE_DOTDOT 1
+#define TARFS_COOKIE_EOF OFF_MAX
+
+#define TARFS_ZIO_NAME ".tar"
+#define TARFS_ZIO_NAMELEN (sizeof(TARFS_ZIO_NAME) - 1)
+
+extern struct vop_vector tarfs_vnodeops;
+
+/*
+ * Returns the tarfs mount structure stored in the mnt_data field of the
+ * given mount point.  Both the mount point and its mnt_data must be
+ * non-NULL.
+ */
+static inline struct tarfs_mount *
+MP_TO_TARFS_MOUNT(struct mount *mp)
+{
+	struct tarfs_mount *tmp;
+
+	MPASS(mp != NULL && mp->mnt_data != NULL);
+	tmp = mp->mnt_data;
+	return (tmp);
+}
+
+/*
+ * Returns the tarfs node stored in the v_data field of the given vnode.
+ * Both the vnode and its v_data must be non-NULL.
+ */
+static inline struct tarfs_node *
+VP_TO_TARFS_NODE(struct vnode *vp)
+{
+	struct tarfs_node *tnp;
+
+	MPASS(vp != NULL && vp->v_data != NULL);
+	tnp = vp->v_data;
+	return (tnp);
+}
+
+int tarfs_alloc_node(struct tarfs_mount *tmp, const char *name,
+ size_t namelen, enum vtype type, off_t off, size_t sz,
+ time_t mtime, uid_t uid, gid_t gid, mode_t mode,
+ unsigned int flags, const char *linkname, dev_t rdev,
+ struct tarfs_node *parent, struct tarfs_node **node);
+int tarfs_load_blockmap(struct tarfs_node *tnp, size_t realsize);
+void tarfs_dump_tree(struct tarfs_node *tnp);
+void tarfs_free_node(struct tarfs_node *tnp);
+struct tarfs_node *
+ tarfs_lookup_dir(struct tarfs_node *tnp, off_t cookie);
+struct tarfs_node *
+ tarfs_lookup_node(struct tarfs_node *tnp, struct tarfs_node *f,
+ struct componentname *cnp);
+void tarfs_print_node(struct tarfs_node *tnp);
+int tarfs_read_file(struct tarfs_node *tnp, size_t len, struct uio *uiop);
+
+int tarfs_io_init(struct tarfs_mount *tmp);
+int tarfs_io_fini(struct tarfs_mount *tmp);
+int tarfs_io_read(struct tarfs_mount *tmp, bool raw,
+ struct uio *uiop);
+ssize_t tarfs_io_read_buf(struct tarfs_mount *tmp, bool raw,
+ void *buf, off_t off, size_t len);
+unsigned int
+ tarfs_strtofflags(const char *str, char **end);
+
+#endif /* _FS_TARFS_TARFS_H_ */
diff --git a/sys/fs/tarfs/tarfs_dbg.h b/sys/fs/tarfs/tarfs_dbg.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/tarfs/tarfs_dbg.h
@@ -0,0 +1,63 @@
+/*-
+ * Copyright (c) 2013 Juniper Networks, Inc.
+ * Copyright (c) 2022 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _FS_TARFS_TARFS_DBG_H_
+#define _FS_TARFS_TARFS_DBG_H_
+
+#ifndef _KERNEL
+#error Should only be included by kernel
+#endif
+
+/*
+ * Debugging output.  When the kernel is built with TARFS_DEBUG, the
+ * tarfs_debug variable is a bit mask selecting which categories of
+ * messages are printed; without it, the macros below expand to nothing
+ * and their arguments are not evaluated.
+ */
+#ifdef TARFS_DEBUG
+extern int tarfs_debug;
+
+/* Category bits tested against tarfs_debug. */
+#define TARFS_DEBUG_ALLOC 0x01
+#define TARFS_DEBUG_CHECKSUM 0x02
+#define TARFS_DEBUG_FS 0x04
+#define TARFS_DEBUG_LOOKUP 0x08
+#define TARFS_DEBUG_VNODE 0x10
+#define TARFS_DEBUG_IO 0x20
+#define TARFS_DEBUG_ZIO 0x40
+#define TARFS_DEBUG_ZIDX 0x80
+#define TARFS_DEBUG_MAP 0x100
+
+/*
+ * Prints a message if the given category (the part of the name after
+ * TARFS_DEBUG_, e.g. IO or ZIO) is enabled in tarfs_debug.
+ */
+#define TARFS_DPF(category, fmt, ...) \
+ do { \
+ if ((tarfs_debug & TARFS_DEBUG_##category) != 0) \
+ printf(fmt, ## __VA_ARGS__); \
+ } while (0)
+/*
+ * Same as TARFS_DPF, but only if cond is also true.  cond is evaluated
+ * before the category check.
+ */
+#define TARFS_DPF_IFF(category, cond, fmt, ...) \
+ do { \
+ if ((cond) \
+ && (tarfs_debug & TARFS_DEBUG_##category) != 0) \
+ printf(fmt, ## __VA_ARGS__); \
+ } while (0)
+#else
+#define TARFS_DPF(category, fmt, ...)
+#define TARFS_DPF_IFF(category, cond, fmt, ...)
+#endif
+
+#endif /* _FS_TARFS_TARFS_DBG_H_ */
diff --git a/sys/fs/tarfs/tarfs_io.c b/sys/fs/tarfs/tarfs_io.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/tarfs/tarfs_io.c
@@ -0,0 +1,663 @@
+/*-
+ * Copyright (c) 2013 Juniper Networks, Inc.
+ * Copyright (c) 2022 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_tarfs.h"
+#include "opt_zstdio.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/sysctl.h>
+#include <sys/uio.h>
+#include <sys/vnode.h>
+
+#ifdef ZSTDIO
+#define ZSTD_STATIC_LINKING_ONLY
+#include <contrib/zstd/lib/zstd.h>
+#endif
+
+#include <fs/tarfs/tarfs.h>
+#include <fs/tarfs/tarfs_dbg.h>
+
+#ifdef TARFS_DEBUG
+SYSCTL_NODE(_vfs_tarfs, OID_AUTO, zio, CTLFLAG_RD, 0,
+ "Tar filesystem decompression layer");
+COUNTER_U64_DEFINE_EARLY(tarfs_zio_inflated);
+SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, inflated, CTLFLAG_RD,
+ &tarfs_zio_inflated, "Amount of compressed data inflated.");
+COUNTER_U64_DEFINE_EARLY(tarfs_zio_consumed);
+SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, consumed, CTLFLAG_RD,
+ &tarfs_zio_consumed, "Amount of compressed data consumed.");
+
+/*
+ * Sysctl handler for the compression counter reset knob.  Reads always
+ * return zero; a write of any value zeroes both counters.
+ */
+static int
+tarfs_sysctl_handle_zio_reset(SYSCTL_HANDLER_ARGS)
+{
+	unsigned int value = 0;
+	int error;
+
+	/* report a constant zero to the reader */
+	error = SYSCTL_OUT(req, &value, sizeof(value));
+	if (error != 0)
+		return (error);
+	/* nothing else to do unless a new value was supplied */
+	if (req->newptr == NULL)
+		return (0);
+	/* consume the new value; its contents are ignored */
+	error = SYSCTL_IN(req, &value, sizeof(value));
+	if (error != 0)
+		return (error);
+	counter_u64_zero(tarfs_zio_inflated);
+	counter_u64_zero(tarfs_zio_consumed);
+	return (0);
+}
+
+SYSCTL_PROC(_vfs_tarfs_zio, OID_AUTO, reset,
+ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW,
+ NULL, 0, tarfs_sysctl_handle_zio_reset, "IU",
+ "Reset compression counters.");
+#endif
+
+MALLOC_DEFINE(M_TARFSZSTATE, "tarfs zstate", "tarfs decompression state");
+MALLOC_DEFINE(M_TARFSZBUF, "tarfs zbuf", "tarfs decompression buffers");
+
+#define XZ_MAGIC (uint8_t[]){ 0xfd, 0x37, 0x7a, 0x58, 0x5a }
+#define ZLIB_MAGIC (uint8_t[]){ 0x1f, 0x8b, 0x08 }
+#define ZSTD_MAGIC (uint8_t[]){ 0x28, 0xb5, 0x2f, 0xfd }
+
+#ifdef ZSTDIO
+struct tarfs_zstd {
+ ZSTD_DStream *zds;
+};
+#endif
+
+/* XXX review use of curthread / uio_td / td_cred */
+
+/*
+ * Performs the read described by uiop on the tar file.  The decompressed
+ * stream is read if the archive is compressed and raw is false; in all
+ * other cases the original file is read directly, under a range lock
+ * covering the requested span.  Returns 0 on success and a positive
+ * errno value on failure.
+ */
+int
+tarfs_io_read(struct tarfs_mount *tmp, bool raw, struct uio *uiop)
+{
+	const off_t start = uiop->uio_offset;
+	const size_t count = uiop->uio_resid;
+	struct vnode *vp;
+	void *cookie = NULL;
+	bool direct;
+	int error;
+
+	/* pick the vnode to read from */
+	direct = raw || tmp->znode == NULL;
+	vp = direct ? tmp->vp : tmp->znode;
+
+	error = vn_lock(vp, LK_EXCLUSIVE);
+	if (error == 0) {
+		/* only the underlying file is range-locked */
+		if (direct)
+			cookie = vn_rangelock_rlock(vp, start, start + count);
+		error = VOP_READ(vp, uiop, IO_DIRECT,
+		    uiop->uio_td->td_ucred);
+		if (direct)
+			vn_rangelock_unlock(vp, cookie);
+		VOP_UNLOCK(vp);
+	}
+	TARFS_DPF(IO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
+	    (size_t)start, count, error, uiop->uio_resid);
+	return (error);
+}
+
+/*
+ * Convenience wrapper around tarfs_io_read() which reads len bytes at
+ * offset off into a kernel buffer.  As with tarfs_io_read(), the
+ * decompressed stream is read if the archive is compressed and raw is
+ * false, and the original file otherwise.  Returns the number of bytes
+ * read on success, 0 on EOF (or if len is 0), and a negative errno value
+ * on failure.
+ */
+ssize_t
+tarfs_io_read_buf(struct tarfs_mount *tmp, bool raw,
+    void *buf, off_t off, size_t len)
+{
+	struct iovec iov;
+	struct uio io;
+	ssize_t nread;
+	int error;
+
+	/* zero-length reads trivially succeed */
+	if (len == 0) {
+		TARFS_DPF(IO, "%s(%zu, %zu) null\n", __func__,
+		    (size_t)off, len);
+		return (0);
+	}
+
+	/* describe the destination buffer */
+	iov.iov_base = buf;
+	iov.iov_len = len;
+
+	/* describe the transfer */
+	io.uio_iov = &iov;
+	io.uio_iovcnt = 1;
+	io.uio_offset = off;
+	io.uio_resid = len;
+	io.uio_segflg = UIO_SYSSPACE;
+	io.uio_rw = UIO_READ;
+	io.uio_td = curthread;
+
+	error = tarfs_io_read(tmp, raw, &io);
+	if (error != 0) {
+		TARFS_DPF(IO, "%s(%zu, %zu) error %d\n", __func__,
+		    (size_t)off, len, error);
+		return (-error);
+	}
+
+	/* whatever was not left over has been transferred */
+	nread = len - io.uio_resid;
+	if (nread != 0 || len == 0) {
+		TARFS_DPF(IO, "%s(%zu, %zu) read %zd | %*D\n", __func__,
+		    (size_t)off, len, nread,
+		    (int)(nread > 8 ? 8 : nread), (uint8_t *)buf, " ");
+	} else {
+		TARFS_DPF(IO, "%s(%zu, %zu) eof\n", __func__,
+		    (size_t)off, len);
+	}
+	return (nread);
+}
+
+#ifdef ZSTDIO
+/*
+ * Memory allocation hooks handed to zstd so that its decompression state
+ * is allocated from, and accounted to, the M_TARFSZSTATE malloc type.
+ * The opaque argument is unused.
+ */
+static void *
+tarfs_zstate_alloc(void *opaque, size_t size)
+{
+	void *address;
+
+	(void)opaque;
+	address = malloc(size, M_TARFSZSTATE, M_WAITOK);
+	return (address);
+}
+
+static void
+tarfs_zstate_free(void *opaque, void *address)
+{
+
+	(void)opaque;
+	free(address, M_TARFSZSTATE);
+}
+
+static ZSTD_customMem tarfs_zstd_mem = {
+	.customAlloc = tarfs_zstate_alloc,
+	.customFree = tarfs_zstate_free,
+	.opaque = NULL,
+};
+#endif
+
+/*
+ * Advances the current position in the decompression frame index by one
+ * entry.  If this moves past the last known entry, a new entry holding
+ * the given input and output offsets is appended, doubling the capacity
+ * of the index first if it is full.  Otherwise, the entry must already
+ * hold the given offsets.
+ */
+static void
+tarfs_zio_update_index(struct tarfs_zio *zio, off_t i, off_t o)
+{
+	unsigned int pos;
+
+	pos = ++zio->curidx;
+	if (pos >= zio->nidx) {
+		/* new entry; make room for it if necessary */
+		zio->nidx++;
+		if (zio->nidx > zio->szidx) {
+			zio->szidx *= 2;
+			zio->idx = realloc(zio->idx,
+			    zio->szidx * sizeof(*zio->idx),
+			    M_TARFSZSTATE, M_ZERO | M_WAITOK);
+			TARFS_DPF(ALLOC, "%s: resized zio index\n", __func__);
+		}
+		zio->idx[pos].i = i;
+		zio->idx[pos].o = o;
+		TARFS_DPF(ZIDX, "%s: index %u = i %zu o %zu\n", __func__,
+		    pos, (size_t)zio->idx[pos].i,
+		    (size_t)zio->idx[pos].o);
+	}
+	/* new or old, the entry must match what we were given */
+	MPASS(zio->idx[zio->curidx].i == i);
+	MPASS(zio->idx[zio->curidx].o == o);
+}
+
+/*
+ * VOP_ACCESS for zio node.  A request for exactly VREAD is passed on to
+ * the underlying tar file; anything else is refused with EPERM.
+ */
+static int
+tarfs_zaccess(struct vop_access_args *ap)
+{
+	struct tarfs_zio *zio = ap->a_vp->v_data;
+	struct tarfs_mount *tmp = zio->tmp;
+	accmode_t accmode = ap->a_accmode;
+	int error;
+
+	if (accmode != VREAD)
+		error = EPERM;
+	else
+		error = VOP_ACCESS(tmp->vp, accmode, ap->a_cred, ap->a_td);
+	TARFS_DPF(ZIO, "%s(%d) = %d\n", __func__, accmode, error);
+	return (error);
+}
+
+/*
+ * VOP_GETATTR for zio node.  Ownership, mode, timestamps (except for
+ * the birth time, which is that of the root node) and space usage are
+ * taken from the underlying tar file, while the size is the output
+ * offset recorded in the last entry of the frame index.
+ */
+static int
+tarfs_zgetattr(struct vop_getattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	struct tarfs_zio *zio = vp->v_data;
+	struct tarfs_mount *tmp = zio->tmp;
+	struct vattr tarva;
+	int error;
+
+	VATTR_NULL(vap);
+	error = VOP_GETATTR(tmp->vp, &tarva, ap->a_cred);
+	if (error != 0)
+		goto out;
+
+	/* properties of the zio node itself */
+	vap->va_type = VREG;
+	vap->va_nlink = 1;
+	vap->va_fileid = TARFS_ZIOINO;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
+	vap->va_size = zio->idx[zio->nidx - 1].o;
+	vap->va_birthtime = tmp->root->birthtime;
+
+	/* properties inherited from the tar file */
+	vap->va_mode = tarva.va_mode;
+	vap->va_uid = tarva.va_uid;
+	vap->va_gid = tarva.va_gid;
+	vap->va_atime = tarva.va_atime;
+	vap->va_mtime = tarva.va_mtime;
+	vap->va_ctime = tarva.va_ctime;
+	vap->va_bytes = tarva.va_bytes;
+out:
+	TARFS_DPF(ZIO, "%s() = %d\n", __func__, error);
+	return (error);
+}
+
+/*
+ * VOP_READ for zio node.
+ *
+ * Obtains a buffer for the zio vnode through bread() and copies the
+ * requested part of it to the caller.  If the request extends past the
+ * end of the buffer, the copy is truncated to what the buffer holds, and
+ * the remainder is left in uio_resid.
+ */
+static int
+tarfs_zread(struct vop_read_args *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ struct tarfs_zio *zio = vp->v_data;
+ struct tarfs_mount *tmp = zio->tmp;
+ struct uio *uiop = ap->a_uio;
+ struct buf *bp;
+ off_t off = uiop->uio_offset;
+ size_t len = uiop->uio_resid;
+ int error;
+
+ /*
+ * The block number is in units of tmp->iosize.
+ * NOTE(review): the third argument is computed as a count of
+ * iosize-sized blocks spanned by the request; confirm that this is
+ * the unit bread() expects for its size argument.
+ */
+ error = bread(vp, off / tmp->iosize,
+ (off + len + tmp->iosize - 1) / tmp->iosize - off / tmp->iosize,
+ uiop->uio_td->td_ucred, &bp);
+ if (error == 0) {
+ /* do not copy beyond the end of the buffer */
+ if (off % tmp->iosize + len > bp->b_bufsize)
+ len = bp->b_bufsize - off % tmp->iosize;
+ error = uiomove(bp->b_data + off % tmp->iosize, len, uiop);
+ brelse(bp);
+ }
+ TARFS_DPF(ZIO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
+ (size_t)off, len, error, uiop->uio_resid);
+ return (error);
+}
+
+/*
+ * VOP_RECLAIM for zio node.
+ *
+ * Detaches the vnode from the decompression state by clearing v_data;
+ * the state itself is not freed here (tarfs_zio_fini() does that).
+ * Always returns 0.
+ */
+static int
+tarfs_zreclaim(struct vop_reclaim_args *ap)
+{
+ struct vnode *vp = ap->a_vp;
+
+ TARFS_DPF(ZIO, "%s(%p)\n", __func__, vp);
+ vp->v_data = NULL;
+ vnode_destroy_vobject(vp);
+ cache_purge(vp);
+ return (0);
+}
+
+#ifdef ZSTDIO
+/*
+ * VOP_STRATEGY for zio node, zstd edition.
+ *
+ * Fills the buffer with decompressed data starting at output offset
+ * b_blkno * iosize.  If the request does not continue from the current
+ * stream position, the frame index is used to find a restart point and
+ * the decompression stream is reset; any output between that point and
+ * the requested offset is decompressed and discarded.
+ *
+ * The outcome is reported through the buffer (B_DONE is always set, and
+ * b_error / BIO_ERROR on failure); the function itself always returns 0.
+ * On failure, the stream is rewound to the first index entry.
+ */
+static int
+tarfs_zstrategy_zstd(struct tarfs_zio *zio, struct buf *bp)
+{
+ void *buf = NULL, *rl = NULL;
+ struct uio auio;
+ struct iovec aiov;
+ struct tarfs_mount *tmp = zio->tmp;
+ struct tarfs_zstd *zstd = zio->zstd;
+ struct vattr va;
+ ZSTD_inBuffer zib;
+ ZSTD_outBuffer zob;
+ off_t ipos, opos;
+ size_t ilen, olen;
+ size_t zerror;
+ off_t off = bp->b_blkno * tmp->iosize;
+ size_t len = bp->b_bufsize;
+ size_t bsize;
+ int error;
+ bool reset = false;
+
+ TARFS_DPF(ZIO, "%s: bufsize %ld bcount %ld resid %ld\n", __func__,
+ bp->b_bufsize, bp->b_bcount, bp->b_resid);
+
+ /* lock tarball */
+ error = vn_lock(tmp->vp, LK_EXCLUSIVE);
+ if (error != 0) {
+ goto fail_unlocked;
+ }
+
+ /* check size */
+ error = VOP_GETATTR(tmp->vp, &va, bp->b_rcred);
+ if (error != 0) {
+ goto fail;
+ }
+ /* do we have to rewind? */
+ if (off < zio->opos) {
+ while (zio->curidx > 0 && off < zio->idx[zio->curidx].o)
+ zio->curidx--;
+ reset = true;
+ }
+ /* advance to the nearest index entry */
+ if (off > zio->opos) {
+ // XXX maybe do a binary search instead
+ while (zio->curidx < zio->nidx - 1 &&
+ off >= zio->idx[zio->curidx + 1].o) {
+ zio->curidx++;
+ reset = true;
+ }
+ }
+ /* reset the decompression stream if needed */
+ if (reset) {
+ zio->ipos = zio->idx[zio->curidx].i;
+ zio->opos = zio->idx[zio->curidx].o;
+ ZSTD_resetDStream(zstd->zds);
+ TARFS_DPF(ZIDX, "%s: skipping to index %u = i %zu o %zu\n", __func__,
+ zio->curidx, (size_t)zio->ipos, (size_t)zio->opos);
+ } else {
+ TARFS_DPF(ZIDX, "%s: continuing at i %zu o %zu\n", __func__,
+ (size_t)zio->ipos, (size_t)zio->opos);
+ }
+ /* the input position must lie within the tarball */
+ if (zio->ipos >= va.va_size) {
+ error = EIO;
+ goto fail;
+ }
+ MPASS(zio->opos <= off);
+ bsize = MAXBSIZE; // XXX should probably use ZSTD_CStreamOutSize()
+ buf = malloc(bsize, M_TEMP, M_WAITOK);
+ /* start with an empty input buffer to force a read below */
+ zib.src = NULL;
+ zib.size = 0;
+ zib.pos = 0;
+ zob.dst = bp->b_data;
+ zob.size = bp->b_bufsize;
+ zob.pos = 0;
+ bp->b_resid = len;
+ error = 0;
+ rl = vn_rangelock_rlock(tmp->vp, zio->ipos, OFF_MAX);
+ while (bp->b_resid > 0) {
+ if (zib.pos == zib.size) {
+ /* request data from the underlying file */
+ aiov.iov_base = buf;
+ aiov.iov_len = bsize;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = zio->ipos;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_resid = aiov.iov_len;
+ auio.uio_td = curthread;
+ error = VOP_READ(tmp->vp, &auio, IO_DIRECT, bp->b_rcred);
+ if (error != 0)
+ goto fail;
+ TARFS_DPF(ZIO, "%s: req %zu+%zu got %zu+%zu\n", __func__,
+ (size_t)zio->ipos, bsize,
+ (size_t)zio->ipos, bsize - auio.uio_resid);
+ zib.src = buf;
+ zib.size = bsize - auio.uio_resid;
+ zib.pos = 0;
+ }
+ MPASS(zib.pos <= zib.size);
+ /*
+ * Still no input after a read: the tarball has ended.
+ * NOTE(review): this leaves with error == 0 and b_resid > 0;
+ * confirm that a short buffer without an error is intended.
+ */
+ if (zib.pos == zib.size) {
+ TARFS_DPF(ZIO, "%s: end of file after i %zu o %zu\n", __func__,
+ (size_t)zio->ipos, (size_t)zio->opos);
+ goto fail;
+ }
+ if (zio->opos < off) {
+ /* to be discarded */
+ zob.size = min(off - zio->opos, bp->b_bufsize);
+ zob.pos = 0;
+ } else {
+ /* wanted output goes at its final place in the buffer */
+ zob.size = bp->b_bufsize;
+ zob.pos = zio->opos - off;
+ if (zob.size > zob.pos + bp->b_resid)
+ zob.size = zob.pos + bp->b_resid;
+ }
+ /* remember where we were so we can tell how far we got */
+ ipos = zib.pos;
+ opos = zob.pos;
+ /* decompress as much as possible */
+ zerror = ZSTD_decompressStream(zstd->zds, &zob, &zib);
+ zio->ipos += ilen = zib.pos - ipos;
+ zio->opos += olen = zob.pos - opos;
+ /* only output past the requested offset counts */
+ if (zio->opos > off)
+ bp->b_resid -= olen;
+ if (ZSTD_isError(zerror)) {
+ TARFS_DPF(ZIO, "%s: inflate failed after i %zu o %zu: %s\n", __func__,
+ (size_t)zio->ipos, (size_t)zio->opos, ZSTD_getErrorName(zerror));
+ error = EIO;
+ goto fail;
+ }
+ if (zerror == 0 && olen == 0) {
+ TARFS_DPF(ZIO, "%s: end of stream after i %zu o %zu\n", __func__,
+ (size_t)zio->ipos, (size_t)zio->opos);
+ break;
+ }
+ if (zerror == 0) {
+ TARFS_DPF(ZIO, "%s: end of frame after i %zu o %zu\n", __func__,
+ (size_t)zio->ipos, (size_t)zio->opos);
+ tarfs_zio_update_index(zio, zio->ipos, zio->opos);
+ }
+ TARFS_DPF(ZIO, "%s: inflated %zu\n", __func__, olen);
+#ifdef TARFS_DEBUG
+ counter_u64_add(tarfs_zio_inflated, olen);
+#endif
+ }
+ /* common exit, also reached on success */
+fail:
+ if (rl != NULL)
+ vn_rangelock_unlock(tmp->vp, rl);
+ VOP_UNLOCK(tmp->vp);
+fail_unlocked:
+ if (buf != NULL)
+ free(buf, M_TEMP);
+ TARFS_DPF(ZIO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
+ (size_t)off, len, error, bp->b_resid);
+#ifdef TARFS_DEBUG
+ counter_u64_add(tarfs_zio_consumed, len - bp->b_resid);
+#endif
+ bp->b_flags |= B_DONE;
+ bp->b_error = error;
+ if (error != 0) {
+ /* start over from the first index entry next time */
+ bp->b_ioflags |= BIO_ERROR;
+ zio->curidx = 0;
+ zio->ipos = zio->idx[0].i;
+ zio->opos = zio->idx[0].o;
+ ZSTD_resetDStream(zstd->zds);
+ }
+ return (0);
+}
+#endif
+
+/*
+ * VOP_STRATEGY for zio node.  Hands the buffer to the strategy routine
+ * of the compression format in use; if there is none, the buffer is
+ * completed with EFTYPE.  Always returns 0.
+ */
+static int
+tarfs_zstrategy(struct vop_strategy_args *ap)
+{
+	struct buf *bp = ap->a_bp;
+	struct tarfs_zio *zio = ap->a_vp->v_data;
+
+#ifdef ZSTDIO
+	if (zio->zstd != NULL)
+		return (tarfs_zstrategy_zstd(zio, bp));
+#endif
+	/* no decompressor available for this node */
+	bp->b_error = EFTYPE;
+	bp->b_ioflags |= BIO_ERROR;
+	bp->b_flags |= B_DONE;
+	return (0);
+}
+
+/*
+ * Vnode operations vector for the zio node, which represents the
+ * decompressed stream.  Only the operations listed here are provided by
+ * this file; default_vnodeops is named as the default vector.
+ */
+static struct vop_vector tarfs_znodeops = {
+ .vop_default = &default_vnodeops,
+
+ .vop_access = tarfs_zaccess,
+ .vop_getattr = tarfs_zgetattr,
+ .vop_read = tarfs_zread,
+ .vop_reclaim = tarfs_zreclaim,
+ .vop_strategy = tarfs_zstrategy,
+};
+VFS_VOP_VECTOR_REGISTER(tarfs_znodeops);
+
+/*
+ * Initializes the decompression layer: allocates the zio state and a
+ * frame index whose first entry maps input offset i to output offset o,
+ * then creates the vnode through which the decompressed stream is read.
+ * Both the state and the vnode are recorded in the mount structure.
+ * Returns the new zio state.
+ */
+static struct tarfs_zio *
+tarfs_zio_init(struct tarfs_mount *tmp, off_t i, off_t o)
+{
+	struct tarfs_zio *zio;
+	struct vnode *zvp;
+
+	/* the state itself */
+	zio = malloc(sizeof(*zio), M_TARFSZSTATE, M_ZERO | M_WAITOK);
+	TARFS_DPF(ALLOC, "%s: allocated zio\n", __func__);
+	zio->tmp = tmp;
+	zio->ipos = i;
+	zio->opos = o;
+
+	/* the frame index, with a single entry for the starting point */
+	zio->szidx = 128;
+	zio->idx = malloc(zio->szidx * sizeof(*zio->idx), M_TARFSZSTATE,
+	    M_ZERO | M_WAITOK);
+	zio->nidx = 1;
+	zio->curidx = 0;
+	zio->idx[0].i = i;
+	zio->idx[0].o = o;
+	tmp->zio = zio;
+	TARFS_DPF(ALLOC, "%s: allocated zio index\n", __func__);
+
+	/* the vnode representing the decompressed stream */
+	getnewvnode("tarfs", tmp->vfs, &tarfs_znodeops, &zvp);
+	zvp->v_type = VREG;
+	zvp->v_mount = tmp->vfs;
+	zvp->v_data = zio;
+	tmp->znode = zvp;
+	TARFS_DPF(ZIO, "%s: created zio node\n", __func__);
+	return (zio);
+}
+
+/*
+ * Initializes the I/O layer, including decompression if the signature of
+ * a supported compression format is detected. Returns 0 on success and a
+ * positive errno value on failure.
+ *
+ * The first iosize bytes of the tar file are read into a temporary,
+ * zero-filled buffer and compared against the known signatures.  xz and
+ * zlib are recognized but rejected with EOPNOTSUPP, as is zstd unless
+ * the kernel was built with ZSTDIO.  Anything else is assumed to be an
+ * uncompressed archive and needs no further setup here.
+ */
+int
+tarfs_io_init(struct tarfs_mount *tmp)
+{
+	uint8_t *block;
+	struct tarfs_zio *zio = NULL;
+	ssize_t res;
+	int error = 0;
+
+	block = malloc(tmp->iosize, M_TEMP, M_ZERO | M_WAITOK);
+	res = tarfs_io_read_buf(tmp, true, block, 0, tmp->iosize);
+	if (res < 0) {
+		/* leave through the common exit so that block is freed */
+		error = -res;
+		goto bad;
+	}
+	if (memcmp(block, XZ_MAGIC, sizeof(XZ_MAGIC)) == 0) {
+		printf("xz compression not supported\n");
+		error = EOPNOTSUPP;
+		goto bad;
+	} else if (memcmp(block, ZLIB_MAGIC, sizeof(ZLIB_MAGIC)) == 0) {
+		printf("zlib compression not supported\n");
+		error = EOPNOTSUPP;
+		goto bad;
+	} else if (memcmp(block, ZSTD_MAGIC, sizeof(ZSTD_MAGIC)) == 0) {
+#ifdef ZSTDIO
+		zio = tarfs_zio_init(tmp, 0, 0);
+		zio->zstd = malloc(sizeof(*zio->zstd), M_TARFSZSTATE, M_WAITOK);
+		zio->zstd->zds = ZSTD_createDStream_advanced(tarfs_zstd_mem);
+		(void)ZSTD_initDStream(zio->zstd->zds);
+#else
+		printf("zstd compression not supported\n");
+		error = EOPNOTSUPP;
+		goto bad;
+#endif
+	}
+	/* common exit, also reached on success */
+bad:
+	free(block, M_TEMP);
+	return (error);
+}
+
+/*
+ * Tears down the decompression layer: disposes of the zio vnode, if
+ * any, then frees the decompressor state, the frame index and the zio
+ * state itself.  Returns 0 on success.  If the zio vnode cannot be
+ * locked, returns the error from vn_lock() and leaves everything in
+ * place.
+ */
+static int
+tarfs_zio_fini(struct tarfs_mount *tmp)
+{
+	struct tarfs_zio *zio = tmp->zio;
+	int error = 0;
+
+	if (tmp->znode != NULL) {
+		error = vn_lock(tmp->znode, LK_EXCLUSIVE);
+		if (error != 0) {
+			TARFS_DPF(ALLOC, "%s: failed to lock znode\n",
+			    __func__);
+			return (error);
+		}
+		tmp->znode->v_mount = NULL;
+		vgone(tmp->znode);
+		vput(tmp->znode);
+		tmp->znode = NULL;
+	}
+#ifdef ZSTDIO
+	if (zio->zstd != NULL) {
+		TARFS_DPF(ALLOC, "%s: freeing zstd state\n", __func__);
+		ZSTD_freeDStream(zio->zstd->zds);
+		free(zio->zstd, M_TARFSZSTATE);
+	}
+#endif
+	if (zio->idx != NULL) {
+		TARFS_DPF(ALLOC, "%s: freeing index\n", __func__);
+		free(zio->idx, M_TARFSZSTATE);
+	}
+	TARFS_DPF(ALLOC, "%s: freeing zio\n", __func__);
+	free(zio, M_TARFSZSTATE);
+	tmp->zio = NULL;
+	return (error);
+}
+
+/*
+ * Tears down the I/O layer.  There is nothing to do unless a
+ * decompression layer was set up, in which case the result of tearing
+ * it down is returned.
+ */
+int
+tarfs_io_fini(struct tarfs_mount *tmp)
+{
+
+	if (tmp->zio == NULL)
+		return (0);
+	return (tarfs_zio_fini(tmp));
+}
diff --git a/sys/fs/tarfs/tarfs_subr.c b/sys/fs/tarfs/tarfs_subr.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/tarfs/tarfs_subr.c
@@ -0,0 +1,604 @@
+/*-
+ * Copyright (c) 2013 Juniper Networks, Inc.
+ * Copyright (c) 2022 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_tarfs.h"
+
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/fcntl.h>
+#include <sys/libkern.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+
+#include <vm/vm_param.h>
+
+#include <fs/tarfs/tarfs.h>
+#include <fs/tarfs/tarfs_dbg.h>
+
+MALLOC_DEFINE(M_TARFSNAME, "tarfs name", "tarfs file names");
+MALLOC_DEFINE(M_TARFSBLK, "tarfs blk", "tarfs block maps");
+
+SYSCTL_NODE(_vfs, OID_AUTO, tarfs, CTLFLAG_RW, 0, "Tar filesystem");
+
+unsigned int tarfs_ioshift = TARFS_IOSHIFT_DEFAULT;
+
+/*
+ * Sysctl handler for the ioshift knob.  Always reports the current
+ * value; if a new value was supplied, stores it after substituting the
+ * default for zero and clamping it to the range
+ * [TARFS_IOSHIFT_MIN, TARFS_IOSHIFT_MAX].
+ */
+static int
+tarfs_sysctl_handle_ioshift(SYSCTL_HANDLER_ARGS)
+{
+	unsigned int *valp, val;
+	int error;
+
+	valp = arg1;
+	val = *valp;
+	error = SYSCTL_OUT(req, &val, sizeof(val));
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	error = SYSCTL_IN(req, &val, sizeof(val));
+	if (error != 0)
+		return (error);
+	if (val == 0)
+		val = TARFS_IOSHIFT_DEFAULT;
+	if (val < TARFS_IOSHIFT_MIN)
+		val = TARFS_IOSHIFT_MIN;
+	if (val > TARFS_IOSHIFT_MAX)
+		val = TARFS_IOSHIFT_MAX;
+	*valp = val;
+	return (0);
+}
+
+SYSCTL_PROC(_vfs_tarfs, OID_AUTO, ioshift,
+ CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW | CTLFLAG_TUN,
+ &tarfs_ioshift, 0, tarfs_sysctl_handle_ioshift, "IU",
+ "Tar filesystem preferred I/O size (log 2)");
+
+#ifdef TARFS_DEBUG
+int tarfs_debug;
+SYSCTL_INT(_vfs_tarfs, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_TUN,
+ &tarfs_debug, 0, "Tar filesystem debug mask");
+#endif /* TARFS_DEBUG */
+
+/*
+ * Recursively prints the names of all entries below the directory tnp,
+ * one per line, indented by four spaces per level.  Does nothing if tnp
+ * is not a directory.
+ */
+static void
+tarfs_dump_tree_internal(struct tarfs_node *tnp, int indent)
+{
+	struct tarfs_node *child;
+
+	if (tnp->type != VDIR)
+		return;
+
+	TAILQ_FOREACH(child, &tnp->dir.dirhead, dirents) {
+		printf("%*s%s\n", indent * 4, "",
+		    (child->name != NULL) ? child->name : "<<root>>");
+		/* the callee returns at once for anything but a directory */
+		tarfs_dump_tree_internal(child, indent + 1);
+	}
+}
+
+/*
+ * Prints the name of tnp followed by the tree below it.  A NULL tnp is
+ * silently ignored; a node without a name is shown as "<<root>>".
+ */
+void
+tarfs_dump_tree(struct tarfs_node *tnp)
+{
+
+	if (tnp == NULL)
+		return;
+
+	printf("%s\n", (tnp->name != NULL) ? tnp->name : "<<root>>");
+	tarfs_dump_tree_internal(tnp, 1);
+}
+
+/*
+ * Prints the fields of a tarfs node, one per line, followed by the
+ * directory-specific or device-specific fields where applicable.  A
+ * NULL tnp is silently ignored.
+ */
+void
+tarfs_print_node(struct tarfs_node *tnp)
+{
+	const char *name;
+
+	if (tnp == NULL)
+		return;
+	name = (tnp->name == NULL) ? "<<root>>" : tnp->name;
+
+	printf("%s: node %p\n", __func__, tnp);
+	printf("\tvnode %p\n", tnp->vnode);
+	printf("\ttmp %p\n", tnp->tmp);
+	printf("\ttype %d\n", tnp->type);
+	printf("\tino %lu\n", tnp->ino);
+	printf("\tsize %zu\n", tnp->size);
+	printf("\tname %s\n", name);
+	printf("\tnamelen %zu\n", tnp->namelen);
+	printf("\tuid %d\n", tnp->uid);
+	printf("\tgid %d\n", tnp->gid);
+	printf("\tmode o%o\n", tnp->mode);
+	printf("\tflags %u\n", tnp->flags);
+	printf("\tnlink %lu\n", tnp->nlink);
+	printf("\tatime %d\n", (int)tnp->atime.tv_sec);
+	printf("\tmtime %d\n", (int)tnp->mtime.tv_sec);
+	printf("\tctime %d\n", (int)tnp->ctime.tv_sec);
+	printf("\tbirthtime %d\n", (int)tnp->birthtime.tv_sec);
+	printf("\tgen %lu\n", tnp->gen);
+	printf("\tparent %p\n", tnp->parent);
+
+	/* type-specific fields */
+	if (tnp->type == VDIR) {
+		printf("\tdir.lastcookie %jd\n", tnp->dir.lastcookie);
+		printf("\tdir.lastnode %p\n", tnp->dir.lastnode);
+	} else if (tnp->type == VBLK || tnp->type == VCHR) {
+		printf("\trdev %lu\n", tnp->rdev);
+	}
+}
+
+/*
+ * Looks up the entry named by cnp in the directory tnp.  If f is not
+ * NULL, only that entry is considered.  If the match is a regular file
+ * whose other pointer is set (a hard link, going by the debug message),
+ * the node it points to is returned in its place.  Returns the node, or
+ * NULL if there is no match.
+ */
+struct tarfs_node *
+tarfs_lookup_node(struct tarfs_node *tnp, struct tarfs_node *f,
+    struct componentname *cnp)
+{
+	struct tarfs_node *entry;
+
+	TARFS_DPF(LOOKUP, "%s: name: %.*s\n", __func__, (int)cnp->cn_namelen,
+	    cnp->cn_nameptr);
+
+	TAILQ_FOREACH(entry, &tnp->dir.dirhead, dirents) {
+		if (f != NULL && entry != f)
+			continue;
+
+		if (entry->namelen == cnp->cn_namelen &&
+		    bcmp(entry->name, cnp->cn_nameptr,
+		    entry->namelen) == 0)
+			break;
+	}
+
+	/* TAILQ_FOREACH leaves entry NULL once the list is exhausted */
+	if (entry == NULL) {
+		TARFS_DPF(LOOKUP, "%s: no match found\n", __func__);
+		return (NULL);
+	}
+
+	if (entry->type == VREG && entry->other != NULL) {
+		/*
+		 * This is unconditional, so TARFS_DPF rather than
+		 * TARFS_DPF_IFF, which expects a condition before the
+		 * format string.
+		 */
+		TARFS_DPF(LOOKUP, "%s: following hard link %p\n",
+		    __func__, entry);
+		entry = entry->other;
+	}
+	TARFS_DPF(LOOKUP, "%s: found tarfs_node %p\n", __func__,
+	    entry);
+	return (entry);
+}
+
+/*
+ * Finds the entry of directory tnp whose inode number equals cookie.
+ * The directory's lastcookie / lastnode pair is consulted first (this
+ * function does not update it).  Returns the matching node, or NULL if
+ * the directory has no such entry.
+ */
+struct tarfs_node *
+tarfs_lookup_dir(struct tarfs_node *tnp, off_t cookie)
+{
+	struct tarfs_node *entry;
+
+	TARFS_DPF(LOOKUP, "%s: tarfs_node %p, cookie %jd\n", __func__, tnp,
+	    cookie);
+	TARFS_DPF(LOOKUP, "%s: name: %s\n", __func__,
+	    (tnp->name == NULL) ? "<<root>>" : tnp->name);
+
+	/* shortcut: same cookie as the remembered one */
+	if (tnp->dir.lastnode != NULL &&
+	    tnp->dir.lastcookie == cookie) {
+		TARFS_DPF(LOOKUP, "%s: Using cached entry: tarfs_node %p, "
+		    "cookie %jd\n", __func__, tnp->dir.lastnode,
+		    tnp->dir.lastcookie);
+		return (tnp->dir.lastnode);
+	}
+
+	TAILQ_FOREACH(entry, &tnp->dir.dirhead, dirents) {
+		TARFS_DPF(LOOKUP, "%s: tarfs_node %p, current %p, ino %lu\n",
+		    __func__, tnp, entry, entry->ino);
+		TARFS_DPF_IFF(LOOKUP, entry->name != NULL,
+		    "%s: name: %s\n", __func__, entry->name);
+		if (entry->ino != cookie)
+			continue;
+		TARFS_DPF(LOOKUP, "%s: Found entry: tarfs_node %p, "
+		    "cookie %lu\n", __func__, entry,
+		    entry->ino);
+		return (entry);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Allocates and initializes a tarfs node of the given type, links it
+ * into its parent directory (if parent is not NULL) and into the
+ * mount's list of all nodes, and stores it in *retnode.
+ *
+ * - name / namelen: node name; copied and NUL-terminated if namelen > 0,
+ *   otherwise the node is left without a name.
+ * - off / sz: stored as the node's offset and as both its size and
+ *   physical size; for VLNK, sz is also the length of linkname.
+ * - linkname: only used for VLNK; rdev: only used for VBLK and VCHR.
+ *
+ * Allocations use M_WAITOK.  Panics on a type other than VDIR, VLNK,
+ * VREG, VFIFO, VBLK or VCHR.  Always returns 0.
+ *
+ * NOTE(review): the flags argument is not stored anywhere in this
+ * function -- confirm whether that is intended.
+ */
+int
+tarfs_alloc_node(struct tarfs_mount *tmp, const char *name, size_t namelen,
+ enum vtype type, off_t off, size_t sz, time_t mtime, uid_t uid, gid_t gid,
+ mode_t mode, unsigned int flags, const char *linkname, dev_t rdev,
+ struct tarfs_node *parent, struct tarfs_node **retnode)
+{
+ struct tarfs_node *tnp;
+
+ TARFS_DPF(ALLOC, "%s(%.*s)\n", __func__, (int)namelen, name);
+
+ tnp = malloc(sizeof(struct tarfs_node), M_TARFSNODE, M_WAITOK | M_ZERO);
+ mtx_init(&tnp->lock, "tarfs node lock", NULL, MTX_DEF);
+ tnp->gen = arc4random();
+ tnp->tmp = tmp;
+ if (namelen > 0) {
+ tnp->name = malloc(namelen + 1, M_TARFSNAME, M_WAITOK);
+ tnp->namelen = namelen;
+ memcpy(tnp->name, name, namelen);
+ tnp->name[namelen] = '\0';
+ }
+ tnp->type = type;
+ tnp->uid = uid;
+ tnp->gid = gid;
+ tnp->mode = mode;
+ tnp->nlink = 1;
+ /* atime and birthtime are "now"; ctime follows the supplied mtime */
+ vfs_timestamp(&tnp->atime);
+ tnp->mtime.tv_sec = mtime;
+ tnp->birthtime = tnp->atime;
+ tnp->ctime = tnp->mtime;
+ /* nodes with a parent take their inode number from the mount's pool */
+ if (parent != NULL) {
+ tnp->ino = alloc_unr(tmp->ino_unr);
+ }
+ tnp->offset = off;
+ tnp->size = tnp->physize = sz;
+ switch (type) {
+ case VDIR:
+ MPASS(parent != tnp);
+ /* only the first, parentless directory may become the root */
+ MPASS(parent != NULL || tmp->root == NULL);
+ TAILQ_INIT(&tnp->dir.dirhead);
+ tnp->nlink++;
+ if (parent == NULL) {
+ tnp->ino = TARFS_ROOTINO;
+ }
+ tnp->physize = 0;
+ break;
+ case VLNK:
+ /* keep a NUL-terminated private copy of the link target */
+ tnp->link.name = malloc(sz + 1, M_TARFSNAME,
+ M_WAITOK);
+ tnp->link.namelen = sz;
+ memcpy(tnp->link.name, linkname, sz);
+ tnp->link.name[sz] = '\0';
+ break;
+ case VREG:
+ /* create dummy block map */
+ /* a single block covering the whole physical size */
+ tnp->nblk = 1;
+ tnp->blk = malloc(sizeof(*tnp->blk), M_TARFSBLK, M_WAITOK);
+ tnp->blk[0].i = 0;
+ tnp->blk[0].o = 0;
+ tnp->blk[0].l = tnp->physize;
+ break;
+ case VFIFO:
+ /* Nothing extra to do */
+ break;
+ case VBLK:
+ case VCHR:
+ tnp->rdev = rdev;
+ tnp->physize = 0;
+ break;
+ default:
+ panic("%s: type %d not allowed", __func__, type);
+ }
+ if (parent != NULL) {
+ MPASS(parent->type == VDIR);
+ TARFS_NODE_LOCK(parent);
+ TAILQ_INSERT_TAIL(&parent->dir.dirhead, tnp, dirents);
+ /* the directory's size grows by one node structure per entry */
+ parent->size += sizeof(struct tarfs_node);
+ tnp->parent = parent;
+ if (type == VDIR) {
+ parent->nlink++;
+ }
+ TARFS_NODE_UNLOCK(parent);
+ } else {
+ /* a node without a parent is its own parent */
+ tnp->parent = tnp;
+ }
+ MPASS(tnp->ino != 0);
+
+ TARFS_ALLNODES_LOCK(tmp);
+ TAILQ_INSERT_TAIL(&tmp->allnodes, tnp, entries);
+ TARFS_ALLNODES_UNLOCK(tmp);
+
+ *retnode = tnp;
+ tmp->nfiles++;
+ return (0);
+}
+
+#define is09(ch) ((ch) >= '0' && (ch) <= '9')
+
+/*
+ * Loads the textual block map stored at the start of a node's data and
+ * installs it as the node's block map.
+ *
+ * The map is a sequence of newline-terminated decimal numbers: first
+ * the number of entries, then, for each entry, its logical offset and
+ * its length.  The map is read in units of TARFS_BLOCKSIZE; the data
+ * for the first entry starts right after the last map block and each
+ * later entry's data follows the previous entry's.
+ *
+ * On success, replaces tnp->blk / tnp->nblk, sets tnp->size to realsize
+ * and returns 0.  Returns EINVAL if the map is malformed or fails
+ * validation, EIO on a short read, or the error reported by
+ * tarfs_io_read_buf().  The node is left untouched on failure.
+ */
+int
+tarfs_load_blockmap(struct tarfs_node *tnp, size_t realsize)
+{
+	struct tarfs_blk *blk = NULL;
+	char *map = NULL;
+	size_t nmap = 0, nblk = 0;
+	char *p, *q;
+	ssize_t res;
+	unsigned int i;
+	long n;
+
+	/*
+	 * Load the entire map into memory.  We don't know how big it is,
+	 * but as soon as we start reading it we will know how many
+	 * entries it contains, and then we can count newlines.
+	 */
+	do {
+		nmap++;
+		if (tnp->size < nmap * TARFS_BLOCKSIZE) {
+			TARFS_DPF(MAP, "%s: map too large\n", __func__);
+			goto bad;
+		}
+		/* grow the map */
+		map = realloc(map, nmap * TARFS_BLOCKSIZE + 1, M_TARFSBLK,
+		    M_ZERO | M_WAITOK);
+		/* read an additional block */
+		res = tarfs_io_read_buf(tnp->tmp, false,
+		    map + (nmap - 1) * TARFS_BLOCKSIZE,
+		    tnp->offset + (nmap - 1) * TARFS_BLOCKSIZE,
+		    TARFS_BLOCKSIZE);
+		if (res < 0 || res < TARFS_BLOCKSIZE) {
+			/* don't leak what we have read so far */
+			free(map, M_TARFSBLK);
+			return (res < 0 ? -res : EIO);
+		}
+		map[nmap * TARFS_BLOCKSIZE] = '\0'; /* sentinel */
+		if (nblk == 0) {
+			n = strtol(p = map, &q, 10);
+			if (q == p || *q != '\n' || n < 1)
+				goto syntax;
+			nblk = n;
+		}
+		for (n = 0, p = map; *p != '\0'; ++p) {
+			if (*p == '\n') {
+				++n;
+			}
+		}
+		TARFS_DPF(MAP, "%s: %ld newlines in map\n", __func__, n);
+		/* one line for the count plus two per entry */
+	} while (n < nblk * 2 + 1);
+	TARFS_DPF(MAP, "%s: block map length %zu\n", __func__, nblk);
+	blk = malloc(sizeof(*blk) * nblk, M_TARFSBLK, M_WAITOK | M_ZERO);
+	/* the first line was validated above, so the newline is there */
+	p = strchr(map, '\n') + 1;
+	for (i = 0; i < nblk; i++) {
+		if (i == 0)
+			blk[i].i = nmap * TARFS_BLOCKSIZE;
+		else
+			blk[i].i = blk[i - 1].i + blk[i - 1].l;
+		n = strtol(p, &q, 10);
+		if (q == p || *q != '\n' || n < 0)
+			goto syntax;
+		p = q + 1;
+		blk[i].o = n;
+		n = strtol(p, &q, 10);
+		if (q == p || *q != '\n' || n < 0)
+			goto syntax;
+		p = q + 1;
+		blk[i].l = n;
+		TARFS_DPF(MAP, "%s: %3d %12zu %12zu %12zu\n", __func__,
+		    i, blk[i].i, blk[i].o, blk[i].l);
+		/*
+		 * Check block alignment if the block is of non-zero
+		 * length (a zero-length block indicates the end of a
+		 * trailing hole).  Checking i indirectly checks the
+		 * previous block's l.  It's ok for the final block to
+		 * have an uneven length.
+		 */
+		if (blk[i].l == 0) {
+			TARFS_DPF(MAP, "%s: zero-length block\n", __func__);
+		} else if (blk[i].i % TARFS_BLOCKSIZE != 0 ||
+		    blk[i].o % TARFS_BLOCKSIZE != 0) {
+			TARFS_DPF(MAP, "%s: misaligned map entry\n", __func__);
+			goto bad;
+		}
+		/*
+		 * Check that this block starts after the end of the
+		 * previous one.
+		 */
+		if (i > 0 && blk[i].o < blk[i - 1].o + blk[i - 1].l) {
+			TARFS_DPF(MAP, "%s: overlapping map entries\n", __func__);
+			goto bad;
+		}
+		/*
+		 * Check that the block is within the file, both
+		 * physically and logically.
+		 */
+		if (blk[i].i + blk[i].l > tnp->physize ||
+		    blk[i].o + blk[i].l > realsize) {
+			TARFS_DPF(MAP, "%s: map overflow\n", __func__);
+			goto bad;
+		}
+	}
+	free(map, M_TARFSBLK);
+
+	/* store in node */
+	free(tnp->blk, M_TARFSBLK);
+	tnp->nblk = nblk;
+	tnp->blk = blk;
+	tnp->size = realsize;
+	return (0);
+syntax:
+	TARFS_DPF(MAP, "%s: syntax error in block map\n", __func__);
+bad:
+	free(map, M_TARFSBLK);
+	free(blk, M_TARFSBLK);
+	return (EINVAL);
+}
+
+/*
+ * Frees a tarfs node together with its name, its symbolic link target
+ * (for VLNK nodes) and its block map, hands its inode number back to
+ * the mount's pool if it is at least TARFS_MININO, and decrements the
+ * mount's file count.  The node is not unlinked from any list here.
+ */
+void
+tarfs_free_node(struct tarfs_node *tnp)
+{
+	struct tarfs_mount *tmp;
+
+	MPASS(tnp != NULL);
+	tmp = tnp->tmp;
+
+	/* free(9) ignores NULL, so none of these needs a guard */
+	if (tnp->type == VLNK)
+		free(tnp->link.name, M_TARFSNAME);
+	free(tnp->name, M_TARFSNAME);
+	free(tnp->blk, M_TARFSBLK);
+	if (tnp->ino >= TARFS_MININO)
+		free_unr(tmp->ino_unr, tnp->ino);
+	free(tnp, M_TARFSNODE);
+	tmp->nfiles--;
+}
+
+/*
+ * Reads up to len bytes from the node into uiop, starting at
+ * uiop->uio_offset, by walking the node's block map.  Any gap between
+ * the current offset and the start of the next block is filled with
+ * zeroes from zero_region; block contents are read with
+ * tarfs_io_read().  uiop's offset and residual count are advanced by
+ * the amount actually transferred.  Returns 0 on success or the error
+ * reported by uiomove() or tarfs_io_read().
+ */
+int
+tarfs_read_file(struct tarfs_node *tnp, size_t len, struct uio *uiop)
+{
+	struct uio auio;
+	size_t resid = len;
+	size_t copylen;
+	unsigned int i;
+	int error;
+
+	TARFS_DPF(VNODE, "%s(%s, %zu, %zu)\n", __func__,
+	    tnp->name, uiop->uio_offset, resid);
+	for (i = 0; i < tnp->nblk && resid > 0; ++i) {
+		if (uiop->uio_offset > tnp->blk[i].o + tnp->blk[i].l) {
+			/* skip this block */
+			continue;
+		}
+		while (resid > 0 &&
+		    uiop->uio_offset < tnp->blk[i].o) {
+			/* move out some zeroes... */
+			copylen = tnp->blk[i].o - uiop->uio_offset;
+			if (copylen > resid)
+				copylen = resid;
+			if (copylen > ZERO_REGION_SIZE)
+				copylen = ZERO_REGION_SIZE;
+			auio = *uiop;
+			auio.uio_offset = 0;
+			auio.uio_resid = copylen;
+			error = uiomove(__DECONST(void *, zero_region),
+			    copylen, &auio);
+			if (error != 0)
+				return (error);
+			TARFS_DPF(MAP, "%s(%s) = zero %zu\n", __func__,
+			    tnp->name, copylen - auio.uio_resid);
+			uiop->uio_offset += copylen - auio.uio_resid;
+			uiop->uio_resid -= copylen - auio.uio_resid;
+			resid -= copylen - auio.uio_resid;
+		}
+		while (resid > 0 &&
+		    uiop->uio_offset < tnp->blk[i].o + tnp->blk[i].l) {
+			/*
+			 * Now actual data.  Never read past the end of
+			 * this block: what follows it logically may be
+			 * a hole even though the next block's data
+			 * follows it physically.
+			 */
+			copylen = tnp->blk[i].o + tnp->blk[i].l -
+			    uiop->uio_offset;
+			if (copylen > resid)
+				copylen = resid;
+			auio = *uiop;
+			auio.uio_offset = tnp->offset + tnp->blk[i].i +
+			    uiop->uio_offset - tnp->blk[i].o;
+			auio.uio_resid = copylen;
+			error = tarfs_io_read(tnp->tmp, false, &auio);
+			if (error != 0)
+				return (error);
+			TARFS_DPF(MAP, "%s(%s) = data %zu\n", __func__,
+			    tnp->name, copylen - auio.uio_resid);
+			uiop->uio_offset += copylen - auio.uio_resid;
+			uiop->uio_resid -= copylen - auio.uio_resid;
+			resid -= copylen - auio.uio_resid;
+		}
+	}
+	TARFS_DPF(VNODE, "%s(%s) = %zu\n", __func__,
+	    tnp->name, len - resid);
+	return (0);
+}
+
+/*
+ * XXX ugly file flag parser which could easily be a finite state machine
+ * driven by a small precomputed table.
+ *
+ * Note that unlike strtofflags(3), we make no attempt to handle negated
+ * flags, since they shouldn't appear in tar files.
+ */
+/*
+ * Table mapping file flag names to the corresponding flag bits.  The
+ * table is terminated by an entry whose name is NULL.
+ */
+static const struct tarfs_flag {
+ const char *name;
+ unsigned int flag;
+} tarfs_flags[] = {
+ { "nodump", UF_NODUMP },
+ { "uchg", UF_IMMUTABLE },
+ { "uappnd", UF_APPEND },
+ { "opaque", UF_OPAQUE },
+ { "uunlnk", UF_NOUNLINK },
+ { "arch", SF_ARCHIVED },
+ { "schg", SF_IMMUTABLE },
+ { "sappnd", SF_APPEND },
+ { "sunlnk", SF_NOUNLINK },
+ { NULL, 0 }, /* terminator */
+};
+
+/*
+ * Parses a comma-separated list of file flag names and returns the
+ * union of the corresponding flag bits from tarfs_flags.  Parsing stops
+ * at the first character that is neither a lower-case letter nor a
+ * comma, or at the first name that is not in the table.  If end is not
+ * NULL, *end is set to where parsing stopped; it points at the
+ * terminating NUL if and only if the whole string was accepted.
+ */
+unsigned int
+tarfs_strtofflags(const char *str, char **end)
+{
+	const struct tarfs_flag *tf;
+	const char *p, *q;
+	unsigned int ret;
+
+	ret = 0;
+	for (p = q = str; *q != '\0'; p = q + 1) {
+		/* find the end of the current name */
+		for (q = p; *q != '\0' && *q != ','; ++q) {
+			if (*q < 'a' || *q > 'z') {
+				goto end;
+			}
+			/* nothing */
+		}
+		/* the name must match a table entry in its entirety */
+		for (tf = tarfs_flags; tf->name != NULL; tf++) {
+			if (strncmp(tf->name, p, q - p) == 0 &&
+			    tf->name[q - p] == '\0') {
+				TARFS_DPF(ALLOC, "%s: %.*s = 0x%06x\n", __func__,
+				    (int)(q - p), p, tf->flag);
+				ret |= tf->flag;
+				break;
+			}
+		}
+		if (tf->name == NULL) {
+			TARFS_DPF(ALLOC, "%s: %.*s = 0x??????\n",
+			    __func__, (int)(q - p), p);
+			goto end;
+		}
+	}
+end:
+	/*
+	 * Test the out-parameter itself, not what it points to: the
+	 * caller's variable may well hold NULL (or garbage) on entry.
+	 */
+	if (end != NULL) {
+		*end = __DECONST(char *, q);
+	}
+	return (ret);
+}
diff --git a/sys/fs/tarfs/tarfs_vfsops.c b/sys/fs/tarfs/tarfs_vfsops.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/tarfs/tarfs_vfsops.c
@@ -0,0 +1,1187 @@
+/*-
+ * Copyright (c) 2013 Juniper Networks, Inc.
+ * Copyright (c) 2022 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* XXX GNU tar format is not supported by this driver */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_tarfs.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/libkern.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/sbuf.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/vnode.h>
+
+#include <vm/vm_param.h>
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
+#include <fs/tarfs/tarfs.h>
+#include <fs/tarfs/tarfs_dbg.h>
+
+CTASSERT(ZERO_REGION_SIZE > TARFS_BLOCKSIZE);
+
+/*
+ * Layout of a tar header record as it appears at the start of a header
+ * block.  Every field is a fixed-width character array.
+ */
+struct ustar_header {
+ char name[100]; /* File name */
+ char mode[8]; /* Mode flags */
+ char uid[8]; /* User id */
+ char gid[8]; /* Group id */
+ char size[12]; /* Size */
+ char mtime[12]; /* Modified time */
+ char checksum[8]; /* Checksum */
+ char typeflag[1]; /* Type */
+ char linkname[100]; /* "old format" stops here */
+ char magic[6]; /* POSIX UStar "ustar\0" indicator */
+ char version[2]; /* POSIX UStar version "00" */
+ char uname[32]; /* User name */
+ char gname[32]; /* Group name */
+ char major[8]; /* Device major number */
+ char minor[8]; /* Device minor number */
+ char prefix[155]; /* Path prefix */
+};
+
+#define TAR_EOF ((off_t)-1)
+
+#define TAR_TYPE_FILE '0'
+#define TAR_TYPE_HARDLINK '1'
+#define TAR_TYPE_SYMLINK '2'
+#define TAR_TYPE_CHAR '3'
+#define TAR_TYPE_BLOCK '4'
+#define TAR_TYPE_DIRECTORY '5'
+#define TAR_TYPE_FIFO '6'
+#define TAR_TYPE_CONTIG '7'
+#define TAR_TYPE_GLOBAL_EXTHDR 'g'
+#define TAR_TYPE_EXTHDR 'x'
+#define TAR_TYPE_GNU_SPARSE 'S'
+
+#define USTAR_MAGIC (uint8_t []){ 'u', 's', 't', 'a', 'r', 0 }
+#define USTAR_VERSION (uint8_t []){ '0', '0' }
+#define GNUTAR_MAGIC (uint8_t []){ 'u', 's', 't', 'a', 'r', ' ' }
+#define GNUTAR_VERSION (uint8_t []){ ' ', '\x0' }
+
+#define DEFDIRMODE (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
+
+MALLOC_DEFINE(M_TARFSMNT, "tarfs mount", "tarfs mount structures");
+MALLOC_DEFINE(M_TARFSNODE, "tarfs node", "tarfs node structures");
+
+static vfs_mount_t tarfs_mount;
+static vfs_unmount_t tarfs_unmount;
+static vfs_root_t tarfs_root;
+static vfs_statfs_t tarfs_statfs;
+static vfs_fhtovp_t tarfs_fhtovp;
+
+static const char *tarfs_opts[] = {
+ "from", "gid", "mode", "uid", "verify",
+ NULL
+};
+
+/*
+ * Reads a len-width signed octal number from strp.  Leading spaces and
+ * tabs are skipped, an optional '-' is accepted, and parsing stops at
+ * the first character that is not an octal digit.  A magnitude that
+ * does not fit in an int64_t saturates at INT64_MAX.  Returns the
+ * value, or 0 if the field contains nothing but blanks.
+ * XXX Does not report errors.
+ */
+static int64_t
+tarfs_str2octal(const char *strp, size_t len)
+{
+	int64_t val;
+	size_t idx;
+	int sign;
+
+	/*
+	 * Skip leading spaces or tabs.
+	 * XXX why?  POSIX requires numeric fields to be 0-padded.
+	 */
+	for (idx = 0; idx < len; idx++)
+		if (strp[idx] != ' ' && strp[idx] != '\t')
+			break;
+
+	if (idx == len)
+		return (0);
+
+	if (strp[idx] == '-') {
+		sign = -1;
+		idx++;
+	} else
+		sign = 1;
+
+	val = 0;
+	for (; idx < len; idx++) {
+		if (strp[idx] < '0' || strp[idx] > '7')
+			break;
+
+		/*
+		 * Saturate if another digit would not fit.  Checking
+		 * before consuming the digit keeps values between
+		 * INT64_MAX / 8 and INT64_MAX representable.
+		 */
+		if (val > INT64_MAX / 8) {
+			val = INT64_MAX;
+			break;
+		}
+		val = val * 8 + (strp[idx] - '0');
+	}
+
+	return (sign > 0 ? val : -val);
+}
+
+/*
+ * Reads a len-byte extended numeric value from strp.  The first byte has
+ * bit 7 set to indicate the format; the remaining 7 bits + the (len - 1)
+ * bytes that follow form a big-endian signed two's complement binary
+ * number.  A value that does not fit in an int64_t saturates at
+ * INT64_MAX or INT64_MIN.  Returns the value.
+ * XXX Does not report errors.
+ */
+static int64_t
+tarfs_str2base256(const char *strp, size_t len)
+{
+	int64_t val;
+	size_t idx;
+
+	KASSERT(strp[0] & 0x80, ("not an extended numeric value"));
+
+	/* Sign-extend the first byte */
+	if ((strp[0] & 0x40) != 0)
+		val = (int64_t)-1;
+	else
+		val = 0;
+	val <<= 6;
+	val |= (strp[0] & 0x3f);
+
+	/* Read subsequent bytes */
+	for (idx = 1; idx < len; idx++) {
+		/*
+		 * Truncate on overflow and underflow.  The check is
+		 * made before shifting in the next byte so that every
+		 * value in the int64_t range survives intact.
+		 */
+		if (val > INT64_MAX / 256) {
+			val = INT64_MAX;
+			break;
+		} else if (val < INT64_MIN / 256) {
+			val = INT64_MIN;
+			break;
+		}
+		val <<= 8;
+		val |= (0xff & (int64_t)strp[idx]);
+	}
+
+	return (val);
+}
+
+/*
+ * Reads a len-byte numeric field from strp.  If bit 7 of the first byte
+ * is set, the field is taken to be an extended numeric value (signed
+ * two's complement); otherwise, it is taken to be a signed octal value.
+ * An empty field yields 0.
+ *
+ * XXX practically no error checking or handling
+ */
+static int64_t
+tarfs_str2int64(const char *strp, size_t len)
+{
+
+	if (len < 1)
+		return (0);
+
+	return ((strp[0] & 0x80) != 0 ?
+	    tarfs_str2base256(strp, len) : tarfs_str2octal(strp, len));
+}
+
+/*
+ * Verifies the checksum of a header.  The sum covers every byte of the
+ * header, with each byte of the checksum field itself counted as 0x20.
+ * The bytes are first summed as unsigned values; if that does not match
+ * the stored checksum, the comparison is repeated with the bytes summed
+ * as signed values, since some older formats use that broken form of
+ * the calculation.  Returns true if either sum matches, false
+ * otherwise.
+ */
+static boolean_t
+tarfs_checksum(struct ustar_header *hdrp)
+{
+	const unsigned char *ptr, *hdrend, *ckbeg, *ckend;
+	int64_t hdrsum, ssum, usum;
+
+	hdrsum = tarfs_str2int64(hdrp->checksum, sizeof(hdrp->checksum));
+	TARFS_DPF(CHECKSUM, "%s: header checksum %lx\n", __func__, hdrsum);
+
+	/* the checksum field ends where the type flag begins */
+	ckbeg = (const unsigned char *)hdrp->checksum;
+	ckend = (const unsigned char *)hdrp->typeflag;
+	hdrend = (const unsigned char *)(hdrp + 1);
+
+	/* compute both flavours of the sum in a single pass */
+	usum = ssum = 0;
+	for (ptr = (const unsigned char *)hdrp; ptr < hdrend; ptr++) {
+		if (ptr >= ckbeg && ptr < ckend) {
+			usum += 0x20;
+			ssum += 0x20;
+		} else {
+			usum += *ptr;
+			ssum += *((const signed char *)ptr);
+		}
+	}
+
+	TARFS_DPF(CHECKSUM, "%s: calc unsigned checksum %lx\n", __func__,
+	    usum);
+	if (hdrsum == usum)
+		return (true);
+
+	TARFS_DPF(CHECKSUM, "%s: calc signed checksum %lx\n", __func__,
+	    ssum);
+	return (hdrsum == ssum ? true : false);
+}
+
+
+/*
+ * Looks up a path in the tarfs node tree.
+ *
+ * - If the path exists, stores a pointer to the corresponding tarfs_node
+ *   in retnode and a pointer to its parent in retparent.
+ *
+ * - If the path does not exist, but create_dirs is true, creates ancestor
+ *   directories and returns NULL in retnode and the parent in retparent.
+ *
+ * - If the path does not exist and create_dirs is false, stops at the
+ *   first missing path name component.
+ *
+ * - On return, sepp points to the end of the last-processed path name
+ *   component, and endp points to its beginning, or is NULL if nothing
+ *   of the path remains to be processed.
+ *
+ * - A ".." component is rejected with EINVAL if it would step above the
+ *   root or if it follows a component that does not exist (yet).
+ *
+ * - Returns 0 whether or not the node was found (callers must check
+ *   retnode), EINVAL for an invalid "..", or the error reported by
+ *   tarfs_alloc_node().
+ */
+static int
+tarfs_lookup_path(struct tarfs_mount *tmp, char *name, size_t namelen,
+    char **endp, char **sepp, struct tarfs_node **retparent,
+    struct tarfs_node **retnode, boolean_t create_dirs)
+{
+	struct componentname cn;
+	struct tarfs_node *parent, *tnp;
+	char *sep;
+	size_t len;
+	int error;
+	boolean_t do_lookup;
+
+	MPASS(name != NULL && namelen != 0);
+
+	do_lookup = true;
+	error = 0;
+	parent = tnp = tmp->root;
+	if (tnp == NULL)
+		panic("%s: root node not yet created", __func__);
+
+	bzero(&cn, sizeof(cn));
+
+	TARFS_DPF(LOOKUP, "%s: Full path: %.*s\n", __func__, (int)namelen,
+	    name);
+
+	sep = NULL;
+	for (;;) {
+		/*
+		 * Skip leading slash(es).  The length is checked before
+		 * the byte is read since name need not be terminated.
+		 */
+		while (namelen > 0 && name[0] == '/')
+			name++, namelen--;
+
+		/* did we reach the end? */
+		if (namelen == 0 || name[0] == '\0') {
+			name = do_lookup ? NULL : cn.cn_nameptr;
+			namelen = do_lookup ? 0 : cn.cn_namelen;
+			break;
+		}
+
+		/* locate the next separator, staying within namelen */
+		for (sep = name, len = 0;
+		    len < namelen && *sep != '\0' && *sep != '/';
+		    sep++, len++)
+			/* nothing */ ;
+
+		/* check for . and .. */
+		if (name[0] == '.' && len <= 2) {
+			if (len == 1) {
+				/* . */
+				name += len;
+				namelen -= len;
+				continue;
+			} else if (name[1] == '.') {
+				/*
+				 * ..  There is nothing to go back to if
+				 * we are at the root, or if the previous
+				 * component was not found (tnp is NULL).
+				 */
+				if (tnp == NULL || tnp == tmp->root) {
+					error = EINVAL;
+					break;
+				}
+				tnp = tnp->parent;
+				parent = tnp->parent;
+				name += len;
+				namelen -= len;
+				continue;
+			}
+		}
+
+		/*
+		 * Create parent if necessary: the component remembered
+		 * in cn was missing and another one follows it, so it
+		 * has to be a directory.
+		 */
+		if (!do_lookup) {
+			TARFS_DPF(ALLOC, "%s: creating %.*s\n", __func__,
+			    (int)cn.cn_namelen, cn.cn_nameptr);
+			error = tarfs_alloc_node(tmp, cn.cn_nameptr,
+			    cn.cn_namelen, VDIR, -1, 0, tmp->mtime, 0, 0,
+			    DEFDIRMODE, 0, NULL, NODEV, parent, &tnp);
+			if (error != 0)
+				break;
+		}
+
+		parent = tnp;
+		tnp = NULL;
+		cn.cn_nameptr = name;
+		cn.cn_namelen = len;
+		TARFS_DPF(LOOKUP, "%s: Search: %.*s\n", __func__,
+		    (int)cn.cn_namelen, cn.cn_nameptr);
+		if (do_lookup) {
+			tnp = tarfs_lookup_node(parent, NULL, &cn);
+			if (tnp == NULL) {
+				/* nothing below a missing node can exist */
+				do_lookup = false;
+				if (!create_dirs)
+					break;
+			}
+		}
+		name += cn.cn_namelen;
+		namelen -= cn.cn_namelen;
+	}
+
+	TARFS_DPF(LOOKUP, "%s: Parent %p, node %p\n", __func__, parent, tnp);
+
+	if (retparent)
+		*retparent = parent;
+	if (retnode)
+		*retnode = tnp;
+	if (endp) {
+		if (namelen > 0)
+			*endp = name;
+		else
+			*endp = NULL;
+	}
+	if (sepp)
+		*sepp = sep;
+	return (error);
+}
+
+/*
+ * Frees a tarfs_mount structure and everything it references: all of
+ * its nodes, the I/O layer state and the inode number pool.  Also
+ * clears the mnt_data pointer of the associated struct mount.
+ */
+static void
+tarfs_free_mount(struct tarfs_mount *tmp)
+{
+	struct tarfs_node *tnp;
+
+	MPASS(tmp != NULL);
+
+	TARFS_DPF(ALLOC, "%s: Freeing mount structure %p\n", __func__, tmp);
+
+	TARFS_DPF(ALLOC, "%s: freeing tarfs_node structures\n", __func__);
+	while ((tnp = TAILQ_FIRST(&tmp->allnodes)) != NULL) {
+		TAILQ_REMOVE(&tmp->allnodes, tnp, entries);
+		tarfs_free_node(tnp);
+	}
+
+	/* best effort: a failure to tear down the I/O layer is ignored */
+	(void)tarfs_io_fini(tmp);
+
+	TARFS_DPF(ALLOC, "%s: deleting unr header\n", __func__);
+	delete_unrhdr(tmp->ino_unr);
+	tmp->vfs->mnt_data = NULL;
+
+	TARFS_DPF(ALLOC, "%s: freeing structure\n", __func__);
+	free(tmp, M_TARFSMNT);
+}
+
+/*
+ * Processes the tar file header at block offset blknump and allocates and
+ * populates a tarfs_node structure for the file it describes. Updated
+ * blknump to point to the next unread tar file block, or TAR_EOF if EOF
+ * is reached. Returns 0 on success or EOF and a positive errno value on
+ * failure.
+ */
+static int
+tarfs_alloc_one(struct tarfs_mount *tmp, off_t *blknump)
+{
+ char block[TARFS_BLOCKSIZE];
+ struct ustar_header *hdrp = (struct ustar_header *)block;
+ struct sbuf *namebuf = NULL;
+ char *exthdr = NULL, *name = NULL, *link = NULL;
+ off_t blknum = *blknump;
+ int endmarker = 0;
+ char *namep, *sep;
+ struct tarfs_node *parent, *tnp;
+ size_t namelen = 0, linklen = 0, realsize = 0, sz;
+ ssize_t res;
+ dev_t rdev;
+ gid_t gid;
+ mode_t mode;
+ time_t mtime;
+ uid_t uid;
+ long major = -1, minor = -1;
+ unsigned int flags = 0;
+ int error;
+ boolean_t sparse = false;
+
+again:
+ /* read next header */
+ res = tarfs_io_read_buf(tmp, false, block,
+ TARFS_BLOCKSIZE * blknum, TARFS_BLOCKSIZE);
+ if (res < 0) {
+ error = -res;
+ goto bad;
+ } else if (res < TARFS_BLOCKSIZE) {
+ goto eof;
+ }
+ blknum++;
+
+ /* check for end marker */
+ if (memcmp(block, zero_region, TARFS_BLOCKSIZE) == 0) {
+ if (endmarker++) {
+ if (exthdr != NULL) {
+ TARFS_DPF(IO, "%s: orphaned extended header at %zu\n",
+ __func__, TARFS_BLOCKSIZE * (blknum - 1));
+ free(exthdr, M_TEMP);
+ }
+ TARFS_DPF(IO, "%s: end of archive at %zu\n", __func__,
+ TARFS_BLOCKSIZE * blknum);
+ tmp->nblocks = blknum;
+ *blknump = TAR_EOF;
+ return (0);
+ }
+ goto again;
+ }
+
+ /* verify magic */
+ if (memcmp(hdrp->magic, USTAR_MAGIC, sizeof(USTAR_MAGIC)) == 0 &&
+ memcmp(hdrp->version, USTAR_VERSION, sizeof(USTAR_VERSION)) == 0) {
+ /* POSIX */
+ } else if (memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0 &&
+ memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0) {
+ TARFS_DPF(ALLOC, "%s: GNU tar format at %zu\n", __func__,
+ TARFS_BLOCKSIZE * (blknum - 1));
+ error = EFTYPE;
+ goto bad;
+ } else {
+ TARFS_DPF(ALLOC, "%s: unsupported TAR format at %zu\n",
+ __func__, TARFS_BLOCKSIZE * (blknum - 1));
+ error = EINVAL;
+ goto bad;
+ }
+
+ /* verify checksum */
+ if (!tarfs_checksum(hdrp)) {
+ TARFS_DPF(ALLOC, "%s: header checksum failed at %zu\n",
+ __func__, TARFS_BLOCKSIZE * (blknum - 1));
+ error = EINVAL;
+ goto bad;
+ }
+
+ /* get standard attributes */
+ mode = tarfs_str2int64(hdrp->mode, sizeof(hdrp->mode));
+ uid = tarfs_str2int64(hdrp->uid, sizeof(hdrp->uid));
+ gid = tarfs_str2int64(hdrp->gid, sizeof(hdrp->gid));
+ sz = tarfs_str2int64(hdrp->size, sizeof(hdrp->size));
+ mtime = tarfs_str2int64(hdrp->mtime, sizeof(hdrp->mtime));
+ rdev = NODEV;
+ TARFS_DPF(ALLOC, "%s: [%c] %zu @%jd %o %d:%d\n", __func__,
+ hdrp->typeflag[0], sz, (intmax_t)mtime, mode, uid, gid);
+
+ /* extended header? */
+ if (hdrp->typeflag[0] == TAR_TYPE_GLOBAL_EXTHDR) {
+ printf("%s: unsupported global extended header at %zd\n",
+ __func__, TARFS_BLOCKSIZE * (blknum - 1));
+ error = EFTYPE;
+ goto bad;
+ }
+ if (hdrp->typeflag[0] == TAR_TYPE_EXTHDR) {
+ if (exthdr != NULL) {
+ TARFS_DPF(IO, "%s: multiple extended headers at %zu\n",
+ __func__, TARFS_BLOCKSIZE * (blknum - 1));
+ error = EFTYPE;
+ goto bad;
+ }
+ /* read the contents of the exthdr */
+ TARFS_DPF(ALLOC, "%s: %zu-byte extended header at %zd\n",
+ __func__, sz, TARFS_BLOCKSIZE * (blknum - 1));
+ exthdr = malloc(sz, M_TEMP, M_WAITOK);
+ res = tarfs_io_read_buf(tmp, false, exthdr,
+ TARFS_BLOCKSIZE * blknum, sz);
+ if (res < 0) {
+ error = -res;
+ goto bad;
+ }
+ if (res < sz) {
+ goto eof;
+ }
+ blknum += TARFS_SZ2BLKS(res);
+ /* XXX TODO: refactor this parser */
+ char *line = exthdr;
+ while (line < exthdr + sz) {
+ char *eol, *key, *value, *sep;
+ size_t len = strtoul(line, &sep, 10);
+ if (len == 0 || sep == line || *sep != ' ') {
+ TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
+ __func__);
+ error = EINVAL;
+ goto bad;
+ }
+ if (line + len > exthdr + sz) {
+ TARFS_DPF(ALLOC, "%s: exthdr overflow\n",
+ __func__);
+ error = EINVAL;
+ goto bad;
+ }
+ eol = line + len - 1;
+ *eol = '\0';
+ line += len;
+ key = sep + 1;
+ sep = strchr(key, '=');
+ if (sep == NULL) {
+ TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
+ __func__);
+ error = EINVAL;
+ goto bad;
+ }
+ *sep = '\0';
+ value = sep + 1;
+ TARFS_DPF(ALLOC, "%s: exthdr %s=%s\n", __func__,
+ key, value);
+ if (strcmp(key, "linkpath") == 0) {
+ link = value;
+ linklen = eol - value;
+ } else if (strcmp(key, "GNU.sparse.major") == 0) {
+ sparse = true;
+ major = strtol(value, &sep, 10);
+ if (sep != eol) {
+ printf("exthdr syntax error\n");
+ error = EINVAL;
+ goto bad;
+ }
+ } else if (strcmp(key, "GNU.sparse.minor") == 0) {
+ sparse = true;
+ minor = strtol(value, &sep, 10);
+ if (sep != eol) {
+ printf("exthdr syntax error\n");
+ error = EINVAL;
+ goto bad;
+ }
+ } else if (strcmp(key, "GNU.sparse.name") == 0) {
+ sparse = true;
+ name = value;
+ namelen = eol - value;
+ if (namelen == 0) {
+ printf("exthdr syntax error\n");
+ error = EINVAL;
+ goto bad;
+ }
+ } else if (strcmp(key, "GNU.sparse.realsize") == 0) {
+ sparse = true;
+ realsize = strtoul(value, &sep, 10);
+ if (sep != eol) {
+ printf("exthdr syntax error\n");
+ error = EINVAL;
+ goto bad;
+ }
+ } else if (strcmp(key, "SCHILY.fflags") == 0) {
+ flags |= tarfs_strtofflags(value, &sep);
+ if (sep != eol) {
+ printf("exthdr syntax error\n");
+ error = EINVAL;
+ goto bad;
+ }
+ }
+ }
+ goto again;
+ }
+
+ /* sparse file consistency checks */
+ if (sparse) {
+ TARFS_DPF(ALLOC, "%s: %s: sparse %ld.%ld (%zu bytes)\n", __func__,
+ name, major, minor, realsize);
+ if (major != 1 || minor != 0 || name == NULL || realsize == 0 ||
+ hdrp->typeflag[0] != TAR_TYPE_FILE) {
+ TARFS_DPF(ALLOC, "%s: invalid sparse format\n", __func__);
+ error = EINVAL;
+ goto bad;
+ }
+ }
+
+ /* file name */
+ if (name == NULL) {
+ if (hdrp->prefix[0] != '\0') {
+ namebuf = sbuf_new_auto();
+ sbuf_printf(namebuf, "%.*s/%.*s",
+ (int)sizeof(hdrp->prefix), hdrp->prefix,
+ (int)sizeof(hdrp->name), hdrp->name);
+ sbuf_finish(namebuf);
+ name = sbuf_data(namebuf);
+ namelen = sbuf_len(namebuf);
+ } else {
+ name = hdrp->name;
+ namelen = strnlen(hdrp->name, sizeof(hdrp->name));
+ }
+ }
+
+ error = tarfs_lookup_path(tmp, name, namelen, &namep,
+ &sep, &parent, &tnp, true);
+ if (error != 0)
+ goto bad;
+ if (tnp != NULL) {
+ if (hdrp->typeflag[0] == TAR_TYPE_DIRECTORY) {
+ /* XXX set attributes? */
+ goto skip;
+ }
+ TARFS_DPF(ALLOC, "%s: duplicate file %.*s\n", __func__,
+ (int)namelen, name);
+ error = EINVAL;
+ goto bad;
+ }
+ switch (hdrp->typeflag[0]) {
+ case TAR_TYPE_DIRECTORY:
+ error = tarfs_alloc_node(tmp, namep, sep - namep, VDIR,
+ 0, 0, mtime, uid, gid, mode, flags, NULL, 0,
+ parent, &tnp);
+ break;
+ case TAR_TYPE_FILE:
+ error = tarfs_alloc_node(tmp, namep, sep - namep, VREG,
+ blknum * TARFS_BLOCKSIZE, sz, mtime, uid, gid, mode,
+ flags, NULL, 0, parent, &tnp);
+ if (error == 0 && sparse) {
+ error = tarfs_load_blockmap(tnp, realsize);
+ }
+ break;
+ case TAR_TYPE_HARDLINK:
+ if (link == NULL) {
+ link = hdrp->linkname;
+ linklen = strnlen(link, sizeof(hdrp->linkname));
+ }
+ error = tarfs_alloc_node(tmp, namep, sep - namep, VREG,
+ 0, 0, 0, 0, 0, 0, 0, NULL, 0, parent, &tnp);
+ if (error != 0) {
+ goto bad;
+ }
+ error = tarfs_lookup_path(tmp, link, linklen, NULL,
+ NULL, NULL, &tnp->other, false);
+ if (tnp->other == NULL ||
+ tnp->other->type != VREG ||
+ tnp->other->other != NULL) {
+ TARFS_DPF(ALLOC, "%s: %.*s: dead hard link to %.*s\n",
+ __func__, (int)namelen, name, (int)linklen, link);
+ error = EINVAL;
+ goto bad;
+ }
+ break;
+ case TAR_TYPE_SYMLINK:
+ if (link == NULL) {
+ link = hdrp->linkname;
+ linklen = strnlen(link, sizeof(hdrp->linkname));
+ }
+ error = tarfs_alloc_node(tmp, namep, sep - namep, VLNK,
+ 0, linklen, mtime, uid, gid, mode, flags, link, 0,
+ parent, &tnp);
+ break;
+ case TAR_TYPE_BLOCK:
+ major = tarfs_str2int64(hdrp->major, sizeof(hdrp->major));
+ minor = tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor));
+ rdev = makedev(major, minor);
+ error = tarfs_alloc_node(tmp, namep, sep - namep, VBLK,
+ 0, 0, mtime, uid, gid, mode, flags, NULL, rdev,
+ parent, &tnp);
+ break;
+ case TAR_TYPE_CHAR:
+ major = tarfs_str2int64(hdrp->major, sizeof(hdrp->major));
+ minor = tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor));
+ rdev = makedev(major, minor);
+ error = tarfs_alloc_node(tmp, namep, sep - namep, VCHR,
+ 0, 0, mtime, uid, gid, mode, flags, NULL, rdev,
+ parent, &tnp);
+ break;
+ default:
+ TARFS_DPF(ALLOC, "%s: unsupported type %c for %.*s\n",
+ __func__, hdrp->typeflag[0], (int)namelen, name);
+ error = EINVAL;
+ break;
+ }
+ if (error != 0)
+ goto bad;
+
+skip:
+ blknum += TARFS_SZ2BLKS(sz);
+ tmp->nblocks = blknum;
+ *blknump = blknum;
+ if (exthdr != NULL) {
+ free(exthdr, M_TEMP);
+ }
+ if (namebuf != NULL) {
+ sbuf_delete(namebuf);
+ }
+ return (0);
+eof:
+ TARFS_DPF(IO, "%s: premature end of file\n", __func__);
+ error = EIO;
+ goto bad;
+bad:
+ if (exthdr != NULL) {
+ free(exthdr, M_TEMP);
+ }
+ if (namebuf != NULL) {
+ sbuf_delete(namebuf);
+ }
+ return (error);
+}
+
+/*
+ * Allocates and populates the metadata structures for the tar file
+ * referenced by vp.  On success, a pointer to the tarfs_mount structure
+ * is stored in tmpp.  Returns 0 on success or a positive errno value on
+ * failure.
+ *
+ * vp must be locked on entry.  It is unlocked before returning on every
+ * path, success or failure, so the caller sees a single lock state.
+ * On failure, everything allocated here is released again through
+ * tarfs_free_mount().
+ */
+static int
+tarfs_alloc_mount(struct mount *mp, struct vnode *vp,
+    uid_t root_uid, gid_t root_gid, mode_t root_mode,
+    struct tarfs_mount **tmpp)
+{
+	struct vattr va;
+	struct thread *td = curthread;
+	struct tarfs_mount *tmp;
+	struct tarfs_node *root;
+	off_t blknum;
+	time_t mtime;
+	int error;
+
+	KASSERT(tmpp != NULL, ("tarfs mount return is NULL"));
+	ASSERT_VOP_LOCKED(vp, __func__);
+
+	TARFS_DPF(ALLOC, "%s: Allocating tarfs mount structure for vp %p\n",
+	    __func__, vp);
+
+	/*
+	 * Get source metadata.  The vnode lock is only needed for this
+	 * call, so drop it before looking at the result; returning with
+	 * vp still locked would break the caller's error path, which
+	 * treats vp as unlocked once this function has returned.
+	 */
+	error = VOP_GETATTR(vp, &va, td->td_ucred);
+	VOP_UNLOCK(vp);
+	if (error != 0) {
+		return (error);
+	}
+	mtime = va.va_mtime.tv_sec;
+
+	/*
+	 * Allocate and initialize tarfs mount structure.  M_ZERO leaves
+	 * every field not assigned below set to zero / NULL.
+	 */
+	tmp = malloc(sizeof(*tmp), M_TARFSMNT, M_WAITOK | M_ZERO);
+	TARFS_DPF(ALLOC, "%s: Allocated mount structure\n", __func__);
+	mp->mnt_data = tmp;
+
+	mtx_init(&tmp->allnode_lock, "tarfs allnode lock", NULL,
+	    MTX_DEF);
+	TAILQ_INIT(&tmp->allnodes);
+	tmp->ino_unr = new_unrhdr(TARFS_MININO, INT_MAX, &tmp->allnode_lock);
+	tmp->vp = vp;
+	tmp->vfs = mp;
+	tmp->mtime = mtime;
+
+	/*
+	 * XXX The decompression layer passes everything through the
+	 * buffer cache, and the buffer cache wants to know our blocksize,
+	 * but mnt_stat normally isn't populated until after we return, so
+	 * we have to cheat a bit.
+	 */
+	tmp->iosize = 1U << tarfs_ioshift;
+	mp->mnt_stat.f_iosize = tmp->iosize;
+
+	/* Initialize decompression layer */
+	error = tarfs_io_init(tmp);
+	if (error != 0)
+		goto bad;
+
+	/* Create the root directory node */
+	error = tarfs_alloc_node(tmp, NULL, 0, VDIR, 0, 0, mtime, root_uid,
+	    root_gid, root_mode & ALLPERMS, 0, NULL, NODEV, NULL, &root);
+	if (error == 0 && root == NULL) {
+		/* never tear the mount down and then report success */
+		error = ENOMEM;
+	}
+	if (error != 0)
+		goto bad;
+	tmp->root = root;
+
+	/* Walk the archive one entry at a time until the end marker */
+	blknum = 0;
+	do {
+		if ((error = tarfs_alloc_one(tmp, &blknum)) != 0) {
+			goto bad;
+		}
+	} while (blknum != TAR_EOF);
+
+	*tmpp = tmp;
+
+	TARFS_DPF(ALLOC, "%s: pfsmnt_root %p\n", __func__, tmp->root);
+	return (0);
+
+bad:
+	/* tmp is always valid here: every jump happens after the malloc */
+	tarfs_free_mount(tmp);
+	return (error);
+}
+
+/*
+ * VFS Operations.
+ */
+
+/*
+ * Mounts the tarball named by the "from" option on mp.  The owner,
+ * group and mode of the root directory default to those of the covered
+ * vnode; the "uid", "gid" and "mode" options override them only when
+ * the real uid of the mount credential is 0.  Updating an existing
+ * mount is not supported, and the mount is always marked read-only.
+ * Returns 0 on success or a positive errno value on failure.
+ */
+static int
+tarfs_mount(struct mount *mp)
+{
+	struct nameidata nd;
+	struct vattr va;
+	struct tarfs_mount *tmp = NULL;
+	struct thread *td = curthread;
+	struct vnode *vp;
+	char *from;
+	uid_t root_uid;
+	gid_t root_gid;
+	mode_t root_mode;
+	int error, flags, len;
+
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	if (vfs_filteropt(mp->mnt_optnew, tarfs_opts))
+		return (EINVAL);
+
+	/* Attributes of the covered vnode provide the root defaults */
+	vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY);
+	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred);
+	VOP_UNLOCK(mp->mnt_vnodecovered);
+	if (error)
+		return (error);
+
+	if (mp->mnt_cred->cr_ruid != 0 ||
+	    vfs_scanopt(mp->mnt_optnew, "gid", "%d", &root_gid) != 1)
+		root_gid = va.va_gid;
+	if (mp->mnt_cred->cr_ruid != 0 ||
+	    vfs_scanopt(mp->mnt_optnew, "uid", "%d", &root_uid) != 1)
+		root_uid = va.va_uid;
+	if (mp->mnt_cred->cr_ruid != 0 ||
+	    vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1)
+		root_mode = va.va_mode;
+
+	/*
+	 * The source path must be a non-empty, NUL-terminated string.
+	 * Check the length first: indexing from[len - 1] with len == 0
+	 * would read before the start of the option value.
+	 */
+	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
+	if (error != 0 || len <= 0 || from[len - 1] != '\0')
+		return (EINVAL);
+
+	/* Find the source tarball */
+	TARFS_DPF(FS, "%s(%s, uid=%u, gid=%u, mode=%o)\n", __func__,
+	    from, root_uid, root_gid, root_mode);
+	flags = FREAD;
+	if (vfs_flagopt(mp->mnt_optnew, "verify", NULL, 0)) {
+		flags |= O_VERIFY;
+	}
+	NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF, UIO_SYSSPACE, from);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	NDFREE_PNBUF(&nd);
+	vp = nd.ni_vp;
+	TARFS_DPF(FS, "%s: N: hold %u use %u lock 0x%x\n", __func__,
+	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
+	/* vp is now held and locked */
+
+	/* Open the source tarball */
+	error = vn_open_vnode(vp, flags, td->td_ucred, td, NULL);
+	if (error != 0) {
+		TARFS_DPF(FS, "%s: failed to open %s: %d\n", __func__,
+		    from, error);
+		vput(vp);
+		goto bad;
+	}
+	TARFS_DPF(FS, "%s: O: hold %u use %u lock 0x%x\n", __func__,
+	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
+	if (vp->v_type != VREG) {
+		TARFS_DPF(FS, "%s: not a regular file\n", __func__);
+		error = EOPNOTSUPP;
+		goto bad_open_locked;
+	}
+	error = priv_check(td, PRIV_VFS_MOUNT_PERM);
+	if (error != 0) {
+		TARFS_DPF(FS, "%s: not permitted to mount\n", __func__);
+		goto bad_open_locked;
+	}
+	if (flags & O_VERIFY) {
+		mp->mnt_flag |= MNT_VERIFIED;
+	}
+
+	/* Allocate the tarfs mount */
+	error = tarfs_alloc_mount(mp, vp, root_uid, root_gid, root_mode, &tmp);
+	/* vp is expected to be held but unlocked from here on */
+	if (error != 0) {
+		TARFS_DPF(FS, "%s: failed to mount %s: %d\n", __func__,
+		    from, error);
+		goto bad_open_unlocked;
+	}
+	TARFS_DPF(FS, "%s: M: hold %u use %u lock 0x%x\n", __func__,
+	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
+
+	/* Unconditionally mount as read-only */
+	MNT_ILOCK(mp);
+	mp->mnt_flag |= (MNT_LOCAL | MNT_RDONLY);
+	MNT_IUNLOCK(mp);
+
+	vfs_getnewfsid(mp);
+	vfs_mountedfrom(mp, "tarfs");
+	TARFS_DPF(FS, "%s: success\n", __func__);
+
+	return (0);
+
+bad_open_locked:
+	/* vp must be held and locked */
+	TARFS_DPF(FS, "%s: L: hold %u use %u lock 0x%x\n", __func__,
+	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
+	VOP_UNLOCK(vp);
+bad_open_unlocked:
+	/* vp must be held and unlocked */
+	TARFS_DPF(FS, "%s: E: hold %u use %u lock 0x%x\n", __func__,
+	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
+	(void)vn_close(vp, flags, td->td_ucred, td);
+bad:
+	/*
+	 * Our reference to vp has been dropped by vput() or vn_close()
+	 * by the time we get here, so vp must not be dereferenced any
+	 * more, not even for debug output.
+	 */
+	TARFS_DPF(FS, "%s: X: error %d\n", __func__, error);
+	return (error);
+}
+
+/*
+ * Unmounts a tarfs filesystem: flushes the vnodes of the mount, closes
+ * the vnode of the backing tarball and frees the mount metadata.
+ * Returns 0 on success or the error reported by vflush().
+ */
+static int
+tarfs_unmount(struct mount *mp, int mntflags)
+{
+	struct tarfs_mount *tmp;
+	struct vnode *tarvp;
+	struct thread *td;
+	int error, vflags;
+
+	td = curthread;
+
+	TARFS_DPF(FS, "%s: Unmounting %p\n", __func__, mp);
+
+	/* A forced unmount asks vflush() to close busy vnodes as well */
+	vflags = 0;
+	if ((mntflags & MNT_FORCE) != 0)
+		vflags = FORCECLOSE;
+
+	/* Nothing is torn down unless every vnode could be flushed */
+	error = vflush(mp, 0, vflags, curthread);
+	if (error != 0)
+		return (error);
+
+	tmp = MP_TO_TARFS_MOUNT(mp);
+	tarvp = tmp->vp;
+	MPASS(tarvp != NULL);
+
+	/* Close the tarball, tracing its counts before and after */
+	TARFS_DPF(FS, "%s: U: hold %u use %u lock 0x%x\n", __func__,
+	    tarvp->v_holdcnt, tarvp->v_usecount, VOP_ISLOCKED(tarvp));
+	vn_close(tarvp, FREAD, td->td_ucred, td);
+	TARFS_DPF(FS, "%s: C: hold %u use %u lock 0x%x\n", __func__,
+	    tarvp->v_holdcnt, tarvp->v_usecount, VOP_ISLOCKED(tarvp));
+
+	tarfs_free_mount(tmp);
+	return (0);
+}
+
+/*
+ * Looks up the root vnode of a tarfs filesystem, exclusively locked,
+ * flags it as the filesystem root and stores it in vpp.  Returns 0 on
+ * success or the positive errno value reported by VFS_VGET().
+ */
+static int
+tarfs_root(struct mount *mp, int flags, struct vnode **vpp)
+{
+	struct vnode *rootvp;
+	int error;
+
+	TARFS_DPF(FS, "%s: Getting root vnode\n", __func__);
+
+	error = VFS_VGET(mp, TARFS_ROOTINO, LK_EXCLUSIVE, &rootvp);
+	if (error == 0) {
+		rootvp->v_vflag |= VV_ROOT;
+		*vpp = rootvp;
+	}
+	return (error);
+}
+
+/*
+ * Reports filesystem statistics for a tarfs mount.  Block and file
+ * totals come from the mount structure; all "free" and "available"
+ * counters are reported as zero.  Always returns 0.
+ */
+static int
+tarfs_statfs(struct mount *mp, struct statfs *sbp)
+{
+	struct tarfs_mount *tmp = MP_TO_TARFS_MOUNT(mp);
+
+	/* sizes */
+	sbp->f_bsize = TARFS_BLOCKSIZE;
+	sbp->f_iosize = tmp->iosize;
+
+	/* totals recorded in the mount structure */
+	sbp->f_blocks = tmp->nblocks;
+	sbp->f_files = tmp->nfiles;
+
+	/* nothing is ever free */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_ffree = 0;
+
+	return (0);
+}
+
+/*
+ * Gets a vnode for the given inode. On success, a pointer to the vnode
+ * is stored in vpp. Returns 0 on success or a positive errno value on
+ * failure.
+ *
+ * The VFS hash is consulted first.  If no vnode is hashed for ino, the
+ * inode TARFS_ZIOINO is served from tmp->znode; any other inode is
+ * searched for in the mount's list of nodes and, if found, a new vnode
+ * is created for it and inserted into the VFS hash.  ENOENT is
+ * returned if no node has the requested inode number.
+ */
+static int
+tarfs_vget(struct mount *mp, ino_t ino, int lkflags, struct vnode **vpp)
+{
+ struct tarfs_mount *tmp;
+ struct tarfs_node *tnp;
+ struct thread *td;
+ struct vnode *vp;
+ int error;
+
+ TARFS_DPF(FS, "%s: mp %p, ino %lu, lkflags %d\n", __func__, mp, ino,
+ lkflags);
+
+ /* Fast path: a vnode for this inode may already be in the VFS hash */
+ td = curthread;
+ error = vfs_hash_get(mp, ino, lkflags, td, vpp, NULL, NULL);
+ if (error != 0)
+ return (error);
+
+ if (*vpp != NULL) {
+ TARFS_DPF(FS, "%s: found hashed vnode %p\n", __func__, *vpp);
+ return (error);
+ }
+
+ TARFS_DPF(FS, "%s: no hashed vnode for inode %lu\n", __func__, ino);
+
+ tmp = MP_TO_TARFS_MOUNT(mp);
+
+ /* This inode is not looked up in the node list; hand out tmp->znode */
+ if (ino == TARFS_ZIOINO) {
+ error = vn_lock(tmp->znode, lkflags);
+ if (error != 0)
+ return (error);
+ vref(tmp->znode);
+ *vpp = tmp->znode;
+ return (0);
+ }
+
+ /* Linear search of every node in the mount for a matching inode */
+ /* XXX Should use hash instead? */
+ TAILQ_FOREACH(tnp, &tmp->allnodes, entries) {
+ if (tnp->ino == ino)
+ break;
+ }
+ TARFS_DPF(FS, "%s: search of all nodes found %p\n", __func__, tnp);
+ if (tnp == NULL)
+ return (ENOENT);
+
+ /* Create a vnode and tie it to the node in both directions */
+ error = getnewvnode("tarfs", mp, &tarfs_vnodeops, &vp);
+ if (error != 0)
+ goto bad;
+ TARFS_DPF(FS, "%s: allocated vnode\n", __func__);
+ vp->v_data = tnp;
+ vp->v_type = tnp->type;
+ tnp->vnode = vp;
+
+ /* Lock the new vnode before it is put on the mount's vnode list */
+ lockmgr(vp->v_vnlock, lkflags, NULL);
+ error = insmntque(vp, mp);
+ /*
+ * NOTE(review): tnp->vnode still points at vp when we bail out
+ * here; confirm that it is reset elsewhere on this failure path.
+ */
+ if (error != 0)
+ goto bad;
+ TARFS_DPF(FS, "%s: inserting entry into VFS hash\n", __func__);
+ error = vfs_hash_insert(vp, ino, lkflags, td, vpp, NULL, NULL);
+ /* On error, or if the insert stored a vnode in *vpp, return as is */
+ if (error != 0 || *vpp != NULL)
+ return (error);
+
+ *vpp = vp;
+ return (0);
+
+bad:
+ *vpp = NULLVP;
+ return (error);
+}
+
+/*
+ * Translates a file handle into a locked vnode.  The handle is
+ * rejected with ESTALE if its inode number is out of range or if the
+ * node it resolves to has a zero mode, a different generation number
+ * or no links.  On success the vnode is stored in vpp and 0 is
+ * returned.
+ */
+static int
+tarfs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
+{
+	struct tarfs_fid *fid = (struct tarfs_fid *)fhp;
+	struct tarfs_node *node;
+	struct vnode *vp;
+	int error;
+
+	MP_TO_TARFS_MOUNT(mp);
+
+	/* Out-of-range inode numbers are refused without touching vpp */
+	if (fid->ino < TARFS_ROOTINO || fid->ino > INT_MAX)
+		return (ESTALE);
+
+	error = VFS_VGET(mp, fid->ino, LK_EXCLUSIVE, &vp);
+	if (error == 0) {
+		node = VP_TO_TARFS_NODE(vp);
+		if (node->mode != 0 && node->gen == fid->gen &&
+		    node->nlink > 0) {
+			*vpp = vp;
+			return (0);
+		}
+		/* the handle no longer matches the node it resolves to */
+		vput(vp);
+		error = ESTALE;
+	}
+	*vpp = NULLVP;
+	return (error);
+}
+
+/*
+ * VFS operations vector for tarfs.  The filesystem is registered under
+ * the name "tarfs" with the VFCF_READONLY flag; the module is version 1
+ * and declares a dependency on version 1 of the xz module.
+ */
+static struct vfsops tarfs_vfsops = {
+ .vfs_fhtovp = tarfs_fhtovp,
+ .vfs_mount = tarfs_mount,
+ .vfs_root = tarfs_root,
+ .vfs_statfs = tarfs_statfs,
+ .vfs_unmount = tarfs_unmount,
+ .vfs_vget = tarfs_vget,
+};
+VFS_SET(tarfs_vfsops, tarfs, VFCF_READONLY);
+MODULE_VERSION(tarfs, 1);
+MODULE_DEPEND(tarfs, xz, 1, 1, 1);
diff --git a/sys/fs/tarfs/tarfs_vnops.c b/sys/fs/tarfs/tarfs_vnops.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/tarfs/tarfs_vnops.c
@@ -0,0 +1,639 @@
+/*-
+ * Copyright (c) 2013 Juniper Networks, Inc.
+ * Copyright (c) 2022 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_tarfs.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/dirent.h>
+#include <sys/fcntl.h>
+#include <sys/limits.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+
+#include <fs/tarfs/tarfs.h>
+#include <fs/tarfs/tarfs_dbg.h>
+
+/*
+ * Opens a tarfs vnode.  Only regular files and directories can be
+ * opened; for those a VM object sized to the node is created and 0 is
+ * returned.  Every other vnode type yields EOPNOTSUPP.
+ */
+static int
+tarfs_open(struct vop_open_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct tarfs_node *tnp;
+
+	MPASS(VOP_ISLOCKED(vp));
+	tnp = VP_TO_TARFS_NODE(vp);
+
+	TARFS_DPF(VNODE, "%s(%p=%s, %o)\n", __func__,
+	    tnp, tnp->name, ap->a_mode);
+
+	switch (vp->v_type) {
+	case VREG:
+	case VDIR:
+		vnode_create_vobject(vp, tnp->size, ap->a_td);
+		return (0);
+	default:
+		return (EOPNOTSUPP);
+	}
+}
+
+/*
+ * Closes a tarfs vnode.  There is nothing to release; the function
+ * only emits a trace message when TARFS_DEBUG is defined.  Always
+ * returns 0.
+ */
+static int
+tarfs_close(struct vop_close_args *ap)
+{
+#ifndef TARFS_DEBUG
+	(void)ap;
+#else
+	struct vnode *vp = ap->a_vp;
+	struct tarfs_node *tnp;
+
+	MPASS(VOP_ISLOCKED(vp));
+	tnp = VP_TO_TARFS_NODE(vp);
+
+	TARFS_DPF(VNODE, "%s(%p=%s)\n", __func__,
+	    tnp, tnp->name);
+#endif
+	return (0);
+}
+
+/*
+ * Checks whether the credential in ap may access the vnode in the
+ * requested mode.  Write access is always refused: with EROFS for
+ * directories, symbolic links and regular files, and with EPERM for
+ * block devices, character devices and FIFOs.  Any other vnode type
+ * yields EINVAL.  Everything else is decided by vaccess() using the
+ * mode, owner and group recorded in the node.
+ */
+static int
+tarfs_access(struct vop_access_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	accmode_t accmode = ap->a_accmode;
+	struct ucred *cred = ap->a_cred;
+	struct tarfs_node *tnp;
+	int write_errno;
+
+	MPASS(VOP_ISLOCKED(vp));
+	tnp = VP_TO_TARFS_NODE(vp);
+
+	TARFS_DPF(VNODE, "%s(%p=%s, %o)\n", __func__,
+	    tnp, tnp->name, accmode);
+
+	/* Pick the errno a write request gets for this vnode type */
+	switch (vp->v_type) {
+	case VDIR:
+	case VLNK:
+	case VREG:
+		write_errno = EROFS;
+		break;
+	case VBLK:
+	case VCHR:
+	case VFIFO:
+		write_errno = EPERM;
+		break;
+	default:
+		return (EINVAL);
+	}
+	if ((accmode & VWRITE) != 0)
+		return (write_errno);
+
+	return (vaccess(vp->v_type, tnp->mode, tnp->uid,
+	    tnp->gid, accmode, cred));
+}
+
+/*
+ * Fills in the attribute structure ap->a_vap from the tarfs node
+ * behind ap->a_vp and from the statistics of its mount.  Always
+ * returns 0.
+ */
+static int
+tarfs_getattr(struct vop_getattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	struct tarfs_node *tnp = VP_TO_TARFS_NODE(vp);
+
+	TARFS_DPF(VNODE, "%s(%p=%s)\n", __func__,
+	    tnp, tnp->name);
+
+	/* identity */
+	vap->va_type = vp->v_type;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	vap->va_fileid = tnp->ino;
+	vap->va_gen = tnp->gen;
+	vap->va_filerev = 0;
+
+	/* ownership, permissions and flags */
+	vap->va_mode = tnp->mode;
+	vap->va_uid = tnp->uid;
+	vap->va_gid = tnp->gid;
+	vap->va_nlink = tnp->nlink;
+	vap->va_flags = tnp->flags;
+
+	/* sizes */
+	vap->va_size = tnp->size;
+	vap->va_bytes = round_page(tnp->physize);
+	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
+
+	/* timestamps */
+	vap->va_atime = tnp->atime;
+	vap->va_mtime = tnp->mtime;
+	vap->va_ctime = tnp->ctime;
+	vap->va_birthtime = tnp->birthtime;
+
+	/* only device nodes carry a device number */
+	if (vp->v_type == VBLK || vp->v_type == VCHR)
+		vap->va_rdev = tnp->rdev;
+	else
+		vap->va_rdev = NODEV;
+
+	return (0);
+}
+
+/*
+ * Looks up the name described by ap->a_cnp in the directory ap->a_dvp.
+ * On success the resulting vnode is stored in *ap->a_vpp and 0 is
+ * returned; on failure *ap->a_vpp is NULLVP and a positive errno value
+ * is returned.
+ *
+ * Search permission on the directory is checked first.  ".." resolves
+ * to the parent node, "." to the directory itself, and, in TARFS_DEBUG
+ * builds, the name TARFS_ZIO_NAME in the root directory resolves to
+ * the mount's znode.  Every other name is searched for among the
+ * children of the directory.
+ */
+static int
+tarfs_lookup(struct vop_cachedlookup_args *ap)
+{
+	struct tarfs_node *dirnode, *parent, *tnp;
+	struct componentname *cnp;
+	struct vnode *dvp;
+	struct vnode **vpp;
+	int error;
+
+	dvp = ap->a_dvp;
+	vpp = ap->a_vpp;
+	cnp = ap->a_cnp;
+
+	*vpp = NULLVP;
+	dirnode = VP_TO_TARFS_NODE(dvp);
+	parent = dirnode->parent;
+	tnp = NULL;
+
+	TARFS_DPF(LOOKUP, "%s(%p=%s, %.*s)\n", __func__,
+	    dirnode, dirnode->name,
+	    (int)cnp->cn_namelen, cnp->cn_nameptr);
+
+	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, curthread);
+	if (error != 0)
+		return (error);
+
+	if (cnp->cn_flags & ISDOTDOT) {
+		/* Do not allow .. on the root node */
+		if (parent == NULL || parent == dirnode)
+			return (ENOENT);
+
+		/* Allocate a new vnode on the matching entry */
+		error = vn_vget_ino(dvp, parent->ino, cnp->cn_lkflags,
+		    vpp);
+		if (error != 0)
+			return (error);
+	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
+		VREF(dvp);
+		*vpp = dvp;
+#ifdef TARFS_DEBUG
+	} else if (dirnode == dirnode->tmp->root &&
+	    dirnode->tmp->znode != NULL &&
+	    cnp->cn_namelen == TARFS_ZIO_NAMELEN &&
+	    memcmp(cnp->cn_nameptr, TARFS_ZIO_NAME, TARFS_ZIO_NAMELEN) == 0) {
+		/*
+		 * Only publish the znode in *vpp once the name has
+		 * matched and the lock has been obtained; otherwise an
+		 * error return further down would leave an unreferenced
+		 * vnode behind in *vpp.
+		 */
+		error = vn_lock(dirnode->tmp->znode, cnp->cn_lkflags);
+		if (error != 0)
+			return (error);
+		*vpp = dirnode->tmp->znode;
+		vref(*vpp);
+#endif
+	} else {
+		tnp = tarfs_lookup_node(dirnode, NULL, cnp);
+		if (tnp == NULL) {
+			TARFS_DPF(LOOKUP, "%s(%p=%s, %.*s): file not found\n", __func__,
+			    dirnode, dirnode->name,
+			    (int)cnp->cn_namelen, cnp->cn_nameptr);
+			return (ENOENT);
+		}
+
+		/* An intermediate component must be a directory or a link */
+		if ((cnp->cn_flags & ISLASTCN) == 0 &&
+		    (tnp->type != VDIR && tnp->type != VLNK))
+			return (ENOTDIR);
+
+		error = vn_vget_ino(dvp, tnp->ino, cnp->cn_lkflags, vpp);
+		if (error != 0)
+			return (error);
+	}
+
+#ifdef TARFS_DEBUG
+	if (tnp == NULL)
+		tnp = VP_TO_TARFS_NODE(*vpp);
+	TARFS_DPF(LOOKUP, "%s: found vnode %p, tarfs_node %p\n", __func__,
+	    *vpp, tnp);
+#endif /* TARFS_DEBUG */
+
+	/* Store the result in the cache if MAKEENTRY is specified in flags */
+	if ((cnp->cn_flags & MAKEENTRY) != 0 && cnp->cn_nameiop != CREATE)
+		cache_enter(dvp, *vpp, cnp);
+
+	return (error);
+}
+
+/*
+ * Reads directory entries from the directory ap->a_vp into ap->a_uio.
+ *
+ * The uio offset acts as a cookie naming the next entry to produce:
+ * TARFS_COOKIE_DOT for ".", TARFS_COOKIE_DOTDOT for "..", the inode
+ * number of a child for that child, and TARFS_COOKIE_EOF once the
+ * directory is exhausted.  Entries are emitted until either the
+ * directory ends or the next entry no longer fits in the buffer.
+ *
+ * Returns 0 on success, ENOTDIR if the vnode is not a directory,
+ * EINVAL if the offset names no entry or not even one entry fits, or
+ * the error reported by uiomove().  If ap->a_eofflag is not NULL it is
+ * set to whether the end of the directory was reached.  If both
+ * ap->a_cookies and ap->a_ncookies are not NULL, an array with one
+ * cookie per emitted entry is allocated from M_TEMP and returned.
+ */
+static int
+tarfs_readdir(struct vop_readdir_args *ap)
+{
+	struct dirent cde;
+	struct tarfs_node *current, *tnp;
+	struct vnode *vp;
+	struct uio *uio;
+	int *eofflag;
+	u_long **cookies;
+	int *ncookies;
+	off_t off;
+	u_int idx, ndirents;
+	bool truncated;
+	int error;
+
+	vp = ap->a_vp;
+	uio = ap->a_uio;
+	eofflag = ap->a_eofflag;
+	cookies = ap->a_cookies;
+	ncookies = ap->a_ncookies;
+
+	if (vp->v_type != VDIR)
+		return (ENOTDIR);
+
+	tnp = VP_TO_TARFS_NODE(vp);
+	off = uio->uio_offset;
+	current = NULL;
+	ndirents = 0;
+	truncated = false;
+
+	TARFS_DPF(VNODE, "%s(%p=%s, %zu, %zd)\n", __func__,
+	    tnp, tnp->name, uio->uio_offset, uio->uio_resid);
+
+	if (uio->uio_offset == TARFS_COOKIE_EOF) {
+		TARFS_DPF(VNODE, "%s: EOF\n", __func__);
+		return (0);
+	}
+
+	if (uio->uio_offset == TARFS_COOKIE_DOT) {
+		TARFS_DPF(VNODE, "%s: Generating . entry\n", __func__);
+		/* fake . entry */
+		cde.d_fileno = tnp->ino;
+		cde.d_type = DT_DIR;
+		cde.d_namlen = 1;
+		cde.d_name[0] = '.';
+		cde.d_name[1] = '\0';
+		cde.d_reclen = GENERIC_DIRSIZ(&cde);
+		if (cde.d_reclen > uio->uio_resid)
+			goto full;
+		error = uiomove(&cde, cde.d_reclen, uio);
+		if (error)
+			return (error);
+		/* next is .. */
+		uio->uio_offset = TARFS_COOKIE_DOTDOT;
+		ndirents++;
+	}
+
+	if (uio->uio_offset == TARFS_COOKIE_DOTDOT) {
+		TARFS_DPF(VNODE, "%s: Generating .. entry\n", __func__);
+		/* fake .. entry */
+		MPASS(tnp->parent != NULL);
+		TARFS_NODE_LOCK(tnp->parent);
+		cde.d_fileno = tnp->parent->ino;
+		TARFS_NODE_UNLOCK(tnp->parent);
+		cde.d_type = DT_DIR;
+		cde.d_namlen = 2;
+		cde.d_name[0] = '.';
+		cde.d_name[1] = '.';
+		cde.d_name[2] = '\0';
+		cde.d_reclen = GENERIC_DIRSIZ(&cde);
+		if (cde.d_reclen > uio->uio_resid)
+			goto full;
+		error = uiomove(&cde, cde.d_reclen, uio);
+		if (error)
+			return (error);
+		/*
+		 * Count ".." as soon as it has been copied out.  Doing
+		 * so only after the first child has been found would
+		 * leave it uncounted in a directory without children,
+		 * and the cookie array below would come up one short.
+		 */
+		ndirents++;
+		/* next is first child */
+		current = TAILQ_FIRST(&tnp->dir.dirhead);
+		if (current == NULL)
+			goto done;
+		uio->uio_offset = current->ino;
+		TARFS_DPF(VNODE, "%s: [%u] setting current node to %p=%s\n",
+		    __func__, ndirents, current, current->name);
+	}
+
+	/* resuming previous call */
+	if (current == NULL) {
+		current = tarfs_lookup_dir(tnp, uio->uio_offset);
+		if (current == NULL) {
+			error = EINVAL;
+			goto done;
+		}
+		uio->uio_offset = current->ino;
+		TARFS_DPF(VNODE, "%s: [%u] setting current node to %p=%s\n",
+		    __func__, ndirents, current, current->name);
+	}
+
+	for (;;) {
+		cde.d_fileno = current->ino;
+		switch (current->type) {
+		case VBLK:
+			cde.d_type = DT_BLK;
+			break;
+		case VCHR:
+			cde.d_type = DT_CHR;
+			break;
+		case VDIR:
+			cde.d_type = DT_DIR;
+			break;
+		case VFIFO:
+			cde.d_type = DT_FIFO;
+			break;
+		case VLNK:
+			cde.d_type = DT_LNK;
+			break;
+		case VREG:
+			cde.d_type = DT_REG;
+			break;
+		default:
+			panic("%s: tarfs_node %p, type %d\n", __func__,
+			    current, current->type);
+		}
+		cde.d_namlen = current->namelen;
+		/* it is the entry's name, not the directory's, that must fit */
+		MPASS(current->namelen < sizeof(cde.d_name));
+		(void)memcpy(cde.d_name, current->name, current->namelen);
+		cde.d_name[current->namelen] = '\0';
+		cde.d_reclen = GENERIC_DIRSIZ(&cde);
+		if (cde.d_reclen > uio->uio_resid)
+			goto full;
+		error = uiomove(&cde, cde.d_reclen, uio);
+		if (error != 0)
+			goto done;
+		ndirents++;
+		/* next sibling */
+		current = TAILQ_NEXT(current, dirents);
+		if (current == NULL)
+			goto done;
+		uio->uio_offset = current->ino;
+		TARFS_DPF(VNODE, "%s: [%u] setting current node to %p=%s\n",
+		    __func__, ndirents, current, current->name);
+	}
+full:
+	if (cde.d_reclen > uio->uio_resid) {
+		TARFS_DPF(VNODE, "%s: out of space, returning\n",
+		    __func__);
+		/* remember that we stopped early, not at the end */
+		truncated = true;
+		error = (ndirents == 0) ? EINVAL : 0;
+	}
+done:
+	TARFS_DPF(VNODE, "%s: %u entries written\n", __func__, ndirents);
+	TARFS_DPF(VNODE, "%s: saving cache information\n", __func__);
+	if (current == NULL) {
+		/*
+		 * current is also NULL when the buffer filled up while
+		 * generating "." or "..".  That is not the end of the
+		 * directory, so the offset must keep naming the entry
+		 * that did not fit instead of being moved to EOF.
+		 */
+		if (!truncated)
+			uio->uio_offset = TARFS_COOKIE_EOF;
+		tnp->dir.lastcookie = 0;
+		tnp->dir.lastnode = NULL;
+	} else {
+		tnp->dir.lastcookie = current->ino;
+		tnp->dir.lastnode = current;
+	}
+
+	if (eofflag != NULL) {
+		TARFS_DPF(VNODE, "%s: Setting EOF flag\n", __func__);
+		*eofflag = (error == 0 && current == NULL && !truncated);
+	}
+
+	/* Update for NFS */
+	if (error == 0 && cookies != NULL && ncookies != NULL) {
+		TARFS_DPF(VNODE, "%s: Updating NFS cookies\n", __func__);
+		/*
+		 * Replay the walk from the starting offset, recording
+		 * for each emitted entry the cookie of its successor.
+		 */
+		current = NULL;
+		*cookies = malloc(ndirents * sizeof(off_t), M_TEMP, M_WAITOK);
+		*ncookies = ndirents;
+		for (idx = 0; idx < ndirents; idx++) {
+			if (off == TARFS_COOKIE_DOT)
+				off = TARFS_COOKIE_DOTDOT;
+			else {
+				if (off == TARFS_COOKIE_DOTDOT) {
+					current = TAILQ_FIRST(&tnp->dir.dirhead);
+				} else if (current != NULL) {
+					current = TAILQ_NEXT(current, dirents);
+				} else {
+					current = tarfs_lookup_dir(tnp, off);
+					current = TAILQ_NEXT(current, dirents);
+				}
+				if (current == NULL)
+					off = TARFS_COOKIE_EOF;
+				else
+					off = current->ino;
+			}
+
+			TARFS_DPF(VNODE, "%s: [%u] offset %zu\n", __func__,
+			    idx, off);
+			(*cookies)[idx] = off;
+		}
+		/* the last cookie must agree with where the uio ended up */
+		MPASS(uio->uio_offset == off);
+	}
+
+	return (error);
+}
+
+/*
+ * Reads from a regular file into ap->a_uio.  Device nodes yield
+ * EOPNOTSUPP, every other non-regular vnode EISDIR, and a negative
+ * offset EINVAL.  Reading stops at the end of the file, when the
+ * request has been satisfied, on error, or when a call to
+ * tarfs_read_file() makes no progress.
+ */
+static int
+tarfs_read(struct vop_read_args *ap)
+{
+	struct uio *uiop = ap->a_uio;
+	struct vnode *vp = ap->a_vp;
+	struct tarfs_node *tnp;
+	size_t len;
+	off_t resid;
+	int error;
+
+	switch (vp->v_type) {
+	case VREG:
+		break;
+	case VCHR:
+	case VBLK:
+		return (EOPNOTSUPP);
+	default:
+		return (EISDIR);
+	}
+
+	if (uiop->uio_offset < 0)
+		return (EINVAL);
+
+	tnp = VP_TO_TARFS_NODE(vp);
+	error = 0;
+
+	TARFS_DPF(VNODE, "%s(%p=%s, %zu, %zd)\n", __func__,
+	    tnp, tnp->name, uiop->uio_offset, uiop->uio_resid);
+
+	for (;;) {
+		resid = uiop->uio_resid;
+		if (resid <= 0)
+			break;
+		/* nothing left between the offset and the end of the file */
+		if (tnp->size <= uiop->uio_offset)
+			break;
+		len = MIN(tnp->size - uiop->uio_offset, resid);
+		if (len == 0)
+			break;
+
+		error = tarfs_read_file(tnp, len, uiop);
+		/* stop on error or if the call did not move any data */
+		if (error != 0 || resid == uiop->uio_resid)
+			break;
+	}
+
+	return (error);
+}
+
+/*
+ * Copies the target of a symbolic link into ap->a_uio, limited to the
+ * smaller of the node's size and the space left in the uio.  Returns
+ * the result of uiomove().
+ */
+static int
+tarfs_readlink(struct vop_readlink_args *ap)
+{
+	struct uio *uiop = ap->a_uio;
+	struct vnode *vp = ap->a_vp;
+	struct tarfs_node *tnp;
+
+	MPASS(uiop->uio_offset == 0);
+	MPASS(vp->v_type == VLNK);
+
+	tnp = VP_TO_TARFS_NODE(vp);
+
+	TARFS_DPF(VNODE, "%s(%p=%s)\n", __func__,
+	    tnp, tnp->name);
+
+	return (uiomove(tnp->link.name,
+	    MIN(tnp->size, uiop->uio_resid), uiop));
+}
+
+/*
+ * Reclaims a tarfs vnode: removes it from the VFS hash, destroys its
+ * VM object, purges its name cache entries and finally breaks the
+ * link between the vnode and its tarfs node.  Always returns 0.
+ */
+static int
+tarfs_reclaim(struct vop_reclaim_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct tarfs_node *tnp = VP_TO_TARFS_NODE(vp);
+
+	/* Detach the vnode from the VFS-level structures first */
+	vfs_hash_remove(vp);
+	vnode_destroy_vobject(vp);
+	cache_purge(vp);
+
+	/* Then unhook node and vnode from each other under the node lock */
+	TARFS_NODE_LOCK(tnp);
+	tnp->vnode = NULLVP;
+	vp->v_data = NULL;
+	TARFS_NODE_UNLOCK(tnp);
+
+	return (0);
+}
+
+/*
+ * Prints a description of a tarfs vnode: the address and link count of
+ * its node, followed by mode, owner, group and size, plus the FIFO
+ * details if the vnode is a FIFO.  Always returns 0.
+ */
+static int
+tarfs_print(struct vop_print_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct tarfs_node *tnp = VP_TO_TARFS_NODE(vp);
+
+	printf("tag tarfs, tarfs_node %p, links %lu\n",
+	    tnp, tnp->nlink);
+	printf("\tmode 0%o, owner %d, group %d, size %zd\n",
+	    tnp->mode, tnp->uid, tnp->gid,
+	    tnp->size);
+
+	if (vp->v_type == VFIFO) {
+		fifo_printinfo(vp);
+	}
+
+	printf("\n");
+
+	return (0);
+}
+
+/*
+ * Services a read request for the buffer ap->a_bp on behalf of the
+ * node behind ap->a_vp.  A request starting beyond the end of the file
+ * fails with EIO; one that extends past the end is clipped to the file
+ * length.  The data is read with tarfs_read_file() and b_resid is
+ * reduced by the number of bytes transferred.  Errors are reported
+ * through the buffer (BIO_ERROR and b_error); the function itself
+ * always returns 0 after marking the buffer B_DONE.
+ */
+static int
+tarfs_strategy(struct vop_strategy_args *ap)
+{
+	struct uio auio;
+	struct iovec iov;
+	struct tarfs_node *tnp;
+	struct buf *bp;
+	off_t off;
+	size_t len;
+	int error;
+
+	tnp = VP_TO_TARFS_NODE(ap->a_vp);
+	bp = ap->a_bp;
+	MPASS(bp->b_iocmd == BIO_READ);
+	MPASS(bp->b_iooffset >= 0);
+	MPASS(bp->b_bcount > 0);
+	MPASS(bp->b_bufsize >= bp->b_bcount);
+	TARFS_DPF(VNODE, "%s(%p=%s, %zu, %ld/%ld)\n", __func__, tnp,
+	    tnp->name, (size_t)bp->b_iooffset, bp->b_bcount, bp->b_bufsize);
+
+	off = bp->b_iooffset;
+	len = bp->b_bcount;
+	/* until data has been transferred, the whole request is residual */
+	bp->b_resid = len;
+
+	if (off > tnp->size) {
+		/* XXX read beyond EOF - figure out correct handling */
+		error = EIO;
+	} else {
+		/* clip to file length */
+		if (off + len > tnp->size)
+			len = tnp->size - off;
+
+		/* a single kernel-space segment covering the buffer */
+		iov.iov_base = bp->b_data;
+		iov.iov_len = bp->b_bcount;
+		auio.uio_iov = &iov;
+		auio.uio_iovcnt = 1;
+		auio.uio_offset = off;
+		auio.uio_resid = len;
+		auio.uio_segflg = UIO_SYSSPACE;
+		auio.uio_rw = UIO_READ;
+		auio.uio_td = curthread;
+
+		error = tarfs_read_file(tnp, len, &auio);
+		bp->b_resid -= len - auio.uio_resid;
+	}
+
+	if (error != 0) {
+		bp->b_ioflags |= BIO_ERROR;
+		bp->b_error = error;
+	}
+	bp->b_flags |= B_DONE;
+	return (0);
+}
+
+/*
+ * Builds a file handle for the vnode ap->a_vp in ap->a_fhp, made up
+ * of the handle length plus the inode and generation numbers of the
+ * tarfs node.  Always returns 0.
+ */
+static int
+tarfs_vptofh(struct vop_vptofh_args *ap)
+{
+	struct tarfs_node *tnp = VP_TO_TARFS_NODE(ap->a_vp);
+	struct tarfs_fid *fid = (struct tarfs_fid *)ap->a_fhp;
+
+	fid->len = sizeof(struct tarfs_fid);
+	fid->ino = tnp->ino;
+	fid->gen = tnp->gen;
+
+	return (0);
+}
+
+/*
+ * Vnode operations vector for tarfs.  Operations not listed here are
+ * taken from default_vnodeops.  vop_lookup is set to the generic
+ * vfs_cache_lookup, while tarfs_lookup is installed as the
+ * vop_cachedlookup handler.
+ */
+struct vop_vector tarfs_vnodeops = {
+ .vop_default = &default_vnodeops,
+
+ .vop_access = tarfs_access,
+ .vop_cachedlookup = tarfs_lookup,
+ .vop_close = tarfs_close,
+ .vop_getattr = tarfs_getattr,
+ .vop_lookup = vfs_cache_lookup,
+ .vop_open = tarfs_open,
+ .vop_print = tarfs_print,
+ .vop_read = tarfs_read,
+ .vop_readdir = tarfs_readdir,
+ .vop_readlink = tarfs_readlink,
+ .vop_reclaim = tarfs_reclaim,
+ .vop_strategy = tarfs_strategy,
+ .vop_vptofh = tarfs_vptofh,
+};
+VFS_VOP_VECTOR_REGISTER(tarfs_vnodeops);
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -369,6 +369,7 @@
sym \
${_syscons} \
sysvipc \
+ tarfs \
tcp \
${_ti} \
tmpfs \
diff --git a/sys/modules/tarfs/Makefile b/sys/modules/tarfs/Makefile
new file mode 100644
--- /dev/null
+++ b/sys/modules/tarfs/Makefile
@@ -0,0 +1,23 @@
+# $FreeBSD$
+
+# Kernel module build for tarfs; the sources live in fs/tarfs, two
+# directory levels above this Makefile.
+.PATH: ${.CURDIR:H:H}/fs/tarfs
+
+KMOD= tarfs
+SRCS= opt_tarfs.h \
+ vnode_if.h \
+ tarfs_io.c \
+ tarfs_subr.c \
+ tarfs_vnops.c \
+ tarfs_vfsops.c
+
+# When KERNBUILDDIR is not defined, ZSTDIO is always defined on the
+# command line, and TARFS_DEBUG is passed on if it is set for make.
+.if !defined(KERNBUILDDIR)
+CFLAGS+= -DZSTDIO
+.ifdef TARFS_DEBUG
+CFLAGS+= -DTARFS_DEBUG
+.endif
+.endif
+
+# The option header and the zstd include path are added unconditionally.
+SRCS+= opt_zstdio.h
+CFLAGS+= -I${SRCTOP}/sys/contrib/zstd/lib/freebsd
+
+.include <bsd.kmod.mk>
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Nov 30, 12:14 AM (14 h, 28 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
9053873
Default Alt Text
D37753.id115033.diff (95 KB)
Attached To
Mode
D37753: Add tarfs, a filesystem backed by tarballs.
Attached
Detach File
Event Timeline
Log In to Comment