diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c --- a/sys/fs/unionfs/union_vfsops.c +++ b/sys/fs/unionfs/union_vfsops.c @@ -73,6 +73,8 @@ { struct vnode *lowerrootvp; struct vnode *upperrootvp; + struct vnode *lvp1; + struct vnode *lvp2; struct unionfs_mount *ump; char *target; char *tmp; @@ -276,11 +278,32 @@ */ VOP_UNLOCK(ump->um_uppervp); + /* + * Detect common cases in which constructing a unionfs hierarchy + * would produce deadlock (or failed locking assertions) upon + * use of the resulting unionfs vnodes. This typically happens + * when the requested upper and lower filesystems (which themselves + * may be unionfs instances and/or nullfs aliases) end up resolving + * to the same base-layer files. Note that this is not meant to be + * an exhaustive check of all possible deadlock-producing scenarios. + */ + lvp1 = lvp2 = NULL; + VOP_GETLOWVNODE(ump->um_lowervp, &lvp1, FREAD); + VOP_GETLOWVNODE(ump->um_uppervp, &lvp2, FREAD); + if (lvp1 != NULL && lvp1 == lvp2) + error = EDEADLK; + if (lvp1 != NULL) + vrele(lvp1); + if (lvp2 != NULL) + vrele(lvp2); + /* * Get the unionfs root vnode. 
*/ - error = unionfs_nodeget(mp, ump->um_uppervp, ump->um_lowervp, - NULL, &(ump->um_rootvp), NULL); + if (error == 0) { + error = unionfs_nodeget(mp, ump->um_uppervp, ump->um_lowervp, + NULL, &(ump->um_rootvp), NULL); + } if (error != 0) { vrele(upperrootvp); free(ump, M_UNIONFSMNT); diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c --- a/sys/fs/unionfs/union_vnops.c +++ b/sys/fs/unionfs/union_vnops.c @@ -2105,6 +2105,51 @@ return (error); } +static int +unionfs_getlowvnode(struct vop_getlowvnode_args *ap) +{ + struct unionfs_node *unp; + struct vnode *vp, *basevp; + + vp = ap->a_vp; + VI_LOCK(vp); + unp = VTOUNIONFS(vp); + if (unp == NULL) { + VI_UNLOCK(vp); + return (EBADF); + } + + if (ap->a_flags & FWRITE) { + basevp = unp->un_uppervp; + /* + * If we're being asked to resolve a write and an upper + * vnode is not present, tell the caller that our unionfs + * vnode will resolve the write so that we can do the + * copy-up if/when the write eventually happens. + * We could proactively do the copy-up here, but that would + * require additional locking as well as the addition of + * a 'cred' argument to VOP_GETLOWVNODE(). Such a copy-up + * could also ultimately be redundant or unnecessary + * depending upon what the caller proceeds to do. + */ + if (basevp == NULL) { + VI_UNLOCK(vp); + return (vop_stdgetlowvnode(ap)); + } + } else { + basevp = (unp->un_uppervp != NULL) ? 
+ unp->un_uppervp : unp->un_lowervp; + } + + VNASSERT(basevp != NULL, vp, ("%s: no upper/lower vnode", __func__)); + + vholdnz(basevp); + VI_UNLOCK(vp); + VOP_GETLOWVNODE(basevp, ap->a_vplp, ap->a_flags); + vdrop(basevp); + return (0); +} + static int unionfs_inactive(struct vop_inactive_args *ap) { @@ -3000,6 +3045,7 @@ .vop_getattr = unionfs_getattr, .vop_getextattr = unionfs_getextattr, .vop_getwritemount = unionfs_getwritemount, + .vop_getlowvnode = unionfs_getlowvnode, .vop_inactive = unionfs_inactive, .vop_need_inactive = vop_stdneed_inactive, .vop_islocked = vop_stdislocked, @@ -3039,5 +3085,6 @@ .vop_unp_bind = unionfs_unp_bind, .vop_unp_connect = unionfs_unp_connect, .vop_unp_detach = unionfs_unp_detach, + .vop_copy_file_range = vop_stdcopy_file_range, }; VFS_VOP_VECTOR_REGISTER(unionfs_vnodeops); diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -77,13 +77,11 @@ static int vop_stdis_text(struct vop_is_text_args *ap); static int vop_stdunset_text(struct vop_unset_text_args *ap); static int vop_stdadd_writecount(struct vop_add_writecount_args *ap); -static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap); static int vop_stdfdatasync(struct vop_fdatasync_args *ap); static int vop_stdgetpages_async(struct vop_getpages_async_args *ap); static int vop_stdread_pgcache(struct vop_read_pgcache_args *ap); static int vop_stdstat(struct vop_stat_args *ap); static int vop_stdvput_pair(struct vop_vput_pair_args *ap); -static int vop_stdgetlowvnode(struct vop_getlowvnode_args *ap); /* * This vnode table stores what we want to do if the filesystem doesn't @@ -1426,7 +1424,7 @@ return (0); } -static int +int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap) { int error; @@ -1636,7 +1634,7 @@ return (0); } -static int +int vop_stdgetlowvnode(struct vop_getlowvnode_args *ap) { vref(ap->a_vp); diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src --- a/sys/kern/vnode_if.src 
+++ b/sys/kern/vnode_if.src @@ -469,7 +469,7 @@ OUT struct mount **mpp; }; -%% getwritevnode vp = = = +%% getlowvnode vp = = = vop_getlowvnode { IN struct vnode *vp; diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -906,6 +906,8 @@ int vop_stdunp_connect(struct vop_unp_connect_args *ap); int vop_stdunp_detach(struct vop_unp_detach_args *ap); int vop_stdadd_writecount_nomsync(struct vop_add_writecount_args *ap); +int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap); +int vop_stdgetlowvnode(struct vop_getlowvnode_args *ap); int vop_eopnotsupp(struct vop_generic_args *ap); int vop_ebadf(struct vop_generic_args *ap); int vop_einval(struct vop_generic_args *ap);