diff --git a/sbin/growfs/growfs.c b/sbin/growfs/growfs.c --- a/sbin/growfs/growfs.c +++ b/sbin/growfs/growfs.c @@ -85,6 +85,10 @@ #include "debug.h" +#ifndef UFSEXTEND +#define UFSEXTEND _IOW('U', 3, fsid_t) +#endif + #ifdef FS_DEBUG int _dbg_lvl_ = (DL_INFO); /* DL_TRC */ #endif /* FS_DEBUG */ @@ -105,7 +109,7 @@ static struct csum *fscs; /* cylinder summary */ -static void growfs(int, int, unsigned int); +static void growfs(int, int, unsigned int, const fsid_t *, int); static void rdfs(ufs2_daddr_t, size_t, void *, int); static void wtfs(ufs2_daddr_t, size_t, void *, int, unsigned int); static int charsperline(void); @@ -120,6 +124,8 @@ static void updclst(int); static void mount_reload(const struct statfs *stfs); static void cgckhash(struct cg *); +static void ufssuspend(int fd, const fsid_t *fsid, const char *errmsg); +static void ufsresume(int fd, const fsid_t *fsid, const char *errmsg); /* * Here we actually start growing the file system. We basically read the @@ -133,7 +139,7 @@ * copies. */ static void -growfs(int fsi, int fso, unsigned int Nflag) +growfs(int fsi, int fso, unsigned int Nflag, const fsid_t *suspfs, int suspendlater) { DBG_FUNC("growfs") time_t modtime; @@ -173,28 +179,6 @@ #endif /* FS_DEBUG */ DBG_PRINT0("fscs read\n"); - /* - * Do all needed changes in the former last cylinder group. - */ - updjcg(osblock.fs_ncg - 1, modtime, fsi, fso, Nflag); - - /* - * Dump out summary information about file system. 
- */ -#ifdef FS_DEBUG -#define B2MBFACTOR (1 / (1024.0 * 1024.0)) - printf("growfs: %.1fMB (%jd sectors) block size %d, fragment size %d\n", - (float)sblock.fs_size * sblock.fs_fsize * B2MBFACTOR, - (intmax_t)fsbtodb(&sblock, sblock.fs_size), sblock.fs_bsize, - sblock.fs_fsize); - printf("\tusing %d cylinder groups of %.2fMB, %d blks, %d inodes.\n", - sblock.fs_ncg, (float)sblock.fs_fpg * sblock.fs_fsize * B2MBFACTOR, - sblock.fs_fpg / sblock.fs_frag, sblock.fs_ipg); - if (sblock.fs_flags & FS_DOSOFTDEP) - printf("\twith soft updates\n"); -#undef B2MBFACTOR -#endif /* FS_DEBUG */ - /* * Now build the cylinders group blocks and * then print out indices of cylinder groups. @@ -207,7 +191,11 @@ * Iterate for only the new cylinder groups. */ for (cylno = osblock.fs_ncg; cylno < sblock.fs_ncg; cylno++) { + if (!suspendlater) + ufssuspend(fso, suspfs, "cg"); initcg(cylno, modtime, fso, Nflag); + if (!suspendlater) + ufsresume(fso, suspfs, "cg"); j = sprintf(tmpbuf, " %jd%s", (intmax_t)fsbtodb(&sblock, cgsblock(&sblock, cylno)), cylno < (sblock.fs_ncg - 1) ? "," : "" ); @@ -221,6 +209,33 @@ } printf("\n"); + /* + * Suspend UFS writes before editing the existing filesystem's blocks. + */ + ufssuspend(fso, suspfs, "last cg"); + + /* + * Do all needed changes in the former last cylinder group. + */ + updjcg(osblock.fs_ncg - 1, modtime, fsi, fso, Nflag); + + /* + * Dump out summary information about file system. 
+ */ +#ifdef FS_DEBUG +#define B2MBFACTOR (1 / (1024.0 * 1024.0)) + printf("growfs: %.1fMB (%jd sectors) block size %d, fragment size %d\n", + (float)sblock.fs_size * sblock.fs_fsize * B2MBFACTOR, + (intmax_t)fsbtodb(&sblock, sblock.fs_size), sblock.fs_bsize, + sblock.fs_fsize); + printf("\tusing %d cylinder groups of %.2fMB, %d blks, %d inodes.\n", + sblock.fs_ncg, (float)sblock.fs_fpg * sblock.fs_fsize * B2MBFACTOR, + sblock.fs_fpg / sblock.fs_frag, sblock.fs_ipg); + if (sblock.fs_flags & FS_DOSOFTDEP) + printf("\twith soft updates\n"); +#undef B2MBFACTOR +#endif /* FS_DEBUG */ + /* * Do all needed changes in the first cylinder group. * allocate blocks in new location @@ -1379,9 +1394,12 @@ struct fs *fs; const char *device; const struct statfs *statfsp; + struct stat stat_fs, stat_stdout, stat_stderr; uint64_t size = 0; off_t mediasize; int error, j, fsi, fso, ch, ret, Nflag = 0, yflag = 0; + const struct fsid *suspfs = NULL; + int suspendlater = 0; char *p, reply[5], oldsizebuf[6], newsizebuf[6]; void *testbuf; @@ -1574,12 +1592,18 @@ fso = -1; } else { if (statfsp != NULL && (statfsp->f_flags & MNT_RDONLY) == 0) { + if (stat(statfsp->f_mntonname, &stat_fs) == -1) + err(1, "unable to stat %s", statfsp->f_mntonname); + if (fstat(STDOUT_FILENO, &stat_stdout) == -1) + err(1, "unable to fstat stdout"); + if (fstat(STDERR_FILENO, &stat_stderr) == -1) + err(1, "unable to fstat stderr"); + if (stat_fs.st_dev == stat_stdout.st_dev || stat_fs.st_dev == stat_stderr.st_dev) + errx(1, "stdout and stderr may not write to filesystem being grown"); fso = open(_PATH_UFSSUSPEND, O_RDWR); if (fso == -1) err(1, "unable to open %s", _PATH_UFSSUSPEND); - error = ioctl(fso, UFSSUSPEND, &statfsp->f_fsid); - if (error != 0) - err(1, "UFSSUSPEND"); + suspfs = &statfsp->f_fsid; } else { fso = open(device, O_WRONLY); if (fso < 0) @@ -1587,6 +1611,20 @@ } } + /* + * We are about to read/write the volume's last block. 
If + * using /dev/ufssuspend, try the newer UFSEXTEND ioctl first, + * then fall back to UFSSUSPEND. + */ + if (suspfs != NULL) { + if (ioctl(fso, UFSEXTEND, suspfs) < 0) { + if (errno != ENXIO) + err(1, "last block, unexpected error with ioctl UFSEXTEND"); + ufssuspend(fso, suspfs, "last block"); + } else + suspendlater = 1; + } + /* * Try to access our new last block in the file system. */ @@ -1599,6 +1637,9 @@ sblock.fs_fsize, testbuf, fso, Nflag); free(testbuf); + if (!suspendlater) + ufsresume(fso, suspfs, "last block"); + /* * Now calculate new superblock values and check for reasonable * bound for new file system size: @@ -1654,15 +1695,11 @@ /* * Ok, everything prepared, so now let's do the tricks. */ - growfs(fsi, fso, Nflag); + growfs(fsi, fso, Nflag, suspfs, suspendlater); close(fsi); if (fso > -1) { - if (statfsp != NULL && (statfsp->f_flags & MNT_RDONLY) == 0) { - error = ioctl(fso, UFSRESUME); - if (error != 0) - err(1, "UFSRESUME"); - } + ufsresume(fso, suspfs, "close"); error = close(fso); if (error != 0) err(1, "close"); @@ -1769,3 +1806,25 @@ cgp->cg_ckhash = 0; cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); } + +/* + * Manipulate the ufssuspend device state. 
+ */
+static void
+ufssuspend(int fd, const fsid_t *fsid, const char *errmsg)
+{
+	if (fsid == NULL)
+		return;
+	if (ioctl(fd, UFSSUSPEND, fsid) != 0)
+		err(1, "UFSSUSPEND %s", errmsg);
+}
+
+/* Issue UFSRESUME on fd unless fsid is NULL; exits on ioctl failure. */
+static void
+ufsresume(int fd, const fsid_t *fsid, const char *errmsg)
+{
+	if (fsid == NULL)
+		return;
+	if (ioctl(fd, UFSRESUME) != 0)
+		err(1, "UFSRESUME %s", errmsg);
+}
diff --git a/sys/ufs/ffs/ffs_suspend.c b/sys/ufs/ffs/ffs_suspend.c
--- a/sys/ufs/ffs/ffs_suspend.c
+++ b/sys/ufs/ffs/ffs_suspend.c
@@ -115,7 +115,8 @@
 	devvp = ump->um_devvp;
 	fs = ump->um_fs;
 
-	if (ffs_susp_suspended(mp) == 0) {
+	if (ffs_susp_suspended(mp) == 0 &&
+	    uio->uio_offset < fs->fs_size * fs->fs_fsize) {
 		sx_sunlock(&ffs_susp_lock);
 		return (ENXIO);
 	}
@@ -175,6 +176,9 @@
 	return (error);
 }
 
+/*
+ * Mark the UFS as suspended, and block writes from VFS.
+ */
 static int
 ffs_susp_suspend(struct mount *mp)
 {
@@ -183,8 +187,6 @@
 
 	sx_assert(&ffs_susp_lock, SA_XLOCKED);
 
-	if (!ffs_own_mount(mp))
-		return (EINVAL);
 	if (ffs_susp_suspended(mp))
 		return (EBUSY);
 
@@ -216,6 +218,9 @@
 	return (0);
 }
 
+/*
+ * Clear the UFS/VFS suspended state and permit writes again.
+ */
 static void
 ffs_susp_unsuspend(struct mount *mp)
 {
@@ -240,28 +245,21 @@
 	UFS_LOCK(ump);
 	ump->um_flags &= ~UM_WRITESUSPENDED;
 	UFS_UNLOCK(ump);
-	vfs_unbusy(mp);
 }
 
+/*
+ * Exit UFS's internal suspended state and reload the filesystem.
+ */
 static void
-ffs_susp_dtor(void *data)
+ffs_susp_resume(struct mount *mp)
 {
 	struct fs *fs;
 	struct ufsmount *ump;
-	struct mount *mp;
 	int error;
 
-	sx_xlock(&ffs_susp_lock);
-
-	mp = (struct mount *)data;
 	ump = VFSTOUFS(mp);
 	fs = ump->um_fs;
 
-	if (ffs_susp_suspended(mp) == 0) {
-		sx_xunlock(&ffs_susp_lock);
-		return;
-	}
-
 	KASSERT((mp->mnt_kern_flag & MNTK_SUSPEND) != 0,
 	    ("MNTK_SUSPEND not set"));
 
@@ -270,6 +268,25 @@
 		panic("failed to unsuspend writes on %s", fs->fs_fsmnt);
 
 	ffs_susp_unsuspend(mp);
+}
+
+/*
+ * Unsuspend filesystem if needed, and release the ffssuspend mount lock.
+ */
+static void
+ffs_susp_dtor(void *data)
+{
+	struct mount *mp;
+
+	sx_xlock(&ffs_susp_lock);
+
+	mp = (struct mount *)data;
+
+	if (ffs_susp_suspended(mp) != 0)
+		ffs_susp_resume(mp);
+	/* A cdevpriv always holds the mount busy, suspended or not. */
+	vfs_unbusy(mp);
+
 	sx_xunlock(&ffs_susp_lock);
 }
 
@@ -279,8 +296,11 @@
 {
 	struct mount *mp;
 	fsid_t *fsidp;
+	void *cdpcheck;
+	int hadcdp;
 	int error;
 
+	mp = NULL;
 	/*
 	 * No suspend inside the jail. Allowing it would require making
 	 * sure that e.g. the devfs ruleset for that jail permits access
@@ -293,37 +313,72 @@
 
 	switch (cmd) {
 	case UFSSUSPEND:
+	case UFSEXTEND:
+		/*
+		 * Require single-thread curproc so that the descriptor check
+		 * below is not racy.
+		 * XXXKIB: might consider to singlethread curproc instead.
+		 */
+		if (curproc->p_numthreads > 1) {
+			error = EDEADLK;
+			break;
+		}
+
 		fsidp = (fsid_t *)addr;
+		/* Find and reference FS. */
 		mp = vfs_getvfs(fsidp);
 		if (mp == NULL) {
 			error = ENOENT;
 			break;
 		}
-		error = vfs_busy(mp, 0);
-		vfs_rel(mp);
-		if (error != 0)
+		/* Verify that it's FFS. */
+		if (!ffs_own_mount(mp)) {
+			error = EINVAL;
+			break;
+		}
+		/*
+		 * If any fds are open for write on the filesystem to be
+		 * suspended, return error to prevent deadlock. This has to
+		 * run after the lookup above so that mp names that filesystem.
+		 * XXX: This does not prevent a process from attempting to open
+		 * such a file after calling this ioctl.
+		 */
+		error = descrip_check_write_mp(curproc->p_fd, mp);
+		if (error != 0)
+			break;
+		/* If already in the desired state, nothing to do. */
+		hadcdp = devfs_get_cdevpriv(&cdpcheck) == 0;
+		if (hadcdp && !((cmd == UFSSUSPEND) ^ ffs_susp_suspended(mp)))
 			break;
 
 		/*
-		 * Require single-thread curproc so that the check is not racey.
-		 * XXXKIB: might consider to singlethread curproc instead.
+		 * If we don't have a cdevpriv already, mark the vfs mount
+		 * busy and then get one. Busy comes first so that
+		 * ffs_susp_dtor() never runs against a mount we do not hold.
 		 */
-		error = curproc->p_numthreads > 1 ? EDEADLK :
-		    descrip_check_write_mp(curproc->p_fd, mp);
-		if (error != 0)
-			break;
-
-		error = ffs_susp_suspend(mp);
-		if (error != 0) {
-			vfs_unbusy(mp);
-			break;
+		if (!hadcdp) {
+			error = vfs_busy(mp, 0);
+			if (error != 0)
+				break;
+			error = devfs_set_cdevpriv(mp, ffs_susp_dtor);
+			if (error != 0) {
+				vfs_unbusy(mp);
+				break;
+			}
+		}
+		/*
+		 * Finally, suspend or resume-if-needed the filesystem. An
+		 * error here does not release the mount lock.
+		 */
+		if (cmd == UFSSUSPEND) {
+			error = ffs_susp_suspend(mp);
+		} else if (hadcdp) {
+			ffs_susp_resume(mp);
+			error = 0;
 		}
-		error = devfs_set_cdevpriv(mp, ffs_susp_dtor);
-		if (error != 0)
-			ffs_susp_unsuspend(mp);
 		break;
 	case UFSRESUME:
-		error = devfs_get_cdevpriv((void **)&mp);
+		error = devfs_get_cdevpriv(&cdpcheck);
 		if (error != 0)
 			break;
 		/*
@@ -340,6 +395,9 @@
 		break;
 	}
 
+	if (mp != NULL)
+		vfs_rel(mp);
+
 	sx_xunlock(&ffs_susp_lock);
 
 	return (error);
diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h
--- a/sys/ufs/ffs/fs.h
+++ b/sys/ufs/ffs/fs.h
@@ -901,5 +901,6 @@
  */
 #define	UFSSUSPEND	_IOW('U', 1, fsid_t)
 #define	UFSRESUME	_IO('U', 2)
+#define	UFSEXTEND	_IOW('U', 3, fsid_t)
 
 #endif