sys/kern/vfs_subr.c - annotate

Return to vfs_subr.c CVS log
Up to [local] / sys / kern
Annotation of sys/kern/vfs_subr.c, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: vfs_subr.c,v 1.155 2007/08/07 04:32:45 beck Exp $     */
                      2: /*     $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $   */
                      3:
                      4: /*
                      5:  * Copyright (c) 1989, 1993
                      6:  *     The Regents of the University of California.  All rights reserved.
                      7:  * (c) UNIX System Laboratories, Inc.
                      8:  * All or some portions of this file are derived from material licensed
                      9:  * to the University of California by American Telephone and Telegraph
                     10:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
                     11:  * the permission of UNIX System Laboratories, Inc.
                     12:  *
                     13:  * Redistribution and use in source and binary forms, with or without
                     14:  * modification, are permitted provided that the following conditions
                     15:  * are met:
                     16:  * 1. Redistributions of source code must retain the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer.
                     18:  * 2. Redistributions in binary form must reproduce the above copyright
                     19:  *    notice, this list of conditions and the following disclaimer in the
                     20:  *    documentation and/or other materials provided with the distribution.
                     21:  * 3. Neither the name of the University nor the names of its contributors
                     22:  *    may be used to endorse or promote products derived from this software
                     23:  *    without specific prior written permission.
                     24:  *
                     25:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     26:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     27:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     28:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     29:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     30:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     31:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     32:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     33:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     34:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     35:  * SUCH DAMAGE.
                     36:  *
                     37:  *     @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
                     38:  */
                     39:
                     40: /*
                     41:  * External virtual filesystem routines
                     42:  */
                     43:
                     44: #include <sys/param.h>
                     45: #include <sys/systm.h>
                     46: #include <sys/proc.h>
                     47: #include <sys/mount.h>
                     48: #include <sys/time.h>
                     49: #include <sys/fcntl.h>
                     50: #include <sys/kernel.h>
                     51: #include <sys/vnode.h>
                     52: #include <sys/stat.h>
                     53: #include <sys/namei.h>
                     54: #include <sys/ucred.h>
                     55: #include <sys/buf.h>
                     56: #include <sys/errno.h>
                     57: #include <sys/malloc.h>
                     58: #include <sys/domain.h>
                     59: #include <sys/mbuf.h>
                     60: #include <sys/syscallargs.h>
                     61: #include <sys/pool.h>
                     62:
                     63: #include <uvm/uvm_extern.h>
                     64: #include <sys/sysctl.h>
                     65:
                     66: #include <miscfs/specfs/specdev.h>
                     67:
                     68: enum vtype iftovt_tab[16] = {
                     69:        VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
                     70:        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
                     71: };
                     72:
                     73: int    vttoif_tab[9] = {
                     74:        0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
                     75:        S_IFSOCK, S_IFIFO, S_IFMT,
                     76: };
                     77:
                     78: int doforce = 1;               /* 1 => permit forcible unmounting */
                     79: int prtactive = 0;             /* 1 => print out reclaim of active vnodes */
                     80: int suid_clear = 1;            /* 1 => clear SUID / SGID on owner change */
                     81:
                     82: /*
                     83:  * Insq/Remq for the vnode usage lists.
                     84:  */
                     85: #define        bufinsvn(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_vnbufs)
                     86: #define        bufremvn(bp) {                                                  \
                     87:        LIST_REMOVE(bp, b_vnbufs);                                      \
                     88:        LIST_NEXT(bp, b_vnbufs) = NOLIST;                               \
                     89: }
                     90:
                     91: struct freelst vnode_hold_list;        /* list of vnodes referencing buffers */
                     92: struct freelst vnode_free_list;        /* vnode free list */
                     93:
                     94: struct mntlist mountlist;      /* mounted filesystem list */
                     95:
                     96: void   vclean(struct vnode *, int, struct proc *);
                     97:
                     98: void insmntque(struct vnode *, struct mount *);
                     99: int getdevvp(dev_t, struct vnode **, enum vtype);
                    100:
                    101: int vfs_hang_addrlist(struct mount *, struct netexport *,
                    102:                                  struct export_args *);
                    103: int vfs_free_netcred(struct radix_node *, void *);
                    104: void vfs_free_addrlist(struct netexport *);
                    105: void vputonfreelist(struct vnode *);
                    106:
                    107: int vflush_vnode(struct vnode *, void *);
                    108: int maxvnodes;
                    109:
                    110: #ifdef DEBUG
                    111: void printlockedvnodes(void);
                    112: #endif
                    113:
                    114: struct pool vnode_pool;
                    115:
                    116: /*
                    117:  * Initialize the vnode management data structures.
                    118:  */
                    119: void
                    120: vntblinit(void)
                    121: {
                    122:        /* buffer cache may need a vnode for each buffer */
                    123:        maxvnodes = desiredvnodes;
                    124:        pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodes",
                    125:            &pool_allocator_nointr);
                    126:        TAILQ_INIT(&vnode_hold_list);
                    127:        TAILQ_INIT(&vnode_free_list);
                    128:        CIRCLEQ_INIT(&mountlist);
                    129:        /*
                    130:         * Initialize the filesystem syncer.
                    131:         */
                    132:        vn_initialize_syncerd();
                    133: }
                    134:
                    135: /*
                    136:  * Mark a mount point as busy. Used to synchronize access and to delay
                    137:  * unmounting.
                    138:  *
                    139:  * Default behaviour is to attempt getting a READ lock and in case of an
                    140:  * ongoing unmount, to wait for it to finish and then return failure.
                    141:  */
                    142: int
                    143: vfs_busy(struct mount *mp, int flags)
                    144: {
                    145:        int rwflags = 0;
                    146:
                    147:        /* new mountpoints need their lock initialised */
                    148:        if (mp->mnt_lock.rwl_name == NULL)
                    149:                rw_init(&mp->mnt_lock, "vfslock");
                    150:
                    151:        if (flags & VB_WRITE)
                    152:                rwflags |= RW_WRITE;
                    153:        else
                    154:                rwflags |= RW_READ;
                    155:
                    156:        if (flags & VB_WAIT)
                    157:                rwflags |= RW_SLEEPFAIL;
                    158:        else
                    159:                rwflags |= RW_NOSLEEP;
                    160:
                    161:        if (rw_enter(&mp->mnt_lock, rwflags))
                    162:                return (EBUSY);
                    163:
                    164:        return (0);
                    165: }
                    166:
                    167: /*
                    168:  * Free a busy file system
                    169:  */
                    170: void
                    171: vfs_unbusy(struct mount *mp)
                    172: {
                    173:        rw_exit(&mp->mnt_lock);
                    174: }
                    175:
                    176: int
                    177: vfs_isbusy(struct mount *mp)
                    178: {
                    179:        if (RWLOCK_OWNER(&mp->mnt_lock) > 0)
                    180:                return (1);
                    181:        else
                    182:                return (0);
                    183: }
                    184:
                    185: /*
                    186:  * Lookup a filesystem type, and if found allocate and initialize
                    187:  * a mount structure for it.
                    188:  *
                    189:  * Devname is usually updated by mount(8) after booting.
                    190:  */
                    191: int
                    192: vfs_rootmountalloc(char *fstypename, char *devname, struct mount **mpp)
                    193: {
                    194:        struct vfsconf *vfsp;
                    195:        struct mount *mp;
                    196:
                    197:        for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
                    198:                if (!strcmp(vfsp->vfc_name, fstypename))
                    199:                        break;
                    200:        if (vfsp == NULL)
                    201:                return (ENODEV);
                    202:        mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK);
                    203:        bzero(mp, sizeof(struct mount));
                    204:        (void)vfs_busy(mp, VB_READ|VB_NOWAIT);
                    205:        LIST_INIT(&mp->mnt_vnodelist);
                    206:        mp->mnt_vfc = vfsp;
                    207:        mp->mnt_op = vfsp->vfc_vfsops;
                    208:        mp->mnt_flag = MNT_RDONLY;
                    209:        mp->mnt_vnodecovered = NULLVP;
                    210:        vfsp->vfc_refcount++;
                    211:        mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
                    212:        strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
                    213:        mp->mnt_stat.f_mntonname[0] = '/';
                    214:        (void)copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
                    215:        *mpp = mp;
                    216:        return (0);
                    217:  }
                    218:
                    219: /*
                    220:  * Find an appropriate filesystem to use for the root. If a filesystem
                    221:  * has not been preselected, walk through the list of known filesystems
                    222:  * trying those that have mountroot routines, and try them until one
                    223:  * works or we have tried them all.
                    224:  */
                    225: int
                    226: vfs_mountroot(void)
                    227: {
                    228:        struct vfsconf *vfsp;
                    229:        int error;
                    230:
                    231:        if (mountroot != NULL)
                    232:                return ((*mountroot)());
                    233:        for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
                    234:                if (vfsp->vfc_mountroot == NULL)
                    235:                        continue;
                    236:                if ((error = (*vfsp->vfc_mountroot)()) == 0)
                    237:                        return (0);
                    238:                printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
                    239:        }
                    240:        return (ENODEV);
                    241: }
                    242:
                    243: /*
                    244:  * Lookup a mount point by filesystem identifier.
                    245:  */
                    246: struct mount *
                    247: vfs_getvfs(fsid_t *fsid)
                    248: {
                    249:        struct mount *mp;
                    250:
                    251:        CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
                    252:                if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
                    253:                    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
                    254:                        return (mp);
                    255:                }
                    256:        }
                    257:
                    258:        return (NULL);
                    259: }
                    260:
                    261:
                    262: /*
                    263:  * Get a new unique fsid
                    264:  */
                    265: void
                    266: vfs_getnewfsid(struct mount *mp)
                    267: {
                    268:        static u_short xxxfs_mntid;
                    269:
                    270:        fsid_t tfsid;
                    271:        int mtype;
                    272:
                    273:        mtype = mp->mnt_vfc->vfc_typenum;
                    274:        mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
                    275:        mp->mnt_stat.f_fsid.val[1] = mtype;
                    276:        if (xxxfs_mntid == 0)
                    277:                ++xxxfs_mntid;
                    278:        tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
                    279:        tfsid.val[1] = mtype;
                    280:        if (!CIRCLEQ_EMPTY(&mountlist)) {
                    281:                while (vfs_getvfs(&tfsid)) {
                    282:                        tfsid.val[0]++;
                    283:                        xxxfs_mntid++;
                    284:                }
                    285:        }
                    286:        mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
                    287: }
                    288:
                    289: /*
                    290:  * Make a 'unique' number from a mount type name.
                    291:  * Note that this is no longer used for ffs which
                    292:  * now has an on-disk filesystem id.
                    293:  */
                    294: long
                    295: makefstype(char *type)
                    296: {
                    297:        long rv;
                    298:
                    299:        for (rv = 0; *type; type++) {
                    300:                rv <<= 2;
                    301:                rv ^= *type;
                    302:        }
                    303:        return rv;
                    304: }
                    305:
                    306: /*
                    307:  * Set vnode attributes to VNOVAL
                    308:  */
                    309: void
                    310: vattr_null(struct vattr *vap)
                    311: {
                    312:
                    313:        vap->va_type = VNON;
                    314:        /* XXX These next two used to be one line, but for a GCC bug. */
                    315:        vap->va_size = VNOVAL;
                    316:        vap->va_bytes = VNOVAL;
                    317:        vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
                    318:                vap->va_fsid = vap->va_fileid =
                    319:                vap->va_blocksize = vap->va_rdev =
                    320:                vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
                    321:                vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
                    322:                vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
                    323:                vap->va_flags = vap->va_gen = VNOVAL;
                    324:        vap->va_vaflags = 0;
                    325: }
                    326:
                    327: /*
                    328:  * Routines having to do with the management of the vnode table.
                    329:  */
                    330: extern int (**dead_vnodeop_p)(void *);
                    331: long numvnodes;
                    332:
                    333: /*
                    334:  * Return the next vnode from the free list.
                    335:  */
                    336: int
                    337: getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
                    338:     struct vnode **vpp)
                    339: {
                    340:        struct proc *p = curproc;
                    341:        struct freelst *listhd;
                    342:        static int toggle;
                    343:        struct vnode *vp;
                    344:        int s;
                    345:
                    346:        /*
                    347:         * We must choose whether to allocate a new vnode or recycle an
                    348:         * existing one. The criterion for allocating a new one is that
                    349:         * the total number of vnodes is less than the number desired or
                    350:         * there are no vnodes on either free list. Generally we only
                    351:         * want to recycle vnodes that have no buffers associated with
                    352:         * them, so we look first on the vnode_free_list. If it is empty,
                    353:         * we next consider vnodes with referencing buffers on the
                    354:         * vnode_hold_list. The toggle ensures that half the time we
                    355:         * will use a buffer from the vnode_hold_list, and half the time
                    356:         * we will allocate a new one unless the list has grown to twice
                    357:         * the desired size. We are reticent to recycle vnodes from the
                    358:         * vnode_hold_list because we will lose the identity of all its
                    359:         * referencing buffers.
                    360:         */
                    361:        toggle ^= 1;
                    362:        if (numvnodes > 2 * maxvnodes)
                    363:                toggle = 0;
                    364:
                    365:        s = splbio();
                    366:        if ((numvnodes < maxvnodes) ||
                    367:            ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
                    368:            ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
                    369:                splx(s);
                    370:                vp = pool_get(&vnode_pool, PR_WAITOK);
                    371:                bzero((char *)vp, sizeof *vp);
                    372:                numvnodes++;
                    373:        } else {
                    374:                for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
                    375:                    vp = TAILQ_NEXT(vp, v_freelist)) {
                    376:                        if (VOP_ISLOCKED(vp) == 0)
                    377:                                break;
                    378:                }
                    379:                /*
                    380:                 * Unless this is a bad time of the month, at most
                    381:                 * the first NCPUS items on the free list are
                    382:                 * locked, so this is close enough to being empty.
                    383:                 */
                    384:                if (vp == NULL) {
                    385:                        splx(s);
                    386:                        tablefull("vnode");
                    387:                        *vpp = 0;
                    388:                        return (ENFILE);
                    389:                }
                    390:
                    391: #ifdef DIAGNOSTIC
                    392:                if (vp->v_usecount) {
                    393:                        vprint("free vnode", vp);
                    394:                        panic("free vnode isn't");
                    395:                }
                    396: #endif
                    397:
                    398:                TAILQ_REMOVE(listhd, vp, v_freelist);
                    399:                vp->v_bioflag &= ~VBIOONFREELIST;
                    400:                splx(s);
                    401:
                    402:                if (vp->v_type != VBAD)
                    403:                        vgonel(vp, p);
                    404: #ifdef DIAGNOSTIC
                    405:                if (vp->v_data) {
                    406:                        vprint("cleaned vnode", vp);
                    407:                        panic("cleaned vnode isn't");
                    408:                }
                    409:                s = splbio();
                    410:                if (vp->v_numoutput)
                    411:                        panic("Clean vnode has pending I/O's");
                    412:                splx(s);
                    413: #endif
                    414:                vp->v_flag = 0;
                    415:                vp->v_socket = 0;
                    416:        }
                    417:        vp->v_type = VNON;
                    418:        cache_purge(vp);
                    419:        vp->v_tag = tag;
                    420:        vp->v_op = vops;
                    421:        insmntque(vp, mp);
                    422:        *vpp = vp;
                    423:        vp->v_usecount = 1;
                    424:        vp->v_data = 0;
                    425:        simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
                    426:        return (0);
                    427: }
                    428:
                    429: /*
                    430:  * Move a vnode from one mount queue to another.
                    431:  */
                    432: void
                    433: insmntque(struct vnode *vp, struct mount *mp)
                    434: {
                    435:        /*
                    436:         * Delete from old mount point vnode list, if on one.
                    437:         */
                    438:        if (vp->v_mount != NULL)
                    439:                LIST_REMOVE(vp, v_mntvnodes);
                    440:        /*
                    441:         * Insert into list of vnodes for the new mount point, if available.
                    442:         */
                    443:        if ((vp->v_mount = mp) != NULL)
                    444:                LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
                    445: }
                    446:
                    447: /*
                    448:  * Create a vnode for a block device.
                    449:  * Used for root filesystem, argdev, and swap areas.
                    450:  * Also used for memory file system special devices.
                    451:  */
                    452: int
                    453: bdevvp(dev_t dev, struct vnode **vpp)
                    454: {
                    455:        return (getdevvp(dev, vpp, VBLK));
                    456: }
                    457:
                    458: /*
                    459:  * Create a vnode for a character device.
                    460:  * Used for console handling.
                    461:  */
                    462: int
                    463: cdevvp(dev_t dev, struct vnode **vpp)
                    464: {
                    465:        return (getdevvp(dev, vpp, VCHR));
                    466: }
                    467:
                    468: /*
                    469:  * Create a vnode for a device.
                    470:  * Used by bdevvp (block device) for root file system etc.,
                    471:  * and by cdevvp (character device) for console.
                    472:  */
                    473: int
                    474: getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
                    475: {
                    476:        struct vnode *vp;
                    477:        struct vnode *nvp;
                    478:        int error;
                    479:
                    480:        if (dev == NODEV) {
                    481:                *vpp = NULLVP;
                    482:                return (0);
                    483:        }
                    484:        error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
                    485:        if (error) {
                    486:                *vpp = NULLVP;
                    487:                return (error);
                    488:        }
                    489:        vp = nvp;
                    490:        vp->v_type = type;
                    491:        if ((nvp = checkalias(vp, dev, NULL)) != 0) {
                    492:                vput(vp);
                    493:                vp = nvp;
                    494:        }
                    495:        *vpp = vp;
                    496:        return (0);
                    497: }
                    498:
                    499: /*
                    500:  * Check to see if the new vnode represents a special device
                    501:  * for which we already have a vnode (either because of
                    502:  * bdevvp() or because of a different vnode representing
                    503:  * the same block device). If such an alias exists, deallocate
                    504:  * the existing contents and return the aliased vnode. The
                    505:  * caller is responsible for filling it with its new contents.
                    506:  */
                    507: struct vnode *
                    508: checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
                    509: {
                    510:        struct proc *p = curproc;
                    511:        struct vnode *vp;
                    512:        struct vnode **vpp;
                    513:
                    514:        if (nvp->v_type != VBLK && nvp->v_type != VCHR)
                    515:                return (NULLVP);
                    516:
                    517:        vpp = &speclisth[SPECHASH(nvp_rdev)];
                    518: loop:
                    519:        for (vp = *vpp; vp; vp = vp->v_specnext) {
                    520:                if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
                    521:                        continue;
                    522:                }
                    523:                /*
                    524:                 * Alias, but not in use, so flush it out.
                    525:                 */
                    526:                if (vp->v_usecount == 0) {
                    527:                        vgonel(vp, p);
                    528:                        goto loop;
                    529:                }
                    530:                if (vget(vp, LK_EXCLUSIVE, p)) {
                    531:                        goto loop;
                    532:                }
                    533:                break;
                    534:        }
                    535:
                    536:        /*
                    537:         * Common case is actually in the if statement
                    538:         */
                    539:        if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
                    540:                MALLOC(nvp->v_specinfo, struct specinfo *,
                    541:                        sizeof(struct specinfo), M_VNODE, M_WAITOK);
                    542:                nvp->v_rdev = nvp_rdev;
                    543:                nvp->v_hashchain = vpp;
                    544:                nvp->v_specnext = *vpp;
                    545:                nvp->v_specmountpoint = NULL;
                    546:                nvp->v_speclockf = NULL;
                    547:                bzero(nvp->v_specbitmap, sizeof(nvp->v_specbitmap));
                    548:                *vpp = nvp;
                    549:                if (vp != NULLVP) {
                    550:                        nvp->v_flag |= VALIASED;
                    551:                        vp->v_flag |= VALIASED;
                    552:                        vput(vp);
                    553:                }
                    554:                return (NULLVP);
                    555:        }
                    556:
                    557:        /*
                    558:         * This code is the uncommon case. It is called in case
                    559:         * we found an alias that was VT_NON && vtype of VBLK
                    560:         * This means we found a block device that was created
                    561:         * using bdevvp.
                    562:         * An example of such a vnode is the root partition device vnode
                    563:         * created in ffs_mountroot.
                    564:         *
                    565:         * The vnodes created by bdevvp should not be aliased (why?).
                    566:         */
                    567:
                    568:        VOP_UNLOCK(vp, 0, p);
                    569:        vclean(vp, 0, p);
                    570:        vp->v_op = nvp->v_op;
                    571:        vp->v_tag = nvp->v_tag;
                    572:        nvp->v_type = VNON;
                    573:        insmntque(vp, mp);
                    574:        return (vp);
                    575: }
                    576:
                    577: /*
                    578:  * Grab a particular vnode from the free list, increment its
                    579:  * reference count and lock it. If the vnode lock bit is set,
                    580:  * the vnode is being eliminated in vgone. In that case, we
                    581:  * cannot grab it, so the process is awakened when the
                    582:  * transition is completed, and an error code is returned to
                    583:  * indicate that the vnode is no longer usable, possibly
                    584:  * having been changed to a new file system type.
                    585:  */
                    586: int
                    587: vget(struct vnode *vp, int flags, struct proc *p)
                    588: {
                    589:        int error, s, onfreelist;
                    590:
                    591:        /*
                    592:         * If the vnode is in the process of being cleaned out for
                    593:         * another use, we wait for the cleaning to finish and then
                    594:         * return failure. Cleaning is determined by checking that
                    595:         * the VXLOCK flag is set.
                    596:         */
                    597:
                    598:        if (vp->v_flag & VXLOCK) {
                    599:                if (flags & LK_NOWAIT) {
                    600:                        return (EBUSY);
                    601:                }
                    602:
                    603:                vp->v_flag |= VXWANT;
                    604:                ltsleep(vp, PINOD | PNORELOCK, "vget", 0, NULL);
                    605:                return (ENOENT);
                    606:        }
                    607:
                    608:        onfreelist = vp->v_bioflag & VBIOONFREELIST;
                    609:        if (vp->v_usecount == 0 && onfreelist) {
                    610:                s = splbio();
                    611:                if (vp->v_holdcnt > 0)
                    612:                        TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
                    613:                else
                    614:                        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                    615:                vp->v_bioflag &= ~VBIOONFREELIST;
                    616:                splx(s);
                    617:        }
                    618:
                    619:        vp->v_usecount++;
                    620:        if (flags & LK_TYPE_MASK) {
                    621:                if ((error = vn_lock(vp, flags, p)) != 0) {
                    622:                        vp->v_usecount--;
                    623:                        if (vp->v_usecount == 0 && onfreelist)
                    624:                                vputonfreelist(vp);
                    625:                }
                    626:                return (error);
                    627:        }
                    628:
                    629:        return (0);
                    630: }
                    631:
                    632:
                    633: /* Vnode reference. */
                    634: void
                    635: vref(struct vnode *vp)
                    636: {
                    637: #ifdef DIAGNOSTIC
                    638:        if (vp->v_usecount == 0)
                    639:                panic("vref used where vget required");
                    640: #endif
                    641:        vp->v_usecount++;
                    642: }
                    643:
                    644: void
                    645: vputonfreelist(struct vnode *vp)
                    646: {
                    647:        int s;
                    648:        struct freelst *lst;
                    649:
                    650:        s = splbio();
                    651: #ifdef DIAGNOSTIC
                    652:        if (vp->v_usecount != 0)
                    653:                panic("Use count is not zero!");
                    654:
                    655:        if (vp->v_bioflag & VBIOONFREELIST) {
                    656:                vprint("vnode already on free list: ", vp);
                    657:                panic("vnode already on free list");
                    658:        }
                    659: #endif
                    660:
                    661:        vp->v_bioflag |= VBIOONFREELIST;
                    662:
                    663:        if (vp->v_holdcnt > 0)
                    664:                lst = &vnode_hold_list;
                    665:        else
                    666:                lst = &vnode_free_list;
                    667:
                    668:        if (vp->v_type == VBAD)
                    669:                TAILQ_INSERT_HEAD(lst, vp, v_freelist);
                    670:        else
                    671:                TAILQ_INSERT_TAIL(lst, vp, v_freelist);
                    672:
                    673:        splx(s);
                    674: }
                    675:
                    676: /*
                    677:  * vput(), just unlock and vrele()
                    678:  */
                    679: void
                    680: vput(struct vnode *vp)
                    681: {
                    682:        struct proc *p = curproc;
                    683:
                    684: #ifdef DIAGNOSTIC
                    685:        if (vp == NULL)
                    686:                panic("vput: null vp");
                    687: #endif
                    688:
                    689: #ifdef DIAGNOSTIC
                    690:        if (vp->v_usecount == 0) {
                    691:                vprint("vput: bad ref count", vp);
                    692:                panic("vput: ref cnt");
                    693:        }
                    694: #endif
                    695:        vp->v_usecount--;
                    696:        if (vp->v_usecount > 0) {
                    697:                VOP_UNLOCK(vp, 0, p);
                    698:                return;
                    699:        }
                    700:
                    701: #ifdef DIAGNOSTIC
                    702:        if (vp->v_writecount != 0) {
                    703:                vprint("vput: bad writecount", vp);
                    704:                panic("vput: v_writecount != 0");
                    705:        }
                    706: #endif
                    707:
                    708:        VOP_INACTIVE(vp, p);
                    709:
                    710:        if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
                    711:                vputonfreelist(vp);
                    712: }
                    713:
                    714: /*
                    715:  * Vnode release - use for active VNODES.
                    716:  * If count drops to zero, call inactive routine and return to freelist.
                    717:  */
                    718: void
                    719: vrele(struct vnode *vp)
                    720: {
                    721:        struct proc *p = curproc;
                    722:
                    723: #ifdef DIAGNOSTIC
                    724:        if (vp == NULL)
                    725:                panic("vrele: null vp");
                    726: #endif
                    727: #ifdef DIAGNOSTIC
                    728:        if (vp->v_usecount == 0) {
                    729:                vprint("vrele: bad ref count", vp);
                    730:                panic("vrele: ref cnt");
                    731:        }
                    732: #endif
                    733:        vp->v_usecount--;
                    734:        if (vp->v_usecount > 0) {
                    735:                return;
                    736:        }
                    737:
                    738: #ifdef DIAGNOSTIC
                    739:        if (vp->v_writecount != 0) {
                    740:                vprint("vrele: bad writecount", vp);
                    741:                panic("vrele: v_writecount != 0");
                    742:        }
                    743: #endif
                    744:
                    745:        if (vn_lock(vp, LK_EXCLUSIVE, p)) {
                    746: #ifdef DIAGNOSTIC
                    747:                vprint("vrele: cannot lock", vp);
                    748: #endif
                    749:                return;
                    750:        }
                    751:
                    752:        VOP_INACTIVE(vp, p);
                    753:
                    754:        if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
                    755:                vputonfreelist(vp);
                    756: }
                    757:
                    758: void vhold(struct vnode *vp);
                    759:
                    760: /*
                    761:  * Page or buffer structure gets a reference.
                    762:  */
                    763: void
                    764: vhold(struct vnode *vp)
                    765: {
                    766:        /*
                    767:         * If it is on the freelist and the hold count is currently
                    768:         * zero, move it to the hold list.
                    769:         */
                    770:        if ((vp->v_bioflag & VBIOONFREELIST) &&
                    771:            vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                    772:                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                    773:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                    774:        }
                    775:        vp->v_holdcnt++;
                    776: }
                    777:
                    778: /*
                    779:  * Remove any vnodes in the vnode table belonging to mount point mp.
                    780:  *
                    781:  * If MNT_NOFORCE is specified, there should not be any active ones,
                    782:  * return error if any are found (nb: this is a user error, not a
                    783:  * system error). If MNT_FORCE is specified, detach any active vnodes
                    784:  * that are found.
                    785:  */
                    786: #ifdef DEBUG
                    787: int busyprt = 0;       /* print out busy vnodes */
                    788: struct ctldebug debug1 = { "busyprt", &busyprt };
                    789: #endif
                    790:
                    791: int
                    792: vfs_mount_foreach_vnode(struct mount *mp,
                    793:     int (*func)(struct vnode *, void *), void *arg) {
                    794:        struct vnode *vp, *nvp;
                    795:        int error = 0;
                    796:
                    797: loop:
                    798:        for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
                    799:                if (vp->v_mount != mp)
                    800:                        goto loop;
                    801:                nvp = LIST_NEXT(vp, v_mntvnodes);
                    802:
                    803:                error = func(vp, arg);
                    804:
                    805:                if (error != 0)
                    806:                        break;
                    807:        }
                    808:
                    809:        return (error);
                    810: }
                    811:
                    812: struct vflush_args {
                    813:        struct vnode *skipvp;
                    814:        int busy;
                    815:        int flags;
                    816: };
                    817:
                    818: int
                    819: vflush_vnode(struct vnode *vp, void *arg) {
                    820:        struct vflush_args *va = arg;
                    821:        struct proc *p = curproc;
                    822:
                    823:        if (vp == va->skipvp) {
                    824:                return (0);
                    825:        }
                    826:
                    827:        if ((va->flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
                    828:                return (0);
                    829:        }
                    830:
                    831:        /*
                    832:         * If WRITECLOSE is set, only flush out regular file
                    833:         * vnodes open for writing.
                    834:         */
                    835:        if ((va->flags & WRITECLOSE) &&
                    836:            (vp->v_writecount == 0 || vp->v_type != VREG)) {
                    837:                return (0);
                    838:        }
                    839:
                    840:        /*
                    841:         * With v_usecount == 0, all we need to do is clear
                    842:         * out the vnode data structures and we are done.
                    843:         */
                    844:        if (vp->v_usecount == 0) {
                    845:                vgonel(vp, p);
                    846:                return (0);
                    847:        }
                    848:
                    849:        /*
                    850:         * If FORCECLOSE is set, forcibly close the vnode.
                    851:         * For block or character devices, revert to an
                    852:         * anonymous device. For all other files, just kill them.
                    853:         */
                    854:        if (va->flags & FORCECLOSE) {
                    855:                if (vp->v_type != VBLK && vp->v_type != VCHR) {
                    856:                        vgonel(vp, p);
                    857:                } else {
                    858:                        vclean(vp, 0, p);
                    859:                        vp->v_op = spec_vnodeop_p;
                    860:                        insmntque(vp, (struct mount *)0);
                    861:                }
                    862:                return (0);
                    863:        }
                    864:
                    865: #ifdef DEBUG
                    866:        if (busyprt)
                    867:                vprint("vflush: busy vnode", vp);
                    868: #endif
                    869:        va->busy++;
                    870:        return (0);
                    871: }
                    872:
                    873: int
                    874: vflush(struct mount *mp, struct vnode *skipvp, int flags)
                    875: {
                    876:        struct vflush_args va;
                    877:        va.skipvp = skipvp;
                    878:        va.busy = 0;
                    879:        va.flags = flags;
                    880:
                    881:        vfs_mount_foreach_vnode(mp, vflush_vnode, &va);
                    882:
                    883:        if (va.busy)
                    884:                return (EBUSY);
                    885:        return (0);
                    886: }
                    887:
                    888: /*
                    889:  * Disassociate the underlying file system from a vnode.
                    890:  */
                    891: void
                    892: vclean(struct vnode *vp, int flags, struct proc *p)
                    893: {
                    894:        int active;
                    895:
                    896:        /*
                    897:         * Check to see if the vnode is in use.
                    898:         * If so we have to reference it before we clean it out
                    899:         * so that its count cannot fall to zero and generate a
                    900:         * race against ourselves to recycle it.
                    901:         */
                    902:        if ((active = vp->v_usecount) != 0)
                    903:                vp->v_usecount++;
                    904:
                    905:        /*
                    906:         * Prevent the vnode from being recycled or
                    907:         * brought into use while we clean it out.
                    908:         */
                    909:        if (vp->v_flag & VXLOCK)
                    910:                panic("vclean: deadlock");
                    911:        vp->v_flag |= VXLOCK;
                    912:        /*
                    913:         * Even if the count is zero, the VOP_INACTIVE routine may still
                    914:         * have the object locked while it cleans it out. The VOP_LOCK
                    915:         * ensures that the VOP_INACTIVE routine is done with its work.
                    916:         * For active vnodes, it ensures that no other activity can
                    917:         * occur while the underlying object is being cleaned out.
                    918:         */
                    919:        VOP_LOCK(vp, LK_DRAIN, p);
                    920:
                    921:        /*
                    922:         * Clean out any VM data associated with the vnode.
                    923:         */
                    924:        uvm_vnp_terminate(vp);
                    925:        /*
                    926:         * Clean out any buffers associated with the vnode.
                    927:         */
                    928:        if (flags & DOCLOSE)
                    929:                vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
                    930:        /*
                    931:         * If purging an active vnode, it must be closed and
                    932:         * deactivated before being reclaimed. Note that the
                    933:         * VOP_INACTIVE will unlock the vnode
                    934:         */
                    935:        if (active) {
                    936:                if (flags & DOCLOSE)
                    937:                        VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
                    938:                VOP_INACTIVE(vp, p);
                    939:        } else {
                    940:                /*
                    941:                 * Any other processes trying to obtain this lock must first
                    942:                 * wait for VXLOCK to clear, then call the new lock operation.
                    943:                 */
                    944:                VOP_UNLOCK(vp, 0, p);
                    945:        }
                    946:
                    947:        /*
                    948:         * Reclaim the vnode.
                    949:         */
                    950:        if (VOP_RECLAIM(vp, p))
                    951:                panic("vclean: cannot reclaim");
                    952:        if (active) {
                    953:                vp->v_usecount--;
                    954:                if (vp->v_usecount == 0) {
                    955:                        if (vp->v_holdcnt > 0)
                    956:                                panic("vclean: not clean");
                    957:                        vputonfreelist(vp);
                    958:                }
                    959:        }
                    960:        cache_purge(vp);
                    961:
                    962:        /*
                    963:         * Done with purge, notify sleepers of the grim news.
                    964:         */
                    965:        vp->v_op = dead_vnodeop_p;
                    966:        VN_KNOTE(vp, NOTE_REVOKE);
                    967:        vp->v_tag = VT_NON;
                    968:        vp->v_flag &= ~VXLOCK;
                    969: #ifdef VFSDEBUG
                    970:        vp->v_flag &= ~VLOCKSWORK;
                    971: #endif
                    972:        if (vp->v_flag & VXWANT) {
                    973:                vp->v_flag &= ~VXWANT;
                    974:                wakeup(vp);
                    975:        }
                    976: }
                    977:
                    978: /*
                    979:  * Recycle an unused vnode to the front of the free list.
                    980:  */
                    981: int
                    982: vrecycle(struct vnode *vp, struct proc *p)
                    983: {
                    984:        if (vp->v_usecount == 0) {
                    985:                vgonel(vp, p);
                    986:                return (1);
                    987:        }
                    988:        return (0);
                    989: }
                    990:
                    991: /*
                    992:  * Eliminate all activity associated with a vnode
                    993:  * in preparation for reuse.
                    994:  */
                    995: void
                    996: vgone(struct vnode *vp)
                    997: {
                    998:        struct proc *p = curproc;
                    999:        vgonel(vp, p);
                   1000: }
                   1001:
                   1002: /*
                   1003:  * vgone, with struct proc.
                   1004:  */
                   1005: void
                   1006: vgonel(struct vnode *vp, struct proc *p)
                   1007: {
                   1008:        struct vnode *vq;
                   1009:        struct vnode *vx;
                   1010:        struct mount *mp;
                   1011:        int flags;
                   1012:
                   1013:        /*
                   1014:         * If a vgone (or vclean) is already in progress,
                   1015:         * wait until it is done and return.
                   1016:         */
                   1017:        if (vp->v_flag & VXLOCK) {
                   1018:                vp->v_flag |= VXWANT;
                   1019:                ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, NULL);
                   1020:                return;
                   1021:        }
                   1022:
                   1023:        /*
                   1024:         * Clean out the filesystem specific data.
                   1025:         */
                   1026:        vclean(vp, DOCLOSE, p);
                   1027:        /*
                   1028:         * Delete from old mount point vnode list, if on one.
                   1029:         */
                   1030:        if (vp->v_mount != NULL)
                   1031:                insmntque(vp, (struct mount *)0);
                   1032:        /*
                   1033:         * If special device, remove it from special device alias list
                   1034:         * if it is on one.
                   1035:         */
                   1036:        if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
                   1037:                if (*vp->v_hashchain == vp) {
                   1038:                        *vp->v_hashchain = vp->v_specnext;
                   1039:                } else {
                   1040:                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                   1041:                                if (vq->v_specnext != vp)
                   1042:                                        continue;
                   1043:                                vq->v_specnext = vp->v_specnext;
                   1044:                                break;
                   1045:                        }
                   1046:                        if (vq == NULL)
                   1047:                                panic("missing bdev");
                   1048:                }
                   1049:                if (vp->v_flag & VALIASED) {
                   1050:                        vx = NULL;
                   1051:                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                   1052:                                if (vq->v_rdev != vp->v_rdev ||
                   1053:                                    vq->v_type != vp->v_type)
                   1054:                                        continue;
                   1055:                                if (vx)
                   1056:                                        break;
                   1057:                                vx = vq;
                   1058:                        }
                   1059:                        if (vx == NULL)
                   1060:                                panic("missing alias");
                   1061:                        if (vq == NULL)
                   1062:                                vx->v_flag &= ~VALIASED;
                   1063:                        vp->v_flag &= ~VALIASED;
                   1064:                }
                   1065:
                   1066:                /*
                   1067:                 * If we have a mount point associated with the vnode, we must
                   1068:                 * flush it out now, as to not leave a dangling zombie mount
                   1069:                 * point laying around in VFS.
                   1070:                 */
                   1071:                mp = vp->v_specmountpoint;
                   1072:                if (mp != NULL) {
                   1073:                        if (!vfs_busy(mp, VB_WRITE|VB_WAIT)) {
                   1074:                                flags = MNT_FORCE | MNT_DOOMED;
                   1075:                                dounmount(mp, flags, p, NULL);
                   1076:                        }
                   1077:                }
                   1078:
                   1079:                FREE(vp->v_specinfo, M_VNODE);
                   1080:                vp->v_specinfo = NULL;
                   1081:        }
                   1082:        /*
                   1083:         * If it is on the freelist and not already at the head,
                   1084:         * move it to the head of the list.
                   1085:         */
                   1086:        vp->v_type = VBAD;
                   1087:
                   1088:        /*
                   1089:         * Move onto the free list, unless we were called from
                   1090:         * getnewvnode and we're not on any free list
                   1091:         */
                   1092:        if (vp->v_usecount == 0 &&
                   1093:            (vp->v_bioflag & VBIOONFREELIST)) {
                   1094:                int s;
                   1095:
                   1096:                s = splbio();
                   1097:
                   1098:                if (vp->v_holdcnt > 0)
                   1099:                        panic("vgonel: not clean");
                   1100:
                   1101:                if (TAILQ_FIRST(&vnode_free_list) != vp) {
                   1102:                        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                   1103:                        TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
                   1104:                }
                   1105:                splx(s);
                   1106:        }
                   1107: }
                   1108:
                   1109: /*
                   1110:  * Lookup a vnode by device number.
                   1111:  */
                   1112: int
                   1113: vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
                   1114: {
                   1115:        struct vnode *vp;
                   1116:        int rc =0;
                   1117:
                   1118:        for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
                   1119:                if (dev != vp->v_rdev || type != vp->v_type)
                   1120:                        continue;
                   1121:                *vpp = vp;
                   1122:                rc = 1;
                   1123:                break;
                   1124:        }
                   1125:        return (rc);
                   1126: }
                   1127:
                   1128: /*
                   1129:  * Revoke all the vnodes corresponding to the specified minor number
                   1130:  * range (endpoints inclusive) of the specified major.
                   1131:  */
                   1132: void
                   1133: vdevgone(int maj, int minl, int minh, enum vtype type)
                   1134: {
                   1135:        struct vnode *vp;
                   1136:        int mn;
                   1137:
                   1138:        for (mn = minl; mn <= minh; mn++)
                   1139:                if (vfinddev(makedev(maj, mn), type, &vp))
                   1140:                        VOP_REVOKE(vp, REVOKEALL);
                   1141: }
                   1142:
                   1143: /*
                   1144:  * Calculate the total number of references to a special device.
                   1145:  */
                   1146: int
                   1147: vcount(struct vnode *vp)
                   1148: {
                   1149:        struct vnode *vq, *vnext;
                   1150:        int count;
                   1151:
                   1152: loop:
                   1153:        if ((vp->v_flag & VALIASED) == 0)
                   1154:                return (vp->v_usecount);
                   1155:        for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
                   1156:                vnext = vq->v_specnext;
                   1157:                if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
                   1158:                        continue;
                   1159:                /*
                   1160:                 * Alias, but not in use, so flush it out.
                   1161:                 */
                   1162:                if (vq->v_usecount == 0 && vq != vp) {
                   1163:                        vgone(vq);
                   1164:                        goto loop;
                   1165:                }
                   1166:                count += vq->v_usecount;
                   1167:        }
                   1168:        return (count);
                   1169: }
                   1170:
                   1171: #if defined(DEBUG) || defined(DIAGNOSTIC)
                   1172: /*
                   1173:  * Print out a description of a vnode.
                   1174:  */
                   1175: static char *typename[] =
                   1176:    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
                   1177:
                   1178: void
                   1179: vprint(char *label, struct vnode *vp)
                   1180: {
                   1181:        char buf[64];
                   1182:
                   1183:        if (label != NULL)
                   1184:                printf("%s: ", label);
                   1185:        printf("%p, type %s, use %u, write %u, hold %u,",
                   1186:                vp, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
                   1187:                vp->v_holdcnt);
                   1188:        buf[0] = '\0';
                   1189:        if (vp->v_flag & VROOT)
                   1190:                strlcat(buf, "|VROOT", sizeof buf);
                   1191:        if (vp->v_flag & VTEXT)
                   1192:                strlcat(buf, "|VTEXT", sizeof buf);
                   1193:        if (vp->v_flag & VSYSTEM)
                   1194:                strlcat(buf, "|VSYSTEM", sizeof buf);
                   1195:        if (vp->v_flag & VXLOCK)
                   1196:                strlcat(buf, "|VXLOCK", sizeof buf);
                   1197:        if (vp->v_flag & VXWANT)
                   1198:                strlcat(buf, "|VXWANT", sizeof buf);
                   1199:        if (vp->v_bioflag & VBIOWAIT)
                   1200:                strlcat(buf, "|VBIOWAIT", sizeof buf);
                   1201:        if (vp->v_bioflag & VBIOONFREELIST)
                   1202:                strlcat(buf, "|VBIOONFREELIST", sizeof buf);
                   1203:        if (vp->v_bioflag & VBIOONSYNCLIST)
                   1204:                strlcat(buf, "|VBIOONSYNCLIST", sizeof buf);
                   1205:        if (vp->v_flag & VALIASED)
                   1206:                strlcat(buf, "|VALIASED", sizeof buf);
                   1207:        if (buf[0] != '\0')
                   1208:                printf(" flags (%s)", &buf[1]);
                   1209:        if (vp->v_data == NULL) {
                   1210:                printf("\n");
                   1211:        } else {
                   1212:                printf("\n\t");
                   1213:                VOP_PRINT(vp);
                   1214:        }
                   1215: }
                   1216: #endif /* DEBUG || DIAGNOSTIC */
                   1217:
                   1218: #ifdef DEBUG
                   1219: /*
                   1220:  * List all of the locked vnodes in the system.
                   1221:  * Called when debugging the kernel.
                   1222:  */
                   1223: void
                   1224: printlockedvnodes(void)
                   1225: {
                   1226:        struct mount *mp, *nmp;
                   1227:        struct vnode *vp;
                   1228:
                   1229:        printf("Locked vnodes\n");
                   1230:
                   1231:        for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
                   1232:            mp = nmp) {
                   1233:                if (vfs_busy(mp, VB_READ|VB_NOWAIT)) {
                   1234:                        nmp = CIRCLEQ_NEXT(mp, mnt_list);
                   1235:                        continue;
                   1236:                }
                   1237:                LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
                   1238:                        if (VOP_ISLOCKED(vp))
                   1239:                                vprint((char *)0, vp);
                   1240:                }
                   1241:                nmp = CIRCLEQ_NEXT(mp, mnt_list);
                   1242:                vfs_unbusy(mp);
                   1243:        }
                   1244:
                   1245: }
                   1246: #endif
                   1247:
                   1248: /*
                   1249:  * Top level filesystem related information gathering.
                   1250:  */
                   1251: int
                   1252: vfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
                   1253:     size_t newlen, struct proc *p)
                   1254: {
                   1255:        struct vfsconf *vfsp, *tmpvfsp;
                   1256:        int ret;
                   1257:
                   1258:        /* all sysctl names at this level are at least name and field */
                   1259:        if (namelen < 2)
                   1260:                return (ENOTDIR);               /* overloaded */
                   1261:
                   1262:        if (name[0] != VFS_GENERIC) {
                   1263:                for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
                   1264:                        if (vfsp->vfc_typenum == name[0])
                   1265:                                break;
                   1266:
                   1267:                if (vfsp == NULL)
                   1268:                        return (EOPNOTSUPP);
                   1269:
                   1270:                return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
                   1271:                    oldp, oldlenp, newp, newlen, p));
                   1272:        }
                   1273:
                   1274:        switch (name[1]) {
                   1275:        case VFS_MAXTYPENUM:
                   1276:                return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
                   1277:
                   1278:        case VFS_CONF:
                   1279:                if (namelen < 3)
                   1280:                        return (ENOTDIR);       /* overloaded */
                   1281:
                   1282:                for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
                   1283:                        if (vfsp->vfc_typenum == name[2])
                   1284:                                break;
                   1285:
                   1286:                if (vfsp == NULL)
                   1287:                        return (EOPNOTSUPP);
                   1288:
                   1289:                /* Make a copy, clear out kernel pointers */
                   1290:                tmpvfsp = malloc(sizeof(*tmpvfsp), M_TEMP, M_WAITOK);
                   1291:                bcopy(vfsp, tmpvfsp, sizeof(*tmpvfsp));
                   1292:                tmpvfsp->vfc_vfsops = NULL;
                   1293:                tmpvfsp->vfc_mountroot = NULL;
                   1294:                tmpvfsp->vfc_next = NULL;
                   1295:
                   1296:                ret = sysctl_rdstruct(oldp, oldlenp, newp, tmpvfsp,
                   1297:                    sizeof(struct vfsconf));
                   1298:
                   1299:                free(tmpvfsp, M_TEMP);
                   1300:                return (ret);
                   1301:        }
                   1302:
                   1303:        return (EOPNOTSUPP);
                   1304: }
                   1305:
                   1306: int kinfo_vdebug = 1;
                   1307: #define KINFO_VNODESLOP        10
                   1308: /*
                   1309:  * Dump vnode list (via sysctl).
                   1310:  * Copyout address of vnode followed by vnode.
                   1311:  */
                   1312: /* ARGSUSED */
                   1313: int
                   1314: sysctl_vnode(char *where, size_t *sizep, struct proc *p)
                   1315: {
                   1316:        struct mount *mp, *nmp;
                   1317:        struct vnode *vp, *nvp;
                   1318:        char *bp = where, *savebp;
                   1319:        char *ewhere;
                   1320:        int error;
                   1321:
                   1322:        if (where == NULL) {
                   1323:                *sizep = (numvnodes + KINFO_VNODESLOP) * sizeof(struct e_vnode);
                   1324:                return (0);
                   1325:        }
                   1326:        ewhere = where + *sizep;
                   1327:
                   1328:        for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
                   1329:            mp = nmp) {
                   1330:                if (vfs_busy(mp, VB_READ|VB_NOWAIT)) {
                   1331:                        nmp = CIRCLEQ_NEXT(mp, mnt_list);
                   1332:                        continue;
                   1333:                }
                   1334:                savebp = bp;
                   1335: again:
                   1336:                for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL;
                   1337:                    vp = nvp) {
                   1338:                        /*
                   1339:                         * Check that the vp is still associated with
                   1340:                         * this filesystem.  RACE: could have been
                   1341:                         * recycled onto the same filesystem.
                   1342:                         */
                   1343:                        if (vp->v_mount != mp) {
                   1344:                                if (kinfo_vdebug)
                   1345:                                        printf("kinfo: vp changed\n");
                   1346:                                bp = savebp;
                   1347:                                goto again;
                   1348:                        }
                   1349:                        nvp = LIST_NEXT(vp, v_mntvnodes);
                   1350:                        if (bp + sizeof(struct e_vnode) > ewhere) {
                   1351:                                *sizep = bp - where;
                   1352:                                vfs_unbusy(mp);
                   1353:                                return (ENOMEM);
                   1354:                        }
                   1355:                        if ((error = copyout(&vp,
                   1356:                            &((struct e_vnode *)bp)->vptr,
                   1357:                            sizeof(struct vnode *))) ||
                   1358:                           (error = copyout(vp,
                   1359:                            &((struct e_vnode *)bp)->vnode,
                   1360:                            sizeof(struct vnode)))) {
                   1361:                                vfs_unbusy(mp);
                   1362:                                return (error);
                   1363:                        }
                   1364:                        bp += sizeof(struct e_vnode);
                   1365:                }
                   1366:
                   1367:                nmp = CIRCLEQ_NEXT(mp, mnt_list);
                   1368:                vfs_unbusy(mp);
                   1369:        }
                   1370:
                   1371:        *sizep = bp - where;
                   1372:
                   1373:        return (0);
                   1374: }
                   1375:
                   1376: /*
                   1377:  * Check to see if a filesystem is mounted on a block device.
                   1378:  */
                   1379: int
                   1380: vfs_mountedon(struct vnode *vp)
                   1381: {
                   1382:        struct vnode *vq;
                   1383:        int error = 0;
                   1384:
                   1385:        if (vp->v_specmountpoint != NULL)
                   1386:                return (EBUSY);
                   1387:        if (vp->v_flag & VALIASED) {
                   1388:                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                   1389:                        if (vq->v_rdev != vp->v_rdev ||
                   1390:                            vq->v_type != vp->v_type)
                   1391:                                continue;
                   1392:                        if (vq->v_specmountpoint != NULL) {
                   1393:                                error = EBUSY;
                   1394:                                break;
                   1395:                        }
                   1396:                }
                   1397:        }
                   1398:        return (error);
                   1399: }
                   1400:
                   1401: /*
                   1402:  * Build hash lists of net addresses and hang them off the mount point.
                   1403:  * Called by ufs_mount() to set up the lists of export addresses.
                   1404:  */
                   1405: int
                   1406: vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
                   1407:     struct export_args *argp)
                   1408: {
                   1409:        struct netcred *np;
                   1410:        struct radix_node_head *rnh;
                   1411:        int i;
                   1412:        struct radix_node *rn;
                   1413:        struct sockaddr *saddr, *smask = 0;
                   1414:        struct domain *dom;
                   1415:        int error;
                   1416:
                   1417:        if (argp->ex_addrlen == 0) {
                   1418:                if (mp->mnt_flag & MNT_DEFEXPORTED)
                   1419:                        return (EPERM);
                   1420:                np = &nep->ne_defexported;
                   1421:                np->netc_exflags = argp->ex_flags;
                   1422:                np->netc_anon = argp->ex_anon;
                   1423:                np->netc_anon.cr_ref = 1;
                   1424:                mp->mnt_flag |= MNT_DEFEXPORTED;
                   1425:                return (0);
                   1426:        }
                   1427:        if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN ||
                   1428:            argp->ex_addrlen < 0 || argp->ex_masklen < 0)
                   1429:                return (EINVAL);
                   1430:        i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
                   1431:        np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
                   1432:        bzero(np, i);
                   1433:        saddr = (struct sockaddr *)(np + 1);
                   1434:        error = copyin(argp->ex_addr, saddr, argp->ex_addrlen);
                   1435:        if (error)
                   1436:                goto out;
                   1437:        if (saddr->sa_len > argp->ex_addrlen)
                   1438:                saddr->sa_len = argp->ex_addrlen;
                   1439:        if (argp->ex_masklen) {
                   1440:                smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
                   1441:                error = copyin(argp->ex_mask, smask, argp->ex_masklen);
                   1442:                if (error)
                   1443:                        goto out;
                   1444:                if (smask->sa_len > argp->ex_masklen)
                   1445:                        smask->sa_len = argp->ex_masklen;
                   1446:        }
                   1447:        i = saddr->sa_family;
                   1448:        if (i < 0 || i > AF_MAX) {
                   1449:                error = EINVAL;
                   1450:                goto out;
                   1451:        }
                   1452:        if ((rnh = nep->ne_rtable[i]) == 0) {
                   1453:                /*
                   1454:                 * Seems silly to initialize every AF when most are not
                   1455:                 * used, do so on demand here
                   1456:                 */
                   1457:                for (dom = domains; dom; dom = dom->dom_next)
                   1458:                        if (dom->dom_family == i && dom->dom_rtattach) {
                   1459:                                dom->dom_rtattach((void **)&nep->ne_rtable[i],
                   1460:                                        dom->dom_rtoffset);
                   1461:                                break;
                   1462:                        }
                   1463:                if ((rnh = nep->ne_rtable[i]) == 0) {
                   1464:                        error = ENOBUFS;
                   1465:                        goto out;
                   1466:                }
                   1467:        }
                   1468:        rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
                   1469:                np->netc_rnodes);
                   1470:        if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
                   1471:                error = EPERM;
                   1472:                goto out;
                   1473:        }
                   1474:        np->netc_exflags = argp->ex_flags;
                   1475:        np->netc_anon = argp->ex_anon;
                   1476:        np->netc_anon.cr_ref = 1;
                   1477:        return (0);
                   1478: out:
                   1479:        free(np, M_NETADDR);
                   1480:        return (error);
                   1481: }
                   1482:
                   1483: /* ARGSUSED */
                   1484: int
                   1485: vfs_free_netcred(struct radix_node *rn, void *w)
                   1486: {
                   1487:        struct radix_node_head *rnh = (struct radix_node_head *)w;
                   1488:
                   1489:        (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh, NULL);
                   1490:        free(rn, M_NETADDR);
                   1491:        return (0);
                   1492: }
                   1493:
                   1494: /*
                   1495:  * Free the net address hash lists that are hanging off the mount points.
                   1496:  */
                   1497: void
                   1498: vfs_free_addrlist(struct netexport *nep)
                   1499: {
                   1500:        int i;
                   1501:        struct radix_node_head *rnh;
                   1502:
                   1503:        for (i = 0; i <= AF_MAX; i++)
                   1504:                if ((rnh = nep->ne_rtable[i]) != NULL) {
                   1505:                        (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
                   1506:                        free(rnh, M_RTABLE);
                   1507:                        nep->ne_rtable[i] = 0;
                   1508:                }
                   1509: }
                   1510:
                   1511: int
                   1512: vfs_export(struct mount *mp, struct netexport *nep, struct export_args *argp)
                   1513: {
                   1514:        int error;
                   1515:
                   1516:        if (argp->ex_flags & MNT_DELEXPORT) {
                   1517:                vfs_free_addrlist(nep);
                   1518:                mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
                   1519:        }
                   1520:        if (argp->ex_flags & MNT_EXPORTED) {
                   1521:                if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
                   1522:                        return (error);
                   1523:                mp->mnt_flag |= MNT_EXPORTED;
                   1524:        }
                   1525:        return (0);
                   1526: }
                   1527:
                   1528: struct netcred *
                   1529: vfs_export_lookup(struct mount *mp, struct netexport *nep, struct mbuf *nam)
                   1530: {
                   1531:        struct netcred *np;
                   1532:        struct radix_node_head *rnh;
                   1533:        struct sockaddr *saddr;
                   1534:
                   1535:        np = NULL;
                   1536:        if (mp->mnt_flag & MNT_EXPORTED) {
                   1537:                /*
                   1538:                 * Lookup in the export list first.
                   1539:                 */
                   1540:                if (nam != NULL) {
                   1541:                        saddr = mtod(nam, struct sockaddr *);
                   1542:                        rnh = nep->ne_rtable[saddr->sa_family];
                   1543:                        if (rnh != NULL) {
                   1544:                                np = (struct netcred *)
                   1545:                                        (*rnh->rnh_matchaddr)((caddr_t)saddr,
                   1546:                                            rnh);
                   1547:                                if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
                   1548:                                        np = NULL;
                   1549:                        }
                   1550:                }
                   1551:                /*
                   1552:                 * If no address match, use the default if it exists.
                   1553:                 */
                   1554:                if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
                   1555:                        np = &nep->ne_defexported;
                   1556:        }
                   1557:        return (np);
                   1558: }
                   1559:
                   1560: /*
                   1561:  * Do the usual access checking.
                   1562:  * file_mode, uid and gid are from the vnode in question,
                   1563:  * while acc_mode and cred are from the VOP_ACCESS parameter list
                   1564:  */
                   1565: int
                   1566: vaccess(mode_t file_mode, uid_t uid, gid_t gid, mode_t acc_mode,
                   1567:     struct ucred *cred)
                   1568: {
                   1569:        mode_t mask;
                   1570:
                   1571:        /* User id 0 always gets access. */
                   1572:        if (cred->cr_uid == 0)
                   1573:                return 0;
                   1574:
                   1575:        mask = 0;
                   1576:
                   1577:        /* Otherwise, check the owner. */
                   1578:        if (cred->cr_uid == uid) {
                   1579:                if (acc_mode & VEXEC)
                   1580:                        mask |= S_IXUSR;
                   1581:                if (acc_mode & VREAD)
                   1582:                        mask |= S_IRUSR;
                   1583:                if (acc_mode & VWRITE)
                   1584:                        mask |= S_IWUSR;
                   1585:                return (file_mode & mask) == mask ? 0 : EACCES;
                   1586:        }
                   1587:
                   1588:        /* Otherwise, check the groups. */
                   1589:        if (cred->cr_gid == gid || groupmember(gid, cred)) {
                   1590:                if (acc_mode & VEXEC)
                   1591:                        mask |= S_IXGRP;
                   1592:                if (acc_mode & VREAD)
                   1593:                        mask |= S_IRGRP;
                   1594:                if (acc_mode & VWRITE)
                   1595:                        mask |= S_IWGRP;
                   1596:                return (file_mode & mask) == mask ? 0 : EACCES;
                   1597:        }
                   1598:
                   1599:        /* Otherwise, check everyone else. */
                   1600:        if (acc_mode & VEXEC)
                   1601:                mask |= S_IXOTH;
                   1602:        if (acc_mode & VREAD)
                   1603:                mask |= S_IROTH;
                   1604:        if (acc_mode & VWRITE)
                   1605:                mask |= S_IWOTH;
                   1606:        return (file_mode & mask) == mask ? 0 : EACCES;
                   1607: }
                   1608:
                   1609: /*
                   1610:  * Unmount all file systems.
                   1611:  * We traverse the list in reverse order under the assumption that doing so
                   1612:  * will avoid needing to worry about dependencies.
                   1613:  */
                   1614: void
                   1615: vfs_unmountall(void)
                   1616: {
                   1617:        struct mount *mp, *nmp;
                   1618:        int allerror, error, again = 1;
                   1619:
                   1620:  retry:
                   1621:        allerror = 0;
                   1622:        for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
                   1623:            mp = nmp) {
                   1624:                nmp = CIRCLEQ_PREV(mp, mnt_list);
                   1625:                if ((vfs_busy(mp, VB_WRITE|VB_NOWAIT)) != 0)
                   1626:                        continue;
                   1627:                if ((error = dounmount(mp, MNT_FORCE, curproc, NULL)) != 0) {
                   1628:                        printf("unmount of %s failed with error %d\n",
                   1629:                            mp->mnt_stat.f_mntonname, error);
                   1630:                        allerror = 1;
                   1631:                }
                   1632:        }
                   1633:
                   1634:        if (allerror) {
                   1635:                printf("WARNING: some file systems would not unmount\n");
                   1636:                if (again) {
                   1637:                        printf("retrying\n");
                   1638:                        again = 0;
                   1639:                        goto retry;
                   1640:                }
                   1641:        }
                   1642: }
                   1643:
                   1644: /*
                   1645:  * Sync and unmount file systems before shutting down.
                   1646:  */
                   1647: void
                   1648: vfs_shutdown(void)
                   1649: {
                   1650: #ifdef ACCOUNTING
                   1651:        extern void acct_shutdown(void);
                   1652:
                   1653:        acct_shutdown();
                   1654: #endif
                   1655:
                   1656:        /* XXX Should suspend scheduling. */
                   1657:        (void) spl0();
                   1658:
                   1659:        printf("syncing disks... ");
                   1660:
                   1661:        if (panicstr == 0) {
                   1662:                /* Sync before unmount, in case we hang on something. */
                   1663:                sys_sync(&proc0, (void *)0, (register_t *)0);
                   1664:
                   1665:                /* Unmount file systems. */
                   1666:                vfs_unmountall();
                   1667:        }
                   1668:
                   1669:        if (vfs_syncwait(1))
                   1670:                printf("giving up\n");
                   1671:        else
                   1672:                printf("done\n");
                   1673: }
                   1674:
                   1675: /*
                   1676:  * perform sync() operation and wait for buffers to flush.
                   1677:  * assumtions: called w/ scheduler disabled and physical io enabled
                   1678:  * for now called at spl0() XXX
                   1679:  */
                   1680: int
                   1681: vfs_syncwait(int verbose)
                   1682: {
                   1683:        struct buf *bp;
                   1684:        int iter, nbusy, dcount, s;
                   1685:        struct proc *p;
                   1686:
                   1687:        p = curproc? curproc : &proc0;
                   1688:        sys_sync(p, (void *)0, (register_t *)0);
                   1689:
                   1690:        /* Wait for sync to finish. */
                   1691:        dcount = 10000;
                   1692:        for (iter = 0; iter < 20; iter++) {
                   1693:                nbusy = 0;
                   1694:                LIST_FOREACH(bp, &bufhead, b_list) {
                   1695:                        if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
                   1696:                                nbusy++;
                   1697:                        /*
                   1698:                         * With soft updates, some buffers that are
                   1699:                         * written will be remarked as dirty until other
                   1700:                         * buffers are written.
                   1701:                         */
                   1702:                        if (bp->b_flags & B_DELWRI) {
                   1703:                                s = splbio();
                   1704:                                bremfree(bp);
                   1705:                                bp->b_flags |= B_BUSY;
                   1706:                                splx(s);
                   1707:                                nbusy++;
                   1708:                                bawrite(bp);
                   1709:                                if (dcount-- <= 0) {
                   1710:                                        if (verbose)
                   1711:                                                printf("softdep ");
                   1712:                                        return 1;
                   1713:                                }
                   1714:                        }
                   1715:                }
                   1716:                if (nbusy == 0)
                   1717:                        break;
                   1718:                if (verbose)
                   1719:                        printf("%d ", nbusy);
                   1720:                DELAY(40000 * iter);
                   1721:        }
                   1722:
                   1723:        return nbusy;
                   1724: }
                   1725:
                   1726: /*
                   1727:  * posix file system related system variables.
                   1728:  */
                   1729: int
                   1730: fs_posix_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
                   1731:     void *newp, size_t newlen, struct proc *p)
                   1732: {
                   1733:        /* all sysctl names at this level are terminal */
                   1734:        if (namelen != 1)
                   1735:                return (ENOTDIR);
                   1736:
                   1737:        switch (name[0]) {
                   1738:        case FS_POSIX_SETUID:
                   1739:                if (newp && securelevel > 0)
                   1740:                        return (EPERM);
                   1741:                return(sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
                   1742:        default:
                   1743:                return (EOPNOTSUPP);
                   1744:        }
                   1745:        /* NOTREACHED */
                   1746: }
                   1747:
                   1748: /*
                   1749:  * file system related system variables.
                   1750:  */
                   1751: int
                   1752: fs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
                   1753:     size_t newlen, struct proc *p)
                   1754: {
                   1755:        sysctlfn *fn;
                   1756:
                   1757:        switch (name[0]) {
                   1758:        case FS_POSIX:
                   1759:                fn = fs_posix_sysctl;
                   1760:                break;
                   1761:        default:
                   1762:                return (EOPNOTSUPP);
                   1763:        }
                   1764:        return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
                   1765: }
                   1766:
                   1767:
                   1768: /*
                   1769:  * Routines dealing with vnodes and buffers
                   1770:  */
                   1771:
                   1772: /*
                   1773:  * Wait for all outstanding I/Os to complete
                   1774:  *
                   1775:  * Manipulates v_numoutput. Must be called at splbio()
                   1776:  */
                   1777: int
                   1778: vwaitforio(struct vnode *vp, int slpflag, char *wmesg, int timeo)
                   1779: {
                   1780:        int error = 0;
                   1781:
                   1782:        splassert(IPL_BIO);
                   1783:
                   1784:        while (vp->v_numoutput) {
                   1785:                vp->v_bioflag |= VBIOWAIT;
                   1786:                error = tsleep(&vp->v_numoutput,
                   1787:                    slpflag | (PRIBIO + 1), wmesg, timeo);
                   1788:                if (error)
                   1789:                        break;
                   1790:        }
                   1791:
                   1792:        return (error);
                   1793: }
                   1794:
                   1795: /*
                   1796:  * Update outstanding I/O count and do wakeup if requested.
                   1797:  *
                   1798:  * Manipulates v_numoutput. Must be called at splbio()
                   1799:  */
                   1800: void
                   1801: vwakeup(struct vnode *vp)
                   1802: {
                   1803:        splassert(IPL_BIO);
                   1804:
                   1805:        if (vp != NULL) {
                   1806:                if (vp->v_numoutput-- == 0)
                   1807:                        panic("vwakeup: neg numoutput");
                   1808:                if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
                   1809:                        vp->v_bioflag &= ~VBIOWAIT;
                   1810:                        wakeup(&vp->v_numoutput);
                   1811:                }
                   1812:        }
                   1813: }
                   1814:
                   1815: /*
                   1816:  * Flush out and invalidate all buffers associated with a vnode.
                   1817:  * Called with the underlying object locked.
                   1818:  */
                   1819: int
                   1820: vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct proc *p,
                   1821:     int slpflag, int slptimeo)
                   1822: {
                   1823:        struct buf *bp;
                   1824:        struct buf *nbp, *blist;
                   1825:        int s, error;
                   1826:
                   1827: #ifdef VFSDEBUG
                   1828:        if ((vp->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp))
                   1829:                panic("vinvalbuf(): vp isn't locked");
                   1830: #endif
                   1831:
                   1832:        if (flags & V_SAVE) {
                   1833:                s = splbio();
                   1834:                vwaitforio(vp, 0, "vinvalbuf", 0);
                   1835:                if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
                   1836:                        splx(s);
                   1837:                        if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
                   1838:                                return (error);
                   1839:                        s = splbio();
                   1840:                        if (vp->v_numoutput > 0 ||
                   1841:                            !LIST_EMPTY(&vp->v_dirtyblkhd))
                   1842:                                panic("vinvalbuf: dirty bufs");
                   1843:                }
                   1844:                splx(s);
                   1845:        }
                   1846: loop:
                   1847:        s = splbio();
                   1848:        for (;;) {
                   1849:                if ((blist = LIST_FIRST(&vp->v_cleanblkhd)) &&
                   1850:                    (flags & V_SAVEMETA))
                   1851:                        while (blist && blist->b_lblkno < 0)
                   1852:                                blist = LIST_NEXT(blist, b_vnbufs);
                   1853:                if (blist == NULL &&
                   1854:                    (blist = LIST_FIRST(&vp->v_dirtyblkhd)) &&
                   1855:                    (flags & V_SAVEMETA))
                   1856:                        while (blist && blist->b_lblkno < 0)
                   1857:                                blist = LIST_NEXT(blist, b_vnbufs);
                   1858:                if (!blist)
                   1859:                        break;
                   1860:
                   1861:                for (bp = blist; bp; bp = nbp) {
                   1862:                        nbp = LIST_NEXT(bp, b_vnbufs);
                   1863:                        if (flags & V_SAVEMETA && bp->b_lblkno < 0)
                   1864:                                continue;
                   1865:                        if (bp->b_flags & B_BUSY) {
                   1866:                                bp->b_flags |= B_WANTED;
                   1867:                                error = tsleep(bp, slpflag | (PRIBIO + 1),
                   1868:                                    "vinvalbuf", slptimeo);
                   1869:                                if (error) {
                   1870:                                        splx(s);
                   1871:                                        return (error);
                   1872:                                }
                   1873:                                break;
                   1874:                        }
                   1875:                        bremfree(bp);
                   1876:                        bp->b_flags |= B_BUSY;
                   1877:                        /*
                   1878:                         * XXX Since there are no node locks for NFS, I believe
                   1879:                         * there is a slight chance that a delayed write will
                   1880:                         * occur while sleeping just above, so check for it.
                   1881:                         */
                   1882:                        if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
                   1883:                                splx(s);
                   1884:                                (void) VOP_BWRITE(bp);
                   1885:                                goto loop;
                   1886:                        }
                   1887:                        bp->b_flags |= B_INVAL;
                   1888:                        brelse(bp);
                   1889:                }
                   1890:        }
                   1891:        if (!(flags & V_SAVEMETA) &&
                   1892:            (!LIST_EMPTY(&vp->v_dirtyblkhd) || !LIST_EMPTY(&vp->v_cleanblkhd)))
                   1893:                panic("vinvalbuf: flush failed");
                   1894:        splx(s);
                   1895:        return (0);
                   1896: }
                   1897:
                   1898: void
                   1899: vflushbuf(struct vnode *vp, int sync)
                   1900: {
                   1901:        struct buf *bp, *nbp;
                   1902:        int s;
                   1903:
                   1904: loop:
                   1905:        s = splbio();
                   1906:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd);
                   1907:            bp != LIST_END(&vp->v_dirtyblkhd); bp = nbp) {
                   1908:                nbp = LIST_NEXT(bp, b_vnbufs);
                   1909:                if ((bp->b_flags & B_BUSY))
                   1910:                        continue;
                   1911:                if ((bp->b_flags & B_DELWRI) == 0)
                   1912:                        panic("vflushbuf: not dirty");
                   1913:                bremfree(bp);
                   1914:                bp->b_flags |= B_BUSY;
                   1915:                splx(s);
                   1916:                /*
                   1917:                 * Wait for I/O associated with indirect blocks to complete,
                   1918:                 * since there is no way to quickly wait for them below.
                   1919:                 */
                   1920:                if (bp->b_vp == vp || sync == 0)
                   1921:                        (void) bawrite(bp);
                   1922:                else
                   1923:                        (void) bwrite(bp);
                   1924:                goto loop;
                   1925:        }
                   1926:        if (sync == 0) {
                   1927:                splx(s);
                   1928:                return;
                   1929:        }
                   1930:        vwaitforio(vp, 0, "vflushbuf", 0);
                   1931:        if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
                   1932:                splx(s);
                   1933: #ifdef DIAGNOSTIC
                   1934:                vprint("vflushbuf: dirty", vp);
                   1935: #endif
                   1936:                goto loop;
                   1937:        }
                   1938:        splx(s);
                   1939: }
                   1940:
                   1941: /*
                   1942:  * Associate a buffer with a vnode.
                   1943:  *
                   1944:  * Manipulates buffer vnode queues. Must be called at splbio().
                   1945:  */
                   1946: void
                   1947: bgetvp(struct vnode *vp, struct buf *bp)
                   1948: {
                   1949:        splassert(IPL_BIO);
                   1950:
                   1951:
                   1952:        if (bp->b_vp)
                   1953:                panic("bgetvp: not free");
                   1954:        vhold(vp);
                   1955:        bp->b_vp = vp;
                   1956:        if (vp->v_type == VBLK || vp->v_type == VCHR)
                   1957:                bp->b_dev = vp->v_rdev;
                   1958:        else
                   1959:                bp->b_dev = NODEV;
                   1960:        /*
                   1961:         * Insert onto list for new vnode.
                   1962:         */
                   1963:        bufinsvn(bp, &vp->v_cleanblkhd);
                   1964: }
                   1965:
                   1966: /*
                   1967:  * Disassociate a buffer from a vnode.
                   1968:  *
                   1969:  * Manipulates vnode buffer queues. Must be called at splbio().
                   1970:  */
                   1971: void
                   1972: brelvp(struct buf *bp)
                   1973: {
                   1974:        struct vnode *vp;
                   1975:
                   1976:        splassert(IPL_BIO);
                   1977:
                   1978:        if ((vp = bp->b_vp) == (struct vnode *) 0)
                   1979:                panic("brelvp: NULL");
                   1980:        /*
                   1981:         * Delete from old vnode list, if on one.
                   1982:         */
                   1983:        if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
                   1984:                bufremvn(bp);
                   1985:        if ((vp->v_bioflag & VBIOONSYNCLIST) &&
                   1986:            LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
                   1987:                vp->v_bioflag &= ~VBIOONSYNCLIST;
                   1988:                LIST_REMOVE(vp, v_synclist);
                   1989:        }
                   1990:        bp->b_vp = (struct vnode *) 0;
                   1991:
                   1992: #ifdef DIAGNOSTIC
                   1993:        if (vp->v_holdcnt == 0)
                   1994:                panic("brelvp: holdcnt");
                   1995: #endif
                   1996:        vp->v_holdcnt--;
                   1997:
                   1998:        /*
                   1999:         * If it is on the holdlist and the hold count drops to
                   2000:         * zero, move it to the free list.
                   2001:         */
                   2002:        if ((vp->v_bioflag & VBIOONFREELIST) &&
                   2003:            vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                   2004:                TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
                   2005:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
                   2006:        }
                   2007: }
                   2008:
                   2009: /*
                   2010:  * Replaces the current vnode associated with the buffer, if any,
                   2011:  * with a new vnode.
                   2012:  *
                   2013:  * If an output I/O is pending on the buffer, the old vnode
                   2014:  * I/O count is adjusted.
                   2015:  *
                   2016:  * Ignores vnode buffer queues. Must be called at splbio().
                   2017:  */
                   2018: void
                   2019: buf_replacevnode(struct buf *bp, struct vnode *newvp)
                   2020: {
                   2021:        struct vnode *oldvp = bp->b_vp;
                   2022:
                   2023:        splassert(IPL_BIO);
                   2024:
                   2025:        if (oldvp)
                   2026:                brelvp(bp);
                   2027:
                   2028:        if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
                   2029:                newvp->v_numoutput++;   /* put it on swapdev */
                   2030:                vwakeup(oldvp);
                   2031:        }
                   2032:
                   2033:        bgetvp(newvp, bp);
                   2034:        bufremvn(bp);
                   2035: }
                   2036:
                   2037: /*
                   2038:  * Used to assign buffers to the appropriate clean or dirty list on
                   2039:  * the vnode and to add newly dirty vnodes to the appropriate
                   2040:  * filesystem syncer list.
                   2041:  *
                   2042:  * Manipulates vnode buffer queues. Must be called at splbio().
                   2043:  */
                   2044: void
                   2045: reassignbuf(struct buf *bp)
                   2046: {
                   2047:        struct buflists *listheadp;
                   2048:        int delay;
                   2049:        struct vnode *vp = bp->b_vp;
                   2050:
                   2051:        splassert(IPL_BIO);
                   2052:
                   2053:        /*
                   2054:         * Delete from old vnode list, if on one.
                   2055:         */
                   2056:        if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
                   2057:                bufremvn(bp);
                   2058:
                   2059:        /*
                   2060:         * If dirty, put on list of dirty buffers;
                   2061:         * otherwise insert onto list of clean buffers.
                   2062:         */
                   2063:        if ((bp->b_flags & B_DELWRI) == 0) {
                   2064:                listheadp = &vp->v_cleanblkhd;
                   2065:                if ((vp->v_bioflag & VBIOONSYNCLIST) &&
                   2066:                    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
                   2067:                        vp->v_bioflag &= ~VBIOONSYNCLIST;
                   2068:                        LIST_REMOVE(vp, v_synclist);
                   2069:                }
                   2070:        } else {
                   2071:                listheadp = &vp->v_dirtyblkhd;
                   2072:                if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
                   2073:                        switch (vp->v_type) {
                   2074:                        case VDIR:
                   2075:                                delay = syncdelay / 2;
                   2076:                                break;
                   2077:                        case VBLK:
                   2078:                                if (vp->v_specmountpoint != NULL) {
                   2079:                                        delay = syncdelay / 3;
                   2080:                                        break;
                   2081:                                }
                   2082:                                /* FALLTHROUGH */
                   2083:                        default:
                   2084:                                delay = syncdelay;
                   2085:                        }
                   2086:                        vn_syncer_add_to_worklist(vp, delay);
                   2087:                }
                   2088:        }
                   2089:        bufinsvn(bp, listheadp);
                   2090: }
                   2091:
                   2092: int
                   2093: vfs_register(struct vfsconf *vfs)
                   2094: {
                   2095:        struct vfsconf *vfsp;
                   2096:        struct vfsconf **vfspp;
                   2097:
                   2098: #ifdef DIAGNOSTIC
                   2099:        /* Paranoia? */
                   2100:        if (vfs->vfc_refcount != 0)
                   2101:                printf("vfs_register called with vfc_refcount > 0\n");
                   2102: #endif
                   2103:
                   2104:        /* Check if filesystem already known */
                   2105:        for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
                   2106:            vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
                   2107:                if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
                   2108:                        return (EEXIST);
                   2109:
                   2110:        if (vfs->vfc_typenum > maxvfsconf)
                   2111:                maxvfsconf = vfs->vfc_typenum;
                   2112:
                   2113:        vfs->vfc_next = NULL;
                   2114:
                   2115:        /* Add to the end of the list */
                   2116:        *vfspp = vfs;
                   2117:
                   2118:        /* Call vfs_init() */
                   2119:        if (vfs->vfc_vfsops->vfs_init)
                   2120:                (*(vfs->vfc_vfsops->vfs_init))(vfs);
                   2121:
                   2122:        return 0;
                   2123: }
                   2124:
                   2125: int
                   2126: vfs_unregister(struct vfsconf *vfs)
                   2127: {
                   2128:        struct vfsconf *vfsp;
                   2129:        struct vfsconf **vfspp;
                   2130:        int maxtypenum;
                   2131:
                   2132:        /* Find our vfsconf struct */
                   2133:        for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
                   2134:            vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
                   2135:                if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
                   2136:                        break;
                   2137:        }
                   2138:
                   2139:        if (!vfsp)                      /* Not found */
                   2140:                return (ENOENT);
                   2141:
                   2142:        if (vfsp->vfc_refcount)         /* In use */
                   2143:                return (EBUSY);
                   2144:
                   2145:        /* Remove from list and free */
                   2146:        *vfspp = vfsp->vfc_next;
                   2147:
                   2148:        maxtypenum = 0;
                   2149:
                   2150:        for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
                   2151:                if (vfsp->vfc_typenum > maxtypenum)
                   2152:                        maxtypenum = vfsp->vfc_typenum;
                   2153:
                   2154:        maxvfsconf = maxtypenum;
                   2155:        return 0;
                   2156: }
                   2157:
                   2158: /*
                   2159:  * Check if vnode represents a disk device
                   2160:  */
                   2161: int
                   2162: vn_isdisk(struct vnode *vp, int *errp)
                   2163: {
                   2164:        if (vp->v_type != VBLK && vp->v_type != VCHR)
                   2165:                return (0);
                   2166:
                   2167:        return (1);
                   2168: }
                   2169:
                   2170: #ifdef DDB
                   2171: #include <machine/db_machdep.h>
                   2172: #include <ddb/db_interface.h>
                   2173: #include <ddb/db_output.h>
                   2174:
                   2175: void
                   2176: vfs_buf_print(struct buf *bp, int full, int (*pr)(const char *, ...))
                   2177: {
                   2178:
                   2179:        (*pr)("  vp %p lblkno 0x%llx blkno 0x%llx dev 0x%x\n"
                   2180:              "  proc %p error %d flags %b\n",
                   2181:            bp->b_vp, (int64_t)bp->b_lblkno, (int64_t)bp->b_blkno, bp->b_dev,
                   2182:            bp->b_proc, bp->b_error, bp->b_flags, B_BITS);
                   2183:
                   2184:        (*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx sync 0x%x\n"
                   2185:              "  data %p saveaddr %p dep %p iodone %p\n",
                   2186:            bp->b_bufsize, bp->b_bcount, (long)bp->b_resid, bp->b_synctime,
                   2187:            bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep), bp->b_iodone);
                   2188:
                   2189:        (*pr)("  dirty {off 0x%x end 0x%x} valid {off 0x%x end 0x%x}\n",
                   2190:            bp->b_dirtyoff, bp->b_dirtyend, bp->b_validoff, bp->b_validend);
                   2191:
                   2192: #ifdef FFS_SOFTUPDATES
                   2193:        if (full)
                   2194:                softdep_print(bp, full, pr);
                   2195: #endif
                   2196: }
                   2197:
                   2198: const char *vtypes[] = { VTYPE_NAMES };
                   2199: const char *vtags[] = { VTAG_NAMES };
                   2200:
                   2201: void
                   2202: vfs_vnode_print(struct vnode *vp, int full, int (*pr)(const char *, ...))
                   2203: {
                   2204:
                   2205: #define        NENTS(n)        (sizeof n / sizeof(n[0]))
                   2206:        (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
                   2207:              vp->v_tag > NENTS(vtags)? "<unk>":vtags[vp->v_tag], vp->v_tag,
                   2208:              vp->v_type > NENTS(vtypes)? "<unk>":vtypes[vp->v_type],
                   2209:              vp->v_type, vp->v_mount, vp->v_mountedhere);
                   2210:
                   2211:        (*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
                   2212:              vp->v_data, vp->v_usecount, vp->v_writecount,
                   2213:              vp->v_holdcnt, vp->v_numoutput);
                   2214:
                   2215:        /* uvm_object_printit(&vp->v_uobj, full, pr); */
                   2216:
                   2217:        if (full) {
                   2218:                struct buf *bp;
                   2219:
                   2220:                (*pr)("clean bufs:\n");
                   2221:                LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
                   2222:                        (*pr)(" bp %p\n", bp);
                   2223:                        vfs_buf_print(bp, full, pr);
                   2224:                }
                   2225:
                   2226:                (*pr)("dirty bufs:\n");
                   2227:                LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
                   2228:                        (*pr)(" bp %p\n", bp);
                   2229:                        vfs_buf_print(bp, full, pr);
                   2230:                }
                   2231:        }
                   2232: }
                   2233:
                   2234: void
                   2235: vfs_mount_print(struct mount *mp, int full, int (*pr)(const char *, ...))
                   2236: {
                   2237:        struct vfsconf *vfc = mp->mnt_vfc;
                   2238:        struct vnode *vp;
                   2239:        int cnt = 0;
                   2240:
                   2241:        (*pr)("flags %b\nvnodecovered %p syncer %p data %p\n",
                   2242:            mp->mnt_flag, MNT_BITS,
                   2243:            mp->mnt_vnodecovered, mp->mnt_syncer, mp->mnt_data);
                   2244:
                   2245:        (*pr)("vfsconf: ops %p name \"%s\" num %d ref %d flags 0x%x\n",
                   2246:             vfc->vfc_vfsops, vfc->vfc_name, vfc->vfc_typenum,
                   2247:            vfc->vfc_refcount, vfc->vfc_flags);
                   2248:
                   2249:        (*pr)("statvfs cache: bsize %x iosize %x\nblocks %u free %u avail %u\n",
                   2250:            mp->mnt_stat.f_bsize, mp->mnt_stat.f_iosize, mp->mnt_stat.f_blocks,
                   2251:            mp->mnt_stat.f_bfree, mp->mnt_stat.f_bavail);
                   2252:
                   2253:        (*pr)("  files %u ffiles %u\n", mp->mnt_stat.f_files,
                   2254:            mp->mnt_stat.f_ffree);
                   2255:
                   2256:        (*pr)("  f_fsidx {0x%x, 0x%x} owner %u ctime 0x%x\n",
                   2257:            mp->mnt_stat.f_fsid.val[0], mp->mnt_stat.f_fsid.val[1],
                   2258:            mp->mnt_stat.f_owner, mp->mnt_stat.f_ctime);
                   2259:
                   2260:        (*pr)("  syncwrites %lu asyncwrites = %lu\n",
                   2261:            mp->mnt_stat.f_syncwrites, mp->mnt_stat.f_asyncwrites);
                   2262:
                   2263:        (*pr)("  fstype \"%s\" mnton \"%s\" mntfrom \"%s\"\n",
                   2264:            mp->mnt_stat.f_fstypename, mp->mnt_stat.f_mntonname,
                   2265:            mp->mnt_stat.f_mntfromname);
                   2266:
                   2267:        (*pr)("locked vnodes:");
                   2268:        /* XXX would take mountlist lock, except ddb has no context */
                   2269:        LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes)
                   2270:                if (VOP_ISLOCKED(vp)) {
                   2271:                        if (!LIST_NEXT(vp, v_mntvnodes))
                   2272:                                (*pr)(" %p", vp);
                   2273:                        else if (!(cnt++ % (72 / (sizeof(void *) * 2 + 4))))
                   2274:                                (*pr)("\n\t%p", vp);
                   2275:                        else
                   2276:                                (*pr)(", %p", vp);
                   2277:                }
                   2278:        (*pr)("\n");
                   2279:
                   2280:        if (full) {
                   2281:                (*pr)("all vnodes:\n\t");
                   2282:                /* XXX would take mountlist lock, except ddb has no context */
                   2283:                LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes)
                   2284:                        if (!LIST_NEXT(vp, v_mntvnodes))
                   2285:                                (*pr)(" %p", vp);
                   2286:                        else if (!(cnt++ % (72 / (sizeof(void *) * 2 + 4))))
                   2287:                                (*pr)(" %p,\n\t", vp);
                   2288:                        else
                   2289:                                (*pr)(" %p,", vp);
                   2290:                (*pr)("\n", vp);
                   2291:        }
                   2292: }
                   2293: #endif /* DDB */
CVSweb