sys/kern/vfs_sync.c - annotate

Return to vfs_sync.c CVS log
Up to [local] / sys / kern
Annotation of sys/kern/vfs_sync.c, Revision 1.1.1.1

1.1       nbrk        1: /*       $OpenBSD: vfs_sync.c,v 1.43 2007/06/01 23:47:56 deraadt Exp $  */
                      2:
                      3: /*
                      4:  *  Portions of this code are:
                      5:  *
                      6:  * Copyright (c) 1989, 1993
                      7:  *     The Regents of the University of California.  All rights reserved.
                      8:  * (c) UNIX System Laboratories, Inc.
                      9:  * All or some portions of this file are derived from material licensed
                     10:  * to the University of California by American Telephone and Telegraph
                     11:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
                     12:  * the permission of UNIX System Laboratories, Inc.
                     13:  *
                     14:  * Redistribution and use in source and binary forms, with or without
                     15:  * modification, are permitted provided that the following conditions
                     16:  * are met:
                     17:  * 1. Redistributions of source code must retain the above copyright
                     18:  *    notice, this list of conditions and the following disclaimer.
                     19:  * 2. Redistributions in binary form must reproduce the above copyright
                     20:  *    notice, this list of conditions and the following disclaimer in the
                     21:  *    documentation and/or other materials provided with the distribution.
                     22:  * 3. Neither the name of the University nor the names of its contributors
                     23:  *    may be used to endorse or promote products derived from this software
                     24:  *    without specific prior written permission.
                     25:  *
                     26:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     27:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     30:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     31:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     32:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     33:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     34:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     35:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     36:  * SUCH DAMAGE.
                     37:  */
                     38:
                     39: /*
                     40:  * Syncer daemon
                     41:  */
                     42:
                     43: #include <sys/queue.h>
                     44: #include <sys/param.h>
                     45: #include <sys/systm.h>
                     46: #include <sys/proc.h>
                     47: #include <sys/mount.h>
                     48: #include <sys/vnode.h>
                     49: #include <sys/buf.h>
                     50: #include <sys/malloc.h>
                     51:
                     52: #include <sys/kernel.h>
                     53: #include <sys/sched.h>
                     54:
                     55: #ifdef FFS_SOFTUPDATES
                     56: int   softdep_process_worklist(struct mount *);
                     57: #endif
                     58:
                     59: /*
                     60:  * The workitem queue.
                     61:  */
                     62: #define SYNCER_MAXDELAY        32              /* maximum sync delay time */
                     63: #define SYNCER_DEFAULT 30              /* default sync delay time */
                     64: int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */
                     65: time_t syncdelay = SYNCER_DEFAULT;     /* time to delay syncing vnodes */
                     66:
                     67: int rushjob = 0;                       /* number of slots to run ASAP */
                     68: int stat_rush_requests = 0;            /* number of rush requests */
                     69:
                     70: static int syncer_delayno = 0;
                     71: static long syncer_mask;
                     72: LIST_HEAD(synclist, vnode);
                     73: static struct synclist *syncer_workitem_pending;
                     74:
                     75: struct proc *syncerproc;
                     76:
                     77: /*
                     78:  * The workitem queue.
                     79:  *
                     80:  * It is useful to delay writes of file data and filesystem metadata
                     81:  * for tens of seconds so that quickly created and deleted files need
                     82:  * not waste disk bandwidth being created and removed. To realize this,
                     83:  * we append vnodes to a "workitem" queue. When running with a soft
                     84:  * updates implementation, most pending metadata dependencies should
                     85:  * not wait for more than a few seconds. Thus, mounted block devices
                     86:  * are delayed only about half the time that file data is delayed.
                     87:  * Similarly, directory updates are more critical, so are only delayed
                     88:  * about a third the time that file data is delayed. Thus, there are
                     89:  * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
                     90:  * one each second (driven off the filesystem syncer process). The
                     91:  * syncer_delayno variable indicates the next queue that is to be processed.
                     92:  * Items that need to be processed soon are placed in this queue:
                     93:  *
                     94:  *     syncer_workitem_pending[syncer_delayno]
                     95:  *
                     96:  * A delay of fifteen seconds is done by placing the request fifteen
                     97:  * entries later in the queue:
                     98:  *
                     99:  *     syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
                    100:  *
                    101:  */
                    102:
                    103: void
                    104: vn_initialize_syncerd(void)
                    105: {
                    106:        syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, M_WAITOK,
                    107:            &syncer_mask);
                    108:        syncer_maxdelay = syncer_mask + 1;
                    109: }
                    110:
                    111: /*
                    112:  * Add an item to the syncer work queue.
                    113:  */
                    114: void
                    115: vn_syncer_add_to_worklist(struct vnode *vp, int delay)
                    116: {
                    117:        int s, slot;
                    118:
                    119:        if (delay > syncer_maxdelay - 2)
                    120:                delay = syncer_maxdelay - 2;
                    121:        slot = (syncer_delayno + delay) & syncer_mask;
                    122:
                    123:        s = splbio();
                    124:        if (vp->v_bioflag & VBIOONSYNCLIST)
                    125:                LIST_REMOVE(vp, v_synclist);
                    126:
                    127:        vp->v_bioflag |= VBIOONSYNCLIST;
                    128:        LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
                    129:        splx(s);
                    130: }
                    131:
                    132: /*
                    133:  * System filesystem synchronizer daemon.
                    134:  */
                    135: void
                    136: sched_sync(struct proc *p)
                    137: {
                    138:        struct synclist *slp;
                    139:        struct vnode *vp;
                    140:        long starttime;
                    141:        int s;
                    142:
                    143:        syncerproc = curproc;
                    144:
                    145:        for (;;) {
                    146:                starttime = time_second;
                    147:
                    148:                /*
                    149:                 * Push files whose dirty time has expired.
                    150:                 */
                    151:                s = splbio();
                    152:                slp = &syncer_workitem_pending[syncer_delayno];
                    153:
                    154:                syncer_delayno += 1;
                    155:                if (syncer_delayno == syncer_maxdelay)
                    156:                        syncer_delayno = 0;
                    157:
                    158:                while ((vp = LIST_FIRST(slp)) != NULL) {
                    159:                        if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT, p)) {
                    160:                                /*
                    161:                                 * If we fail to get the lock, we move this
                    162:                                 * vnode one second ahead in time.
                    163:                                 * XXX - no good, but the best we can do.
                    164:                                 */
                    165:                                vn_syncer_add_to_worklist(vp, 1);
                    166:                                continue;
                    167:                        }
                    168:                        splx(s);
                    169:                        (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
                    170:                        vput(vp);
                    171:                        s = splbio();
                    172:                        if (LIST_FIRST(slp) == vp) {
                    173:                                /*
                    174:                                 * Note: disk vps can remain on the
                    175:                                 * worklist too with no dirty blocks, but
                    176:                                 * since sync_fsync() moves it to a different
                    177:                                 * slot we are safe.
                    178:                                 */
                    179: #ifdef DIAGNOSTIC
                    180:                                if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
                    181:                                    vp->v_type != VBLK) {
                    182:                                        vprint("fsync failed", vp);
                    183:                                        if (vp->v_mount != NULL)
                    184:                                                printf("mounted on: %s\n",
                    185:                                                    vp->v_mount->mnt_stat.f_mntonname);
                    186:                                        panic("sched_sync: fsync failed");
                    187:                                }
                    188: #endif /* DIAGNOSTIC */
                    189:                                /*
                    190:                                 * Put us back on the worklist.  The worklist
                    191:                                 * routine will remove us from our current
                    192:                                 * position and then add us back in at a later
                    193:                                 * position.
                    194:                                 */
                    195:                                vn_syncer_add_to_worklist(vp, syncdelay);
                    196:                        }
                    197:                }
                    198:
                    199:                splx(s);
                    200:
                    201: #ifdef FFS_SOFTUPDATES
                    202:                /*
                    203:                 * Do soft update processing.
                    204:                 */
                    205:                softdep_process_worklist(NULL);
                    206: #endif
                    207:
                    208:                /*
                    209:                 * The variable rushjob allows the kernel to speed up the
                    210:                 * processing of the filesystem syncer process. A rushjob
                    211:                 * value of N tells the filesystem syncer to process the next
                    212:                 * N seconds worth of work on its queue ASAP. Currently rushjob
                    213:                 * is used by the soft update code to speed up the filesystem
                    214:                 * syncer process when the incore state is getting so far
                    215:                 * ahead of the disk that the kernel memory pool is being
                    216:                 * threatened with exhaustion.
                    217:                 */
                    218:                if (rushjob > 0) {
                    219:                        rushjob -= 1;
                    220:                        continue;
                    221:                }
                    222:                /*
                    223:                 * If it has taken us less than a second to process the
                    224:                 * current work, then wait. Otherwise start right over
                    225:                 * again. We can still lose time if any single round
                    226:                 * takes more than two seconds, but it does not really
                    227:                 * matter as we are just trying to generally pace the
                    228:                 * filesystem activity.
                    229:                 */
                    230:                if (time_second == starttime)
                    231:                        tsleep(&lbolt, PPAUSE, "syncer", 0);
                    232:        }
                    233: }
                    234:
                    235: /*
                    236:  * Request the syncer daemon to speed up its work.
                    237:  * We never push it to speed up more than half of its
                    238:  * normal turn time, otherwise it could take over the cpu.
                    239:  */
                    240: int
                    241: speedup_syncer(void)
                    242: {
                    243:        int s;
                    244:
                    245:        SCHED_LOCK(s);
                    246:        if (syncerproc && syncerproc->p_wchan == &lbolt)
                    247:                setrunnable(syncerproc);
                    248:        SCHED_UNLOCK(s);
                    249:        if (rushjob < syncdelay / 2) {
                    250:                rushjob += 1;
                    251:                stat_rush_requests += 1;
                    252:                return 1;
                    253:        }
                    254:        return 0;
                    255: }
                    256:
                    257: /*
                    258:  * Routine to create and manage a filesystem syncer vnode.
                    259:  */
                    260: #define sync_close nullop
                    261: int   sync_fsync(void *);
                    262: int   sync_inactive(void *);
                    263: #define sync_reclaim nullop
                    264: #define sync_lock vop_generic_lock
                    265: #define sync_unlock vop_generic_unlock
                    266: int   sync_print(void *);
                    267: #define sync_islocked vop_generic_islocked
                    268:
                    269: int (**sync_vnodeop_p)(void *);
                    270: struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
                    271:       { &vop_default_desc, vn_default_error },
                    272:       { &vop_close_desc, sync_close },                /* close */
                    273:       { &vop_fsync_desc, sync_fsync },                /* fsync */
                    274:       { &vop_inactive_desc, sync_inactive },          /* inactive */
                    275:       { &vop_reclaim_desc, sync_reclaim },            /* reclaim */
                    276:       { &vop_lock_desc, sync_lock },                  /* lock */
                    277:       { &vop_unlock_desc, sync_unlock },              /* unlock */
                    278:       { &vop_print_desc, sync_print },                /* print */
                    279:       { &vop_islocked_desc, sync_islocked },          /* islocked */
                    280:       { (struct vnodeop_desc*)NULL, (int(*)(void *))NULL }
                    281: };
                    282: struct vnodeopv_desc sync_vnodeop_opv_desc = {
                    283:        &sync_vnodeop_p, sync_vnodeop_entries
                    284: };
                    285:
                    286: /*
                    287:  * Create a new filesystem syncer vnode for the specified mount point.
                    288:  */
                    289: int
                    290: vfs_allocate_syncvnode(struct mount *mp)
                    291: {
                    292:        struct vnode *vp;
                    293:        static long start, incr, next;
                    294:        int error;
                    295:
                    296:        /* Allocate a new vnode */
                    297:        if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
                    298:                mp->mnt_syncer = NULL;
                    299:                return (error);
                    300:        }
                    301:        vp->v_writecount = 1;
                    302:        vp->v_type = VNON;
                    303:        /*
                    304:         * Place the vnode onto the syncer worklist. We attempt to
                    305:         * scatter them about on the list so that they will go off
                    306:         * at evenly distributed times even if all the filesystems
                    307:         * are mounted at once.
                    308:         */
                    309:        next += incr;
                    310:        if (next == 0 || next > syncer_maxdelay) {
                    311:                start /= 2;
                    312:                incr /= 2;
                    313:                if (start == 0) {
                    314:                        start = syncer_maxdelay / 2;
                    315:                        incr = syncer_maxdelay;
                    316:                }
                    317:                next = start;
                    318:        }
                    319:        vn_syncer_add_to_worklist(vp, next);
                    320:        mp->mnt_syncer = vp;
                    321:        return (0);
                    322: }
                    323:
                    324: /*
                    325:  * Do a lazy sync of the filesystem.
                    326:  */
                    327: int
                    328: sync_fsync(void *v)
                    329: {
                    330:        struct vop_fsync_args *ap = v;
                    331:        struct vnode *syncvp = ap->a_vp;
                    332:        struct mount *mp = syncvp->v_mount;
                    333:        int asyncflag;
                    334:
                    335:        /*
                    336:         * We only need to do something if this is a lazy evaluation.
                    337:         */
                    338:        if (ap->a_waitfor != MNT_LAZY)
                    339:                return (0);
                    340:
                    341:        /*
                    342:         * Move ourselves to the back of the sync list.
                    343:         */
                    344:        vn_syncer_add_to_worklist(syncvp, syncdelay);
                    345:
                    346:        /*
                    347:         * Walk the list of vnodes pushing all that are dirty and
                    348:         * not already on the sync list.
                    349:         */
                    350:        if (vfs_busy(mp, VB_READ|VB_NOWAIT) == 0) {
                    351:                asyncflag = mp->mnt_flag & MNT_ASYNC;
                    352:                mp->mnt_flag &= ~MNT_ASYNC;
                    353:                VFS_SYNC(mp, MNT_LAZY, ap->a_cred, ap->a_p);
                    354:                if (asyncflag)
                    355:                        mp->mnt_flag |= MNT_ASYNC;
                    356:                vfs_unbusy(mp);
                    357:        }
                    358:
                    359:        return (0);
                    360: }
                    361:
                    362: /*
                    363:  * The syncer vnode is no longer needed and is being decommissioned.
                    364:  */
                    365: int
                    366: sync_inactive(void *v)
                    367: {
                    368:        struct vop_inactive_args *ap = v;
                    369:
                    370:        struct vnode *vp = ap->a_vp;
                    371:        int s;
                    372:
                    373:        if (vp->v_usecount == 0) {
                    374:                VOP_UNLOCK(vp, 0, ap->a_p);
                    375:                return (0);
                    376:        }
                    377:
                    378:        vp->v_mount->mnt_syncer = NULL;
                    379:
                    380:        s = splbio();
                    381:
                    382:        LIST_REMOVE(vp, v_synclist);
                    383:        vp->v_bioflag &= ~VBIOONSYNCLIST;
                    384:
                    385:        splx(s);
                    386:
                    387:        vp->v_writecount = 0;
                    388:        vput(vp);
                    389:
                    390:        return (0);
                    391: }
                    392:
                    393: /*
                    394:  * Print out a syncer vnode.
                    395:  */
                    396: int
                    397: sync_print(void *v)
                    398: {
                    399:        printf("syncer vnode\n");
                    400:
                    401:        return (0);
                    402: }
CVSweb