Annotation of sys/kern/vfs_sync.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: vfs_sync.c,v 1.43 2007/06/01 23:47:56 deraadt Exp $ */
! 2:
! 3: /*
! 4: * Portions of this code are:
! 5: *
! 6: * Copyright (c) 1989, 1993
! 7: * The Regents of the University of California. All rights reserved.
! 8: * (c) UNIX System Laboratories, Inc.
! 9: * All or some portions of this file are derived from material licensed
! 10: * to the University of California by American Telephone and Telegraph
! 11: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
! 12: * the permission of UNIX System Laboratories, Inc.
! 13: *
! 14: * Redistribution and use in source and binary forms, with or without
! 15: * modification, are permitted provided that the following conditions
! 16: * are met:
! 17: * 1. Redistributions of source code must retain the above copyright
! 18: * notice, this list of conditions and the following disclaimer.
! 19: * 2. Redistributions in binary form must reproduce the above copyright
! 20: * notice, this list of conditions and the following disclaimer in the
! 21: * documentation and/or other materials provided with the distribution.
! 22: * 3. Neither the name of the University nor the names of its contributors
! 23: * may be used to endorse or promote products derived from this software
! 24: * without specific prior written permission.
! 25: *
! 26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 36: * SUCH DAMAGE.
! 37: */
! 38:
! 39: /*
! 40: * Syncer daemon
! 41: */
! 42:
! 43: #include <sys/queue.h>
! 44: #include <sys/param.h>
! 45: #include <sys/systm.h>
! 46: #include <sys/proc.h>
! 47: #include <sys/mount.h>
! 48: #include <sys/vnode.h>
! 49: #include <sys/buf.h>
! 50: #include <sys/malloc.h>
! 51:
! 52: #include <sys/kernel.h>
! 53: #include <sys/sched.h>
! 54:
! 55: #ifdef FFS_SOFTUPDATES
! 56: int softdep_process_worklist(struct mount *);
! 57: #endif
! 58:
! 59: /*
! 60: * The workitem queue.
! 61: */
! 62: #define SYNCER_MAXDELAY 32 /* maximum sync delay time */
! 63: #define SYNCER_DEFAULT 30 /* default sync delay time */
! 64: int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */
! 65: time_t syncdelay = SYNCER_DEFAULT; /* time to delay syncing vnodes */
! 66:
! 67: int rushjob = 0; /* number of slots to run ASAP */
! 68: int stat_rush_requests = 0; /* number of rush requests */
! 69:
! 70: static int syncer_delayno = 0;
! 71: static long syncer_mask;
! 72: LIST_HEAD(synclist, vnode);
! 73: static struct synclist *syncer_workitem_pending;
! 74:
! 75: struct proc *syncerproc;
! 76:
! 77: /*
! 78: * The workitem queue.
! 79: *
! 80: * It is useful to delay writes of file data and filesystem metadata
! 81: * for tens of seconds so that quickly created and deleted files need
! 82: * not waste disk bandwidth being created and removed. To realize this,
! 83: * we append vnodes to a "workitem" queue. When running with a soft
! 84: * updates implementation, most pending metadata dependencies should
! 85: * not wait for more than a few seconds. Thus, mounted block devices
! 86: * are delayed only about half the time that file data is delayed.
! 87: * Similarly, directory updates are more critical, so are only delayed
! 88: * about a third the time that file data is delayed. Thus, there are
! 89: * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
! 90: * one each second (driven off the filesystem syncer process). The
! 91: * syncer_delayno variable indicates the next queue that is to be processed.
! 92: * Items that need to be processed soon are placed in this queue:
! 93: *
! 94: * syncer_workitem_pending[syncer_delayno]
! 95: *
! 96: * A delay of fifteen seconds is done by placing the request fifteen
! 97: * entries later in the queue:
! 98: *
! 99: * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
! 100: *
! 101: */
! 102:
! 103: void
! 104: vn_initialize_syncerd(void)
! 105: {
! 106: syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, M_WAITOK,
! 107: &syncer_mask);
! 108: syncer_maxdelay = syncer_mask + 1;
! 109: }
! 110:
! 111: /*
! 112: * Add an item to the syncer work queue.
! 113: */
! 114: void
! 115: vn_syncer_add_to_worklist(struct vnode *vp, int delay)
! 116: {
! 117: int s, slot;
! 118:
! 119: if (delay > syncer_maxdelay - 2)
! 120: delay = syncer_maxdelay - 2;
! 121: slot = (syncer_delayno + delay) & syncer_mask;
! 122:
! 123: s = splbio();
! 124: if (vp->v_bioflag & VBIOONSYNCLIST)
! 125: LIST_REMOVE(vp, v_synclist);
! 126:
! 127: vp->v_bioflag |= VBIOONSYNCLIST;
! 128: LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
! 129: splx(s);
! 130: }
! 131:
! 132: /*
! 133: * System filesystem synchronizer daemon.
! 134: */
! 135: void
! 136: sched_sync(struct proc *p)
! 137: {
! 138: struct synclist *slp;
! 139: struct vnode *vp;
! 140: long starttime;
! 141: int s;
! 142:
! 143: syncerproc = curproc;
! 144:
! 145: for (;;) {
! 146: starttime = time_second;
! 147:
! 148: /*
! 149: * Push files whose dirty time has expired.
! 150: */
! 151: s = splbio();
! 152: slp = &syncer_workitem_pending[syncer_delayno];
! 153:
! 154: syncer_delayno += 1;
! 155: if (syncer_delayno == syncer_maxdelay)
! 156: syncer_delayno = 0;
! 157:
! 158: while ((vp = LIST_FIRST(slp)) != NULL) {
! 159: if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT, p)) {
! 160: /*
! 161: * If we fail to get the lock, we move this
! 162: * vnode one second ahead in time.
! 163: * XXX - no good, but the best we can do.
! 164: */
! 165: vn_syncer_add_to_worklist(vp, 1);
! 166: continue;
! 167: }
! 168: splx(s);
! 169: (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
! 170: vput(vp);
! 171: s = splbio();
! 172: if (LIST_FIRST(slp) == vp) {
! 173: /*
! 174: * Note: disk vps can remain on the
! 175: * worklist too with no dirty blocks, but
! 176: * since sync_fsync() moves it to a different
! 177: * slot we are safe.
! 178: */
! 179: #ifdef DIAGNOSTIC
! 180: if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
! 181: vp->v_type != VBLK) {
! 182: vprint("fsync failed", vp);
! 183: if (vp->v_mount != NULL)
! 184: printf("mounted on: %s\n",
! 185: vp->v_mount->mnt_stat.f_mntonname);
! 186: panic("sched_sync: fsync failed");
! 187: }
! 188: #endif /* DIAGNOSTIC */
! 189: /*
! 190: * Put us back on the worklist. The worklist
! 191: * routine will remove us from our current
! 192: * position and then add us back in at a later
! 193: * position.
! 194: */
! 195: vn_syncer_add_to_worklist(vp, syncdelay);
! 196: }
! 197: }
! 198:
! 199: splx(s);
! 200:
! 201: #ifdef FFS_SOFTUPDATES
! 202: /*
! 203: * Do soft update processing.
! 204: */
! 205: softdep_process_worklist(NULL);
! 206: #endif
! 207:
! 208: /*
! 209: * The variable rushjob allows the kernel to speed up the
! 210: * processing of the filesystem syncer process. A rushjob
! 211: * value of N tells the filesystem syncer to process the next
! 212: * N seconds worth of work on its queue ASAP. Currently rushjob
! 213: * is used by the soft update code to speed up the filesystem
! 214: * syncer process when the incore state is getting so far
! 215: * ahead of the disk that the kernel memory pool is being
! 216: * threatened with exhaustion.
! 217: */
! 218: if (rushjob > 0) {
! 219: rushjob -= 1;
! 220: continue;
! 221: }
! 222: /*
! 223: * If it has taken us less than a second to process the
! 224: * current work, then wait. Otherwise start right over
! 225: * again. We can still lose time if any single round
! 226: * takes more than two seconds, but it does not really
! 227: * matter as we are just trying to generally pace the
! 228: * filesystem activity.
! 229: */
! 230: if (time_second == starttime)
! 231: tsleep(&lbolt, PPAUSE, "syncer", 0);
! 232: }
! 233: }
! 234:
! 235: /*
! 236: * Request the syncer daemon to speed up its work.
! 237: * We never push it to speed up more than half of its
! 238: * normal turn time, otherwise it could take over the cpu.
! 239: */
! 240: int
! 241: speedup_syncer(void)
! 242: {
! 243: int s;
! 244:
! 245: SCHED_LOCK(s);
! 246: if (syncerproc && syncerproc->p_wchan == &lbolt)
! 247: setrunnable(syncerproc);
! 248: SCHED_UNLOCK(s);
! 249: if (rushjob < syncdelay / 2) {
! 250: rushjob += 1;
! 251: stat_rush_requests += 1;
! 252: return 1;
! 253: }
! 254: return 0;
! 255: }
! 256:
! 257: /*
! 258: * Routine to create and manage a filesystem syncer vnode.
! 259: */
! 260: #define sync_close nullop
! 261: int sync_fsync(void *);
! 262: int sync_inactive(void *);
! 263: #define sync_reclaim nullop
! 264: #define sync_lock vop_generic_lock
! 265: #define sync_unlock vop_generic_unlock
! 266: int sync_print(void *);
! 267: #define sync_islocked vop_generic_islocked
! 268:
! 269: int (**sync_vnodeop_p)(void *);
! 270: struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
! 271: { &vop_default_desc, vn_default_error },
! 272: { &vop_close_desc, sync_close }, /* close */
! 273: { &vop_fsync_desc, sync_fsync }, /* fsync */
! 274: { &vop_inactive_desc, sync_inactive }, /* inactive */
! 275: { &vop_reclaim_desc, sync_reclaim }, /* reclaim */
! 276: { &vop_lock_desc, sync_lock }, /* lock */
! 277: { &vop_unlock_desc, sync_unlock }, /* unlock */
! 278: { &vop_print_desc, sync_print }, /* print */
! 279: { &vop_islocked_desc, sync_islocked }, /* islocked */
! 280: { (struct vnodeop_desc*)NULL, (int(*)(void *))NULL }
! 281: };
! 282: struct vnodeopv_desc sync_vnodeop_opv_desc = {
! 283: &sync_vnodeop_p, sync_vnodeop_entries
! 284: };
! 285:
! 286: /*
! 287: * Create a new filesystem syncer vnode for the specified mount point.
! 288: */
! 289: int
! 290: vfs_allocate_syncvnode(struct mount *mp)
! 291: {
! 292: struct vnode *vp;
! 293: static long start, incr, next;
! 294: int error;
! 295:
! 296: /* Allocate a new vnode */
! 297: if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
! 298: mp->mnt_syncer = NULL;
! 299: return (error);
! 300: }
! 301: vp->v_writecount = 1;
! 302: vp->v_type = VNON;
! 303: /*
! 304: * Place the vnode onto the syncer worklist. We attempt to
! 305: * scatter them about on the list so that they will go off
! 306: * at evenly distributed times even if all the filesystems
! 307: * are mounted at once.
! 308: */
! 309: next += incr;
! 310: if (next == 0 || next > syncer_maxdelay) {
! 311: start /= 2;
! 312: incr /= 2;
! 313: if (start == 0) {
! 314: start = syncer_maxdelay / 2;
! 315: incr = syncer_maxdelay;
! 316: }
! 317: next = start;
! 318: }
! 319: vn_syncer_add_to_worklist(vp, next);
! 320: mp->mnt_syncer = vp;
! 321: return (0);
! 322: }
! 323:
! 324: /*
! 325: * Do a lazy sync of the filesystem.
! 326: */
! 327: int
! 328: sync_fsync(void *v)
! 329: {
! 330: struct vop_fsync_args *ap = v;
! 331: struct vnode *syncvp = ap->a_vp;
! 332: struct mount *mp = syncvp->v_mount;
! 333: int asyncflag;
! 334:
! 335: /*
! 336: * We only need to do something if this is a lazy evaluation.
! 337: */
! 338: if (ap->a_waitfor != MNT_LAZY)
! 339: return (0);
! 340:
! 341: /*
! 342: * Move ourselves to the back of the sync list.
! 343: */
! 344: vn_syncer_add_to_worklist(syncvp, syncdelay);
! 345:
! 346: /*
! 347: * Walk the list of vnodes pushing all that are dirty and
! 348: * not already on the sync list.
! 349: */
! 350: if (vfs_busy(mp, VB_READ|VB_NOWAIT) == 0) {
! 351: asyncflag = mp->mnt_flag & MNT_ASYNC;
! 352: mp->mnt_flag &= ~MNT_ASYNC;
! 353: VFS_SYNC(mp, MNT_LAZY, ap->a_cred, ap->a_p);
! 354: if (asyncflag)
! 355: mp->mnt_flag |= MNT_ASYNC;
! 356: vfs_unbusy(mp);
! 357: }
! 358:
! 359: return (0);
! 360: }
! 361:
! 362: /*
! 363: * The syncer vnode is no longer needed and is being decommissioned.
! 364: */
! 365: int
! 366: sync_inactive(void *v)
! 367: {
! 368: struct vop_inactive_args *ap = v;
! 369:
! 370: struct vnode *vp = ap->a_vp;
! 371: int s;
! 372:
! 373: if (vp->v_usecount == 0) {
! 374: VOP_UNLOCK(vp, 0, ap->a_p);
! 375: return (0);
! 376: }
! 377:
! 378: vp->v_mount->mnt_syncer = NULL;
! 379:
! 380: s = splbio();
! 381:
! 382: LIST_REMOVE(vp, v_synclist);
! 383: vp->v_bioflag &= ~VBIOONSYNCLIST;
! 384:
! 385: splx(s);
! 386:
! 387: vp->v_writecount = 0;
! 388: vput(vp);
! 389:
! 390: return (0);
! 391: }
! 392:
/*
 * print operation for a syncer vnode.  The argument is unused; a fixed
 * label is printed and 0 is returned.
 */
int
sync_print(void *v)
{
	printf("syncer vnode\n");
	return (0);
}
CVSweb