Annotation of sys/ufs/ffs/ffs_softdep.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: ffs_softdep.c,v 1.92 2007/07/11 15:32:22 millert Exp $ */
! 2:
! 3: /*
! 4: * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
! 5: *
! 6: * The soft updates code is derived from the appendix of a University
! 7: * of Michigan technical report (Gregory R. Ganger and Yale N. Patt,
! 8: * "Soft Updates: A Solution to the Metadata Update Problem in File
! 9: * Systems", CSE-TR-254-95, August 1995).
! 10: *
! 11: * Further information about soft updates can be obtained from:
! 12: *
! 13: * Marshall Kirk McKusick http://www.mckusick.com/softdep/
! 14: * 1614 Oxford Street mckusick@mckusick.com
! 15: * Berkeley, CA 94709-1608 +1-510-843-9542
! 16: * USA
! 17: *
! 18: * Redistribution and use in source and binary forms, with or without
! 19: * modification, are permitted provided that the following conditions
! 20: * are met:
! 21: *
! 22: * 1. Redistributions of source code must retain the above copyright
! 23: * notice, this list of conditions and the following disclaimer.
! 24: * 2. Redistributions in binary form must reproduce the above copyright
! 25: * notice, this list of conditions and the following disclaimer in the
! 26: * documentation and/or other materials provided with the distribution.
! 27: *
! 28: * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
! 29: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
! 30: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
! 31: * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
! 32: * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 33: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 34: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 35: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 36: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 37: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 38: * SUCH DAMAGE.
! 39: *
! 40: * from: @(#)ffs_softdep.c 9.59 (McKusick) 6/21/00
! 41: * $FreeBSD: src/sys/ufs/ffs/ffs_softdep.c,v 1.86 2001/02/04 16:08:18 phk Exp $
! 42: */
! 43:
! 44: #include <sys/param.h>
! 45: #include <sys/buf.h>
! 46: #include <sys/kernel.h>
! 47: #include <sys/malloc.h>
! 48: #include <sys/mount.h>
! 49: #include <sys/proc.h>
! 50: #include <sys/pool.h>
! 51: #include <sys/syslog.h>
! 52: #include <sys/systm.h>
! 53: #include <sys/vnode.h>
! 54: #include <miscfs/specfs/specdev.h>
! 55: #include <ufs/ufs/dir.h>
! 56: #include <ufs/ufs/quota.h>
! 57: #include <ufs/ufs/inode.h>
! 58: #include <ufs/ufs/ufsmount.h>
! 59: #include <ufs/ffs/fs.h>
! 60: #include <ufs/ffs/softdep.h>
! 61: #include <ufs/ffs/ffs_extern.h>
! 62: #include <ufs/ufs/ufs_extern.h>
! 63:
! 64: #define STATIC
! 65:
! 66: /*
! 67: * Mapping of dependency structure types to malloc types.
! 68: */
! 69: #define D_PAGEDEP 0
! 70: #define D_INODEDEP 1
! 71: #define D_NEWBLK 2
! 72: #define D_BMSAFEMAP 3
! 73: #define D_ALLOCDIRECT 4
! 74: #define D_INDIRDEP 5
! 75: #define D_ALLOCINDIR 6
! 76: #define D_FREEFRAG 7
! 77: #define D_FREEBLKS 8
! 78: #define D_FREEFILE 9
! 79: #define D_DIRADD 10
! 80: #define D_MKDIR 11
! 81: #define D_DIRREM 12
! 82: #define D_NEWDIRBLK 13
! 83: #define D_LAST 13
/*
 * Printable names of softdep dependency types, indexed directly by the
 * D_* constants above (see TYPENAME() below).  The order of entries
 * must therefore match the D_PAGEDEP..D_NEWDIRBLK definitions exactly.
 */
const char *softdep_typenames[] = {
	"pagedep",
	"inodedep",
	"newblk",
	"bmsafemap",
	"allocdirect",
	"indirdep",
	"allocindir",
	"freefrag",
	"freeblks",
	"freefile",
	"diradd",
	"mkdir",
	"dirrem",
	"newdirblk",
};
! 103: #define TYPENAME(type) \
! 104: ((unsigned)(type) <= D_LAST ? softdep_typenames[type] : "???")
! 105: /*
! 106: * Finding the current process.
! 107: */
! 108: #define CURPROC curproc
! 109: /*
! 110: * End system adaptation definitions.
! 111: */
! 112:
! 113: /*
! 114: * Internal function prototypes.
! 115: */
! 116: STATIC void softdep_error(char *, int);
! 117: STATIC void drain_output(struct vnode *, int);
! 118: STATIC int getdirtybuf(struct buf *, int);
! 119: STATIC void clear_remove(struct proc *);
! 120: STATIC void clear_inodedeps(struct proc *);
! 121: STATIC int flush_pagedep_deps(struct vnode *, struct mount *,
! 122: struct diraddhd *);
! 123: STATIC int flush_inodedep_deps(struct fs *, ino_t);
! 124: STATIC int handle_written_filepage(struct pagedep *, struct buf *);
! 125: STATIC void diradd_inode_written(struct diradd *, struct inodedep *);
! 126: STATIC int handle_written_inodeblock(struct inodedep *, struct buf *);
! 127: STATIC void handle_allocdirect_partdone(struct allocdirect *);
! 128: STATIC void handle_allocindir_partdone(struct allocindir *);
! 129: STATIC void initiate_write_filepage(struct pagedep *, struct buf *);
! 130: STATIC void handle_written_mkdir(struct mkdir *, int);
! 131: STATIC void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *);
! 132: #ifdef FFS2
! 133: STATIC void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *);
! 134: #endif
! 135: STATIC void handle_workitem_freefile(struct freefile *);
! 136: STATIC void handle_workitem_remove(struct dirrem *);
! 137: STATIC struct dirrem *newdirrem(struct buf *, struct inode *,
! 138: struct inode *, int, struct dirrem **);
! 139: STATIC void free_diradd(struct diradd *);
! 140: STATIC void free_allocindir(struct allocindir *, struct inodedep *);
! 141: STATIC void free_newdirblk(struct newdirblk *);
! 142: STATIC int indir_trunc(struct inode *, daddr_t, int, daddr64_t, long *);
! 143: STATIC void deallocate_dependencies(struct buf *, struct inodedep *);
! 144: STATIC void free_allocdirect(struct allocdirectlst *,
! 145: struct allocdirect *, int);
! 146: STATIC int check_inode_unwritten(struct inodedep *);
! 147: STATIC int free_inodedep(struct inodedep *);
! 148: STATIC void handle_workitem_freeblocks(struct freeblks *);
! 149: STATIC void merge_inode_lists(struct inodedep *);
! 150: STATIC void setup_allocindir_phase2(struct buf *, struct inode *,
! 151: struct allocindir *);
! 152: STATIC struct allocindir *newallocindir(struct inode *, int, daddr_t,
! 153: daddr_t);
! 154: STATIC void handle_workitem_freefrag(struct freefrag *);
! 155: STATIC struct freefrag *newfreefrag(struct inode *, daddr_t, long);
! 156: STATIC void allocdirect_merge(struct allocdirectlst *,
! 157: struct allocdirect *, struct allocdirect *);
! 158: STATIC struct bmsafemap *bmsafemap_lookup(struct buf *);
! 159: STATIC int newblk_lookup(struct fs *, daddr_t, int,
! 160: struct newblk **);
! 161: STATIC int inodedep_lookup(struct fs *, ino_t, int, struct inodedep **);
! 162: STATIC int pagedep_lookup(struct inode *, daddr64_t, int, struct pagedep **);
! 163: STATIC void pause_timer(void *);
! 164: STATIC int request_cleanup(int, int);
! 165: STATIC int process_worklist_item(struct mount *, int);
! 166: STATIC void add_to_worklist(struct worklist *);
! 167:
! 168: /*
! 169: * Exported softdep operations.
! 170: */
! 171: void softdep_disk_io_initiation(struct buf *);
! 172: void softdep_disk_write_complete(struct buf *);
! 173: void softdep_deallocate_dependencies(struct buf *);
! 174: void softdep_move_dependencies(struct buf *, struct buf *);
! 175: int softdep_count_dependencies(struct buf *bp, int, int);
! 176:
! 177: /*
! 178: * Locking primitives.
! 179: *
! 180: * For a uniprocessor, all we need to do is protect against disk
! 181: * interrupts. For a multiprocessor, this lock would have to be
! 182: * a mutex. A single mutex is used throughout this file, though
! 183: * finer grain locking could be used if contention warranted it.
! 184: *
! 185: * For a multiprocessor, the sleep call would accept a lock and
! 186: * release it after the sleep processing was complete. In a uniprocessor
! 187: * implementation there is no such interlock, so we simply mark
! 188: * the places where it needs to be done with the `interlocked' form
! 189: * of the lock calls. Since the uniprocessor sleep already interlocks
! 190: * the spl, there is nothing that really needs to be done.
! 191: */
! 192: #ifndef /* NOT */ DEBUG
! 193: STATIC struct lockit {
! 194: int lkt_spl;
! 195: } lk = { 0 };
! 196: #define ACQUIRE_LOCK(lk) (lk)->lkt_spl = splbio()
! 197: #define FREE_LOCK(lk) splx((lk)->lkt_spl)
! 198: #define ACQUIRE_LOCK_INTERLOCKED(lk,s) (lk)->lkt_spl = (s)
! 199: #define FREE_LOCK_INTERLOCKED(lk) ((lk)->lkt_spl)
! 200:
! 201: #else /* DEBUG */
! 202: STATIC struct lockit {
! 203: int lkt_spl;
! 204: pid_t lkt_held;
! 205: int lkt_line;
! 206: } lk = { 0, -1 };
! 207: STATIC int lockcnt;
! 208:
! 209: STATIC void acquire_lock(struct lockit *, int);
! 210: STATIC void free_lock(struct lockit *, int);
! 211: STATIC void acquire_lock_interlocked(struct lockit *, int, int);
! 212: STATIC int free_lock_interlocked(struct lockit *, int);
! 213:
! 214: #define ACQUIRE_LOCK(lk) acquire_lock(lk, __LINE__)
! 215: #define FREE_LOCK(lk) free_lock(lk, __LINE__)
! 216: #define ACQUIRE_LOCK_INTERLOCKED(lk,s) acquire_lock_interlocked(lk, (s), __LINE__)
! 217: #define FREE_LOCK_INTERLOCKED(lk) free_lock_interlocked(lk, __LINE__)
! 218:
! 219: STATIC void
! 220: acquire_lock(lk, line)
! 221: struct lockit *lk;
! 222: int line;
! 223: {
! 224: pid_t holder;
! 225: int original_line;
! 226:
! 227: if (lk->lkt_held != -1) {
! 228: holder = lk->lkt_held;
! 229: original_line = lk->lkt_line;
! 230: FREE_LOCK(lk);
! 231: if (holder == CURPROC->p_pid)
! 232: panic("softdep_lock: locking against myself, acquired at line %d, relocked at line %d", original_line, line);
! 233: else
! 234: panic("softdep_lock: lock held by %d, acquired at line %d, relocked at line %d", holder, original_line, line);
! 235: }
! 236: lk->lkt_spl = splbio();
! 237: lk->lkt_held = CURPROC->p_pid;
! 238: lk->lkt_line = line;
! 239: lockcnt++;
! 240: }
! 241:
! 242: STATIC void
! 243: free_lock(lk, line)
! 244: struct lockit *lk;
! 245: int line;
! 246: {
! 247:
! 248: if (lk->lkt_held == -1)
! 249: panic("softdep_unlock: lock not held at line %d", line);
! 250: lk->lkt_held = -1;
! 251: splx(lk->lkt_spl);
! 252: }
! 253:
! 254: STATIC void
! 255: acquire_lock_interlocked(lk, s, line)
! 256: struct lockit *lk;
! 257: int s;
! 258: int line;
! 259: {
! 260: pid_t holder;
! 261: int original_line;
! 262:
! 263: if (lk->lkt_held != -1) {
! 264: holder = lk->lkt_held;
! 265: original_line = lk->lkt_line;
! 266: FREE_LOCK_INTERLOCKED(lk);
! 267: if (holder == CURPROC->p_pid)
! 268: panic("softdep_lock: locking against myself, acquired at line %d, relocked at line %d", original_line, line);
! 269: else
! 270: panic("softdep_lock: lock held by %d, acquired at line %d, relocked at line %d", holder, original_line, line);
! 271: }
! 272: lk->lkt_held = CURPROC->p_pid;
! 273: lk->lkt_line = line;
! 274: lk->lkt_spl = s;
! 275: lockcnt++;
! 276: }
! 277:
! 278: STATIC int
! 279: free_lock_interlocked(lk, line)
! 280: struct lockit *lk;
! 281: int line;
! 282: {
! 283:
! 284: if (lk->lkt_held == -1)
! 285: panic("softdep_unlock_interlocked: lock not held at line %d", line);
! 286: lk->lkt_held = -1;
! 287:
! 288: return (lk->lkt_spl);
! 289: }
! 290: #endif /* DEBUG */
! 291:
! 292: /*
! 293: * Place holder for real semaphores.
! 294: */
! 295: struct sema {
! 296: int value;
! 297: pid_t holder;
! 298: char *name;
! 299: int prio;
! 300: int timo;
! 301: };
! 302: STATIC void sema_init(struct sema *, char *, int, int);
! 303: STATIC int sema_get(struct sema *, struct lockit *);
! 304: STATIC void sema_release(struct sema *);
! 305:
! 306: STATIC void
! 307: sema_init(semap, name, prio, timo)
! 308: struct sema *semap;
! 309: char *name;
! 310: int prio, timo;
! 311: {
! 312:
! 313: semap->holder = -1;
! 314: semap->value = 0;
! 315: semap->name = name;
! 316: semap->prio = prio;
! 317: semap->timo = timo;
! 318: }
! 319:
! 320: STATIC int
! 321: sema_get(semap, interlock)
! 322: struct sema *semap;
! 323: struct lockit *interlock;
! 324: {
! 325: int s;
! 326:
! 327: if (semap->value++ > 0) {
! 328: if (interlock != NULL)
! 329: s = FREE_LOCK_INTERLOCKED(interlock);
! 330: tsleep((caddr_t)semap, semap->prio, semap->name, semap->timo);
! 331: if (interlock != NULL) {
! 332: ACQUIRE_LOCK_INTERLOCKED(interlock, s);
! 333: FREE_LOCK(interlock);
! 334: }
! 335: return (0);
! 336: }
! 337: semap->holder = CURPROC->p_pid;
! 338: if (interlock != NULL)
! 339: FREE_LOCK(interlock);
! 340: return (1);
! 341: }
! 342:
/*
 * Release a semaphore taken with sema_get().  Panics if the semaphore
 * is not held by the current process.  A value above one means other
 * processes incremented it in sema_get() and went to sleep; reset the
 * count to zero and wake them all so they can retry.
 */
STATIC void
sema_release(semap)
	struct sema *semap;
{

	if (semap->value <= 0 || semap->holder != CURPROC->p_pid) {
#ifdef DEBUG
		/* Release the softdep lock before panicking. */
		if (lk.lkt_held != -1)
			FREE_LOCK(&lk);
#endif
		panic("sema_release: not held");
	}
	if (--semap->value > 0) {
		/* Sleepers were counted in value; clear it and wake them. */
		semap->value = 0;
		wakeup(semap);
	}
	semap->holder = -1;
}
! 361:
! 362: /*
! 363: * Memory management.
! 364: */
! 365: STATIC struct pool pagedep_pool;
! 366: STATIC struct pool inodedep_pool;
! 367: STATIC struct pool newblk_pool;
! 368: STATIC struct pool bmsafemap_pool;
! 369: STATIC struct pool allocdirect_pool;
! 370: STATIC struct pool indirdep_pool;
! 371: STATIC struct pool allocindir_pool;
! 372: STATIC struct pool freefrag_pool;
! 373: STATIC struct pool freeblks_pool;
! 374: STATIC struct pool freefile_pool;
! 375: STATIC struct pool diradd_pool;
! 376: STATIC struct pool mkdir_pool;
! 377: STATIC struct pool dirrem_pool;
! 378: STATIC struct pool newdirblk_pool;
! 379:
! 380: static __inline void
! 381: softdep_free(struct worklist *item, int type)
! 382: {
! 383:
! 384: switch (type) {
! 385: case D_PAGEDEP:
! 386: pool_put(&pagedep_pool, item);
! 387: break;
! 388:
! 389: case D_INODEDEP:
! 390: pool_put(&inodedep_pool, item);
! 391: break;
! 392:
! 393: case D_BMSAFEMAP:
! 394: pool_put(&bmsafemap_pool, item);
! 395: break;
! 396:
! 397: case D_ALLOCDIRECT:
! 398: pool_put(&allocdirect_pool, item);
! 399: break;
! 400:
! 401: case D_INDIRDEP:
! 402: pool_put(&indirdep_pool, item);
! 403: break;
! 404:
! 405: case D_ALLOCINDIR:
! 406: pool_put(&allocindir_pool, item);
! 407: break;
! 408:
! 409: case D_FREEFRAG:
! 410: pool_put(&freefrag_pool, item);
! 411: break;
! 412:
! 413: case D_FREEBLKS:
! 414: pool_put(&freeblks_pool, item);
! 415: break;
! 416:
! 417: case D_FREEFILE:
! 418: pool_put(&freefile_pool, item);
! 419: break;
! 420:
! 421: case D_DIRADD:
! 422: pool_put(&diradd_pool, item);
! 423: break;
! 424:
! 425: case D_MKDIR:
! 426: pool_put(&mkdir_pool, item);
! 427: break;
! 428:
! 429: case D_DIRREM:
! 430: pool_put(&dirrem_pool, item);
! 431: break;
! 432:
! 433: case D_NEWDIRBLK:
! 434: pool_put(&newdirblk_pool, item);
! 435: break;
! 436:
! 437: default:
! 438: #ifdef DEBUG
! 439: if (lk.lkt_held != -1)
! 440: FREE_LOCK(&lk);
! 441: #endif
! 442: panic("softdep_free: unknown type %d", type);
! 443: }
! 444: }
! 445:
! 446: struct workhead softdep_freequeue;
! 447:
! 448: static __inline void
! 449: softdep_freequeue_add(struct worklist *item)
! 450: {
! 451: int s;
! 452:
! 453: s = splbio();
! 454: LIST_INSERT_HEAD(&softdep_freequeue, item, wk_list);
! 455: splx(s);
! 456: }
! 457:
! 458: static __inline void
! 459: softdep_freequeue_process(void)
! 460: {
! 461: struct worklist *wk;
! 462:
! 463: splassert(IPL_BIO);
! 464:
! 465: while ((wk = LIST_FIRST(&softdep_freequeue)) != NULL) {
! 466: LIST_REMOVE(wk, wk_list);
! 467: FREE_LOCK(&lk);
! 468: softdep_free(wk, wk->wk_type);
! 469: ACQUIRE_LOCK(&lk);
! 470: }
! 471: }
! 472:
! 473: /*
! 474: * Worklist queue management.
! 475: * These routines require that the lock be held.
! 476: */
! 477: #ifndef /* NOT */ DEBUG
! 478: #define WORKLIST_INSERT(head, item) do { \
! 479: (item)->wk_state |= ONWORKLIST; \
! 480: LIST_INSERT_HEAD(head, item, wk_list); \
! 481: } while (0)
! 482: #define WORKLIST_REMOVE(item) do { \
! 483: (item)->wk_state &= ~ONWORKLIST; \
! 484: LIST_REMOVE(item, wk_list); \
! 485: } while (0)
! 486: #define WORKITEM_FREE(item, type) softdep_freequeue_add((struct worklist *)item)
! 487:
! 488: #else /* DEBUG */
! 489: STATIC void worklist_insert(struct workhead *, struct worklist *);
! 490: STATIC void worklist_remove(struct worklist *);
! 491: STATIC void workitem_free(struct worklist *);
! 492:
! 493: #define WORKLIST_INSERT(head, item) worklist_insert(head, item)
! 494: #define WORKLIST_REMOVE(item) worklist_remove(item)
! 495: #define WORKITEM_FREE(item, type) workitem_free((struct worklist *)item)
! 496:
! 497: STATIC void
! 498: worklist_insert(head, item)
! 499: struct workhead *head;
! 500: struct worklist *item;
! 501: {
! 502:
! 503: if (lk.lkt_held == -1)
! 504: panic("worklist_insert: lock not held");
! 505: if (item->wk_state & ONWORKLIST) {
! 506: FREE_LOCK(&lk);
! 507: panic("worklist_insert: already on list");
! 508: }
! 509: item->wk_state |= ONWORKLIST;
! 510: LIST_INSERT_HEAD(head, item, wk_list);
! 511: }
! 512:
! 513: STATIC void
! 514: worklist_remove(item)
! 515: struct worklist *item;
! 516: {
! 517:
! 518: if (lk.lkt_held == -1)
! 519: panic("worklist_remove: lock not held");
! 520: if ((item->wk_state & ONWORKLIST) == 0) {
! 521: FREE_LOCK(&lk);
! 522: panic("worklist_remove: not on list");
! 523: }
! 524: item->wk_state &= ~ONWORKLIST;
! 525: LIST_REMOVE(item, wk_list);
! 526: }
! 527:
! 528: STATIC void
! 529: workitem_free(item)
! 530: struct worklist *item;
! 531: {
! 532:
! 533: if (item->wk_state & ONWORKLIST) {
! 534: if (lk.lkt_held != -1)
! 535: FREE_LOCK(&lk);
! 536: panic("workitem_free: still on list");
! 537: }
! 538: softdep_freequeue_add(item);
! 539: }
! 540: #endif /* DEBUG */
! 541:
! 542: /*
! 543: * Workitem queue management
! 544: */
! 545: STATIC struct workhead softdep_workitem_pending;
! 546: STATIC struct worklist *worklist_tail;
! 547: STATIC int num_on_worklist; /* number of worklist items to be processed */
! 548: STATIC int softdep_worklist_busy; /* 1 => trying to do unmount */
! 549: STATIC int softdep_worklist_req; /* serialized waiters */
! 550: STATIC int max_softdeps; /* maximum number of structs before slowdown */
! 551: STATIC int tickdelay = 2; /* number of ticks to pause during slowdown */
! 552: STATIC int proc_waiting; /* tracks whether we have a timeout posted */
! 553: STATIC int *stat_countp; /* statistic to count in proc_waiting timeout */
! 554: STATIC struct timeout proc_waiting_timeout;
! 555: STATIC struct proc *filesys_syncer; /* proc of filesystem syncer process */
! 556: STATIC int req_clear_inodedeps; /* syncer process flush some inodedeps */
! 557: #define FLUSH_INODES 1
! 558: STATIC int req_clear_remove; /* syncer process flush some freeblks */
! 559: #define FLUSH_REMOVE 2
! 560: /*
! 561: * runtime statistics
! 562: */
! 563: STATIC int stat_worklist_push; /* number of worklist cleanups */
! 564: STATIC int stat_blk_limit_push; /* number of times block limit neared */
! 565: STATIC int stat_ino_limit_push; /* number of times inode limit neared */
! 566: STATIC int stat_blk_limit_hit; /* number of times block slowdown imposed */
! 567: STATIC int stat_ino_limit_hit; /* number of times inode slowdown imposed */
! 568: STATIC int stat_sync_limit_hit; /* number of synchronous slowdowns imposed */
! 569: STATIC int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */
! 570: STATIC int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */
! 571: STATIC int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */
! 572: STATIC int stat_dir_entry; /* bufs redirtied as dir entry cannot write */
! 573:
! 574: /*
! 575: * Add an item to the end of the work queue.
! 576: * This routine requires that the lock be held.
! 577: * This is the only routine that adds items to the list.
! 578: * The following routine is the only one that removes items
! 579: * and does so in order from first to last.
! 580: */
! 581: STATIC void
! 582: add_to_worklist(wk)
! 583: struct worklist *wk;
! 584: {
! 585:
! 586: if (wk->wk_state & ONWORKLIST) {
! 587: #ifdef DEBUG
! 588: if (lk.lkt_held != -1)
! 589: FREE_LOCK(&lk);
! 590: #endif
! 591: panic("add_to_worklist: already on list");
! 592: }
! 593: wk->wk_state |= ONWORKLIST;
! 594: if (LIST_FIRST(&softdep_workitem_pending) == NULL)
! 595: LIST_INSERT_HEAD(&softdep_workitem_pending, wk, wk_list);
! 596: else
! 597: LIST_INSERT_AFTER(worklist_tail, wk, wk_list);
! 598: worklist_tail = wk;
! 599: num_on_worklist += 1;
! 600: }
! 601:
! 602: /*
! 603: * Process that runs once per second to handle items in the background queue.
! 604: *
! 605: * Note that we ensure that everything is done in the order in which they
! 606: * appear in the queue. The code below depends on this property to ensure
! 607: * that blocks of a file are freed before the inode itself is freed. This
! 608: * ordering ensures that no new <vfsid, inum, lbn> triples will be generated
! 609: * until all the old ones have been purged from the dependency lists.
! 610: */
/*
 * Run items on the work queue.
 *
 * With matchmnt == NULL we act as the background (syncer) pass:
 * process everything, returning the number of items handled, or -1 if
 * the pass was aborted (unmount pending, time budget exceeded, or the
 * list is owned by softdep_flushfiles()).  With a non-NULL matchmnt we
 * are running on behalf of a flush of that mount point and the return
 * value counts only the items that belonged to it.
 */
int
softdep_process_worklist(matchmnt)
	struct mount *matchmnt;	/* mount point whose items to count, or NULL */
{
	struct proc *p = CURPROC;
	int matchcnt, loopcount;
	struct timeval starttime;

	/*
	 * First process any items on the delayed-free queue.
	 */
	ACQUIRE_LOCK(&lk);
	softdep_freequeue_process();
	FREE_LOCK(&lk);

	/*
	 * Record the process identifier of our caller so that we can give
	 * this process preferential treatment in request_cleanup below.
	 * We can't do this in softdep_initialize, because the syncer doesn't
	 * have to run then.
	 * NOTE! This function _could_ be called with a curproc != syncerproc.
	 */
	filesys_syncer = syncerproc;
	matchcnt = 0;

	/*
	 * There is no danger of having multiple processes run this
	 * code, but we have to single-thread it when softdep_flushfiles()
	 * is in operation to get an accurate count of the number of items
	 * related to its mount point that are in the list.
	 */
	if (matchmnt == NULL) {
		/* A negative busy count means a flush owns the list. */
		if (softdep_worklist_busy < 0)
			return(-1);
		softdep_worklist_busy += 1;
	}

	/*
	 * If requested, try removing inode or removal dependencies.
	 */
	if (req_clear_inodedeps) {
		clear_inodedeps(p);
		req_clear_inodedeps -= 1;
		wakeup_one(&proc_waiting);
	}
	if (req_clear_remove) {
		clear_remove(p);
		req_clear_remove -= 1;
		wakeup_one(&proc_waiting);
	}
	loopcount = 1;
	getmicrouptime(&starttime);
	while (num_on_worklist > 0) {
		matchcnt += process_worklist_item(matchmnt, 0);

		/*
		 * If a umount operation wants to run the worklist
		 * accurately, abort.
		 */
		if (softdep_worklist_req && matchmnt == NULL) {
			matchcnt = -1;
			break;
		}

		/*
		 * If requested, try removing inode or removal dependencies.
		 */
		if (req_clear_inodedeps) {
			clear_inodedeps(p);
			req_clear_inodedeps -= 1;
			wakeup_one(&proc_waiting);
		}
		if (req_clear_remove) {
			clear_remove(p);
			req_clear_remove -= 1;
			wakeup_one(&proc_waiting);
		}
		/*
		 * We do not generally want to stop for buffer space, but if
		 * we are really being a buffer hog, we will stop and wait.
		 */
#if 0
		if (loopcount++ % 128 == 0)
			bwillwrite();
#endif
		/*
		 * Never allow processing to run for more than one
		 * second. Otherwise the other syncer tasks may get
		 * excessively backlogged.
		 */
		{
			struct timeval diff;
			struct timeval tv;

			getmicrouptime(&tv);
			timersub(&tv, &starttime, &diff);
			if (diff.tv_sec != 0 && matchmnt == NULL) {
				matchcnt = -1;
				break;
			}
		}

		/*
		 * Process any new items on the delayed-free queue.
		 */
		ACQUIRE_LOCK(&lk);
		softdep_freequeue_process();
		FREE_LOCK(&lk);
	}
	if (matchmnt == NULL) {
		softdep_worklist_busy -= 1;
		/* Wake any softdep_flushworklist() waiting to serialize. */
		if (softdep_worklist_req && softdep_worklist_busy == 0)
			wakeup(&softdep_worklist_req);
	}
	return (matchcnt);
}
! 727:
! 728: /*
! 729: * Process one item on the worklist.
! 730: */
/*
 * Remove and process the first eligible item on the worklist.
 * Returns 1 if the processed item belonged to matchmnt, else 0
 * (including when nothing could be processed).  If flags contains
 * LK_NOWAIT, dirrem items whose vnodes are resident and locked are
 * skipped rather than waited for.
 */
STATIC int
process_worklist_item(matchmnt, flags)
	struct mount *matchmnt;
	int flags;
{
	struct worklist *wk, *wkend;
	struct dirrem *dirrem;
	struct mount *mp;
	struct vnode *vp;
	int matchcnt = 0;

	ACQUIRE_LOCK(&lk);
	/*
	 * Normally we just process each item on the worklist in order.
	 * However, if we are in a situation where we cannot lock any
	 * inodes, we have to skip over any dirrem requests whose
	 * vnodes are resident and locked.
	 */
	LIST_FOREACH(wk, &softdep_workitem_pending, wk_list) {
		if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM)
			break;
		dirrem = WK_DIRREM(wk);
		vp = ufs_ihashlookup(VFSTOUFS(dirrem->dm_mnt)->um_dev,
		    dirrem->dm_oldinum);
		if (vp == NULL || !VOP_ISLOCKED(vp))
			break;
	}
	if (wk == 0) {
		/* Every remaining item was a dirrem we had to skip. */
		FREE_LOCK(&lk);
		return (0);
	}
	/*
	 * Remove the item to be processed. If we are removing the last
	 * item on the list, we need to recalculate the tail pointer.
	 * As this happens rarely and usually when the list is short,
	 * we just run down the list to find it rather than tracking it
	 * in the above loop.
	 */
	WORKLIST_REMOVE(wk);
	if (wk == worklist_tail) {
		LIST_FOREACH(wkend, &softdep_workitem_pending, wk_list)
			if (LIST_NEXT(wkend, wk_list) == NULL)
				break;
		worklist_tail = wkend;
	}
	num_on_worklist -= 1;
	FREE_LOCK(&lk);
	/* Dispatch on the item type; handlers run without the lock. */
	switch (wk->wk_type) {

	case D_DIRREM:
		/* removal of a directory entry */
		mp = WK_DIRREM(wk)->dm_mnt;
#if 0
		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
			panic("%s: dirrem on suspended filesystem",
			    "process_worklist_item");
#endif
		if (mp == matchmnt)
			matchcnt += 1;
		handle_workitem_remove(WK_DIRREM(wk));
		break;

	case D_FREEBLKS:
		/* releasing blocks and/or fragments from a file */
		mp = WK_FREEBLKS(wk)->fb_mnt;
#if 0
		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
			panic("%s: freeblks on suspended filesystem",
			    "process_worklist_item");
#endif
		if (mp == matchmnt)
			matchcnt += 1;
		handle_workitem_freeblocks(WK_FREEBLKS(wk));
		break;

	case D_FREEFRAG:
		/* releasing a fragment when replaced as a file grows */
		mp = WK_FREEFRAG(wk)->ff_mnt;
#if 0
		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
			panic("%s: freefrag on suspended filesystem",
			    "process_worklist_item");
#endif
		if (mp == matchmnt)
			matchcnt += 1;
		handle_workitem_freefrag(WK_FREEFRAG(wk));
		break;

	case D_FREEFILE:
		/* releasing an inode when its link count drops to 0 */
		mp = WK_FREEFILE(wk)->fx_mnt;
#if 0
		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
			panic("%s: freefile on suspended filesystem",
			    "process_worklist_item");
#endif
		if (mp == matchmnt)
			matchcnt += 1;
		handle_workitem_freefile(WK_FREEFILE(wk));
		break;

	default:
		panic("%s_process_worklist: Unknown type %s",
		    "softdep", TYPENAME(wk->wk_type));
		/* NOTREACHED */
	}
	return (matchcnt);
}
! 839:
! 840: /*
! 841: * Move dependencies from one buffer to another.
! 842: */
! 843: void
! 844: softdep_move_dependencies(oldbp, newbp)
! 845: struct buf *oldbp;
! 846: struct buf *newbp;
! 847: {
! 848: struct worklist *wk, *wktail;
! 849:
! 850: if (LIST_FIRST(&newbp->b_dep) != NULL)
! 851: panic("softdep_move_dependencies: need merge code");
! 852: wktail = 0;
! 853: ACQUIRE_LOCK(&lk);
! 854: while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
! 855: LIST_REMOVE(wk, wk_list);
! 856: if (wktail == 0)
! 857: LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
! 858: else
! 859: LIST_INSERT_AFTER(wktail, wk, wk_list);
! 860: wktail = wk;
! 861: }
! 862: FREE_LOCK(&lk);
! 863: }
! 864:
! 865: /*
! 866: * Purge the work list of all items associated with a particular mount point.
! 867: */
/*
 * Purge the work list of all items associated with a particular mount
 * point.  The number of processed items is returned in *countp; the
 * return value is zero or the first error from fsync'ing the mount's
 * block device.
 */
int
softdep_flushworklist(oldmnt, countp, p)
	struct mount *oldmnt;
	int *countp;
	struct proc *p;
{
	struct vnode *devvp;
	int count, error = 0;

	/*
	 * Await our turn to clear out the queue, then serialize access.
	 */
	while (softdep_worklist_busy) {
		softdep_worklist_req += 1;
		tsleep(&softdep_worklist_req, PRIBIO, "softflush", 0);
		softdep_worklist_req -= 1;
	}
	/* A negative value marks the worklist as owned by this flush. */
	softdep_worklist_busy = -1;
	/*
	 * Alternately flush the block device associated with the mount
	 * point and process any dependencies that the flushing
	 * creates. We continue until no more worklist dependencies
	 * are found.
	 */
	*countp = 0;
	devvp = VFSTOUFS(oldmnt)->um_devvp;
	while ((count = softdep_process_worklist(oldmnt)) > 0) {
		*countp += count;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
		error = VOP_FSYNC(devvp, p->p_ucred, MNT_WAIT, p);
		VOP_UNLOCK(devvp, 0, p);
		if (error)
			break;
	}
	softdep_worklist_busy = 0;
	/* Give any waiters queued above their turn. */
	if (softdep_worklist_req)
		wakeup(&softdep_worklist_req);
	return (error);
}
! 907:
! 908: /*
! 909: * Flush all vnodes and worklist items associated with a specified mount point.
! 910: */
! 911: int
! 912: softdep_flushfiles(oldmnt, flags, p)
! 913: struct mount *oldmnt;
! 914: int flags;
! 915: struct proc *p;
! 916: {
! 917: int error, count, loopcnt;
! 918:
! 919: /*
! 920: * Alternately flush the vnodes associated with the mount
! 921: * point and process any dependencies that the flushing
! 922: * creates. In theory, this loop can happen at most twice,
! 923: * but we give it a few extra just to be sure.
! 924: */
! 925: for (loopcnt = 10; loopcnt > 0; loopcnt--) {
! 926: /*
! 927: * Do another flush in case any vnodes were brought in
! 928: * as part of the cleanup operations.
! 929: */
! 930: if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0)
! 931: break;
! 932: if ((error = softdep_flushworklist(oldmnt, &count, p)) != 0 ||
! 933: count == 0)
! 934: break;
! 935: }
! 936: /*
! 937: * If we are unmounting then it is an error to fail. If we
! 938: * are simply trying to downgrade to read-only, then filesystem
! 939: * activity can keep us busy forever, so we just fail with EBUSY.
! 940: */
! 941: if (loopcnt == 0) {
! 942: error = EBUSY;
! 943: }
! 944: return (error);
! 945: }
! 946:
! 947: /*
! 948: * Structure hashing.
! 949: *
! 950: * There are three types of structures that can be looked up:
! 951: * 1) pagedep structures identified by mount point, inode number,
! 952: * and logical block.
! 953: * 2) inodedep structures identified by mount point and inode number.
! 954: * 3) newblk structures identified by mount point and
! 955: * physical block number.
! 956: *
! 957: * The "pagedep" and "inodedep" dependency structures are hashed
! 958: * separately from the file blocks and inodes to which they correspond.
! 959: * This separation helps when the in-memory copy of an inode or
! 960: * file block must be replaced. It also obviates the need to access
! 961: * an inode or file page when simply updating (or de-allocating)
! 962: * dependency structures. Lookup of newblk structures is needed to
! 963: * find newly allocated blocks when trying to associate them with
! 964: * their allocdirect or allocindir structure.
! 965: *
! 966: * The lookup routines optionally create and hash a new instance when
! 967: * an existing entry is not found.
! 968: */
/*
 * Flags passed to the *_lookup() routines below to control whether a
 * missing entry is created and whether the caller can tolerate delay.
 */
#define DEPALLOC	0x0001	/* allocate structure if lookup fails */
#define NODELAY		0x0002	/* cannot do background work */

/*
 * Structures and routines associated with pagedep caching.
 */
/* Global hash table of all active pagedeps, keyed by mount/inode/lbn. */
LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl;
u_long	pagedep_hash;		/* size of hash table - 1 */
/*
 * Map a (mount point, inode number, logical block) triple to its hash
 * chain; the mount pointer is shifted down to mix its high bits in.
 * NOTE(review): pagedep_hashtbl/pagedep_hash are not STATIC, unlike
 * inodedep_hash below -- confirm this is intentional.
 */
#define	PAGEDEP_HASH(mp, inum, lbn) \
	(&pagedep_hashtbl[((((register_t)(mp)) >> 13) + (inum) + (lbn)) & \
	pagedep_hash])
/* Serializes creation of new pagedep structures; see pagedep_lookup(). */
STATIC struct sema pagedep_in_progress;
! 981:
! 982: /*
! 983: * Look up a pagedep. Return 1 if found, 0 if not found or found
! 984: * when asked to allocate but not associated with any buffer.
! 985: * If not found, allocate if DEPALLOC flag is passed.
! 986: * Found or allocated entry is returned in pagedeppp.
! 987: * This routine must be called with splbio interrupts blocked.
! 988: */
STATIC int
pagedep_lookup(ip, lbn, flags, pagedeppp)
	struct inode *ip;		/* inode the page belongs to */
	daddr64_t lbn;			/* logical block number of the page */
	int flags;			/* DEPALLOC to create missing entry */
	struct pagedep **pagedeppp;	/* returned entry, or NULL */
{
	struct pagedep *pagedep;
	struct pagedep_hashhead *pagedephd;
	struct mount *mp;
	int i;

	splassert(IPL_BIO);

#ifdef DEBUG
	if (lk.lkt_held == -1)
		panic("pagedep_lookup: lock not held");
#endif
	mp = ITOV(ip)->v_mount;
	pagedephd = PAGEDEP_HASH(mp, ip->i_number, lbn);
top:
	/* Search the hash chain for an existing entry. */
	LIST_FOREACH(pagedep, pagedephd, pd_hash)
		if (ip->i_number == pagedep->pd_ino &&
		    lbn == pagedep->pd_lbn &&
		    mp == pagedep->pd_mnt)
			break;
	if (pagedep) {
		*pagedeppp = pagedep;
		/*
		 * Found, but not currently on any buffer worklist:
		 * report 0 so an allocating caller re-attaches it
		 * (see the header comment above this function).
		 */
		if ((flags & DEPALLOC) != 0 &&
		    (pagedep->pd_state & ONWORKLIST) == 0)
			return (0);
		return (1);
	}
	if ((flags & DEPALLOC) == 0) {
		*pagedeppp = NULL;
		return (0);
	}
	/*
	 * Serialize creation through the semaphore; if we had to
	 * sleep, someone else may have created the entry meanwhile,
	 * so search again.
	 */
	if (sema_get(&pagedep_in_progress, &lk) == 0) {
		ACQUIRE_LOCK(&lk);
		goto top;
	}
	/* lk is re-acquired below, so the allocation may safely sleep. */
	pagedep = pool_get(&pagedep_pool, PR_WAITOK);
	bzero(pagedep, sizeof(struct pagedep));
	pagedep->pd_list.wk_type = D_PAGEDEP;
	pagedep->pd_mnt = mp;
	pagedep->pd_ino = ip->i_number;
	pagedep->pd_lbn = lbn;
	LIST_INIT(&pagedep->pd_dirremhd);
	LIST_INIT(&pagedep->pd_pendinghd);
	for (i = 0; i < DAHASHSZ; i++)
		LIST_INIT(&pagedep->pd_diraddhd[i]);
	ACQUIRE_LOCK(&lk);
	LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash);
	sema_release(&pagedep_in_progress);
	*pagedeppp = pagedep;
	return (0);
}
! 1046:
! 1047: /*
! 1048: * Structures and routines associated with inodedep caching.
! 1049: */
/* Global hash table of all active inodedeps, keyed by fs and inode number. */
LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl;
STATIC u_long	inodedep_hash;	/* size of hash table - 1 */
STATIC long	num_inodedep;	/* number of inodedep allocated */
/* Map an (fs, inode number) pair to its inodedep hash chain. */
#define	INODEDEP_HASH(fs, inum) \
	(&inodedep_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & inodedep_hash])
/* Serializes creation of new inodedep structures; see inodedep_lookup(). */
STATIC struct sema inodedep_in_progress;
! 1056:
! 1057: /*
! 1058: * Look up a inodedep. Return 1 if found, 0 if not found.
! 1059: * If not found, allocate if DEPALLOC flag is passed.
! 1060: * Found or allocated entry is returned in inodedeppp.
! 1061: * This routine must be called with splbio interrupts blocked.
! 1062: */
STATIC int
inodedep_lookup(fs, inum, flags, inodedeppp)
	struct fs *fs;			/* filesystem the inode lives on */
	ino_t inum;			/* inode number to look up */
	int flags;			/* DEPALLOC and/or NODELAY */
	struct inodedep **inodedeppp;	/* returned entry, or NULL */
{
	struct inodedep *inodedep;
	struct inodedep_hashhead *inodedephd;
	int firsttry;

	splassert(IPL_BIO);

#ifdef DEBUG
	if (lk.lkt_held == -1)
		panic("inodedep_lookup: lock not held");
#endif
	firsttry = 1;
	inodedephd = INODEDEP_HASH(fs, inum);
top:
	/* Search the hash chain for an existing entry. */
	LIST_FOREACH(inodedep, inodedephd, id_hash)
		if (inum == inodedep->id_ino && fs == inodedep->id_fs)
			break;
	if (inodedep) {
		*inodedeppp = inodedep;
		return (1);
	}
	if ((flags & DEPALLOC) == 0) {
		*inodedeppp = NULL;
		return (0);
	}
	/*
	 * If we are over our limit, try to improve the situation.
	 * Request a flush of inodes at most once per call, and never
	 * when the caller cannot tolerate delay (NODELAY).
	 */
	if (num_inodedep > max_softdeps && firsttry && (flags & NODELAY) == 0 &&
	    request_cleanup(FLUSH_INODES, 1)) {
		firsttry = 0;
		goto top;
	}
	/*
	 * Serialize creation through the semaphore; if we had to
	 * sleep, someone else may have created the entry meanwhile,
	 * so search again.
	 */
	if (sema_get(&inodedep_in_progress, &lk) == 0) {
		ACQUIRE_LOCK(&lk);
		goto top;
	}
	num_inodedep += 1;
	/* lk is re-acquired below, so the allocation may safely sleep. */
	inodedep = pool_get(&inodedep_pool, PR_WAITOK);
	inodedep->id_list.wk_type = D_INODEDEP;
	inodedep->id_fs = fs;
	inodedep->id_ino = inum;
	inodedep->id_state = ALLCOMPLETE;
	inodedep->id_nlinkdelta = 0;
	inodedep->id_savedino1 = NULL;
	inodedep->id_savedsize = -1;
	inodedep->id_buf = NULL;
	LIST_INIT(&inodedep->id_pendinghd);
	LIST_INIT(&inodedep->id_inowait);
	LIST_INIT(&inodedep->id_bufwait);
	TAILQ_INIT(&inodedep->id_inoupdt);
	TAILQ_INIT(&inodedep->id_newinoupdt);
	ACQUIRE_LOCK(&lk);
	LIST_INSERT_HEAD(inodedephd, inodedep, id_hash);
	sema_release(&inodedep_in_progress);
	*inodedeppp = inodedep;
	return (0);
}
! 1127:
! 1128: /*
! 1129: * Structures and routines associated with newblk caching.
! 1130: */
/* Global hash table of all active newblks, keyed by fs and block number. */
LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl;
u_long	newblk_hash;		/* size of hash table - 1 */
/*
 * Map an (fs, physical block number) pair to its newblk hash chain.
 * NOTE(review): newblk_hashtbl/newblk_hash lack STATIC, unlike
 * inodedep_hash above -- confirm this is intentional.
 */
#define	NEWBLK_HASH(fs, inum) \
	(&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash])
/* Serializes creation of new newblk structures; see newblk_lookup(). */
STATIC struct sema newblk_in_progress;
! 1136:
! 1137: /*
! 1138: * Look up a newblk. Return 1 if found, 0 if not found.
! 1139: * If not found, allocate if DEPALLOC flag is passed.
! 1140: * Found or allocated entry is returned in newblkpp.
! 1141: */
STATIC int
newblk_lookup(fs, newblkno, flags, newblkpp)
	struct fs *fs;			/* filesystem the block belongs to */
	daddr_t newblkno;		/* physical block number to look up */
	int flags;			/* DEPALLOC to create missing entry */
	struct newblk **newblkpp;	/* returned entry, or NULL */
{
	struct newblk *newblk;
	struct newblk_hashhead *newblkhd;

	newblkhd = NEWBLK_HASH(fs, newblkno);
top:
	/* Search the hash chain for an existing entry. */
	LIST_FOREACH(newblk, newblkhd, nb_hash)
		if (newblkno == newblk->nb_newblkno && fs == newblk->nb_fs)
			break;
	if (newblk) {
		*newblkpp = newblk;
		return (1);
	}
	if ((flags & DEPALLOC) == 0) {
		*newblkpp = NULL;
		return (0);
	}
	/*
	 * Serialize creation through the semaphore (note: no interlock
	 * is passed here, unlike the pagedep/inodedep lookups); if we
	 * had to sleep, search again in case someone else created it.
	 */
	if (sema_get(&newblk_in_progress, 0) == 0)
		goto top;
	newblk = pool_get(&newblk_pool, PR_WAITOK);
	newblk->nb_state = 0;
	newblk->nb_fs = fs;
	newblk->nb_newblkno = newblkno;
	LIST_INSERT_HEAD(newblkhd, newblk, nb_hash);
	sema_release(&newblk_in_progress);
	*newblkpp = newblk;
	return (0);
}
! 1176:
! 1177: /*
! 1178: * Executed during filesystem system initialization before
! 1179: * mounting any file systems.
! 1180: */
! 1181: void
! 1182: softdep_initialize()
! 1183: {
! 1184:
! 1185: bioops.io_start = softdep_disk_io_initiation;
! 1186: bioops.io_complete = softdep_disk_write_complete;
! 1187: bioops.io_deallocate = softdep_deallocate_dependencies;
! 1188: bioops.io_movedeps = softdep_move_dependencies;
! 1189: bioops.io_countdeps = softdep_count_dependencies;
! 1190:
! 1191: LIST_INIT(&mkdirlisthd);
! 1192: LIST_INIT(&softdep_workitem_pending);
! 1193: #ifdef KMEMSTATS
! 1194: max_softdeps = min (desiredvnodes * 8,
! 1195: kmemstats[M_INODEDEP].ks_limit / (2 * sizeof(struct inodedep)));
! 1196: #else
! 1197: max_softdeps = desiredvnodes * 4;
! 1198: #endif
! 1199: pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, M_WAITOK,
! 1200: &pagedep_hash);
! 1201: sema_init(&pagedep_in_progress, "pagedep", PRIBIO, 0);
! 1202: inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, M_WAITOK,
! 1203: &inodedep_hash);
! 1204: sema_init(&inodedep_in_progress, "inodedep", PRIBIO, 0);
! 1205: newblk_hashtbl = hashinit(64, M_NEWBLK, M_WAITOK, &newblk_hash);
! 1206: sema_init(&newblk_in_progress, "newblk", PRIBIO, 0);
! 1207: timeout_set(&proc_waiting_timeout, pause_timer, 0);
! 1208: pool_init(&pagedep_pool, sizeof(struct pagedep), 0, 0, 0,
! 1209: "pagedeppl", &pool_allocator_nointr);
! 1210: pool_init(&inodedep_pool, sizeof(struct inodedep), 0, 0, 0,
! 1211: "inodedeppl", &pool_allocator_nointr);
! 1212: pool_init(&newblk_pool, sizeof(struct newblk), 0, 0, 0,
! 1213: "newblkpl", &pool_allocator_nointr);
! 1214: pool_init(&bmsafemap_pool, sizeof(struct bmsafemap), 0, 0, 0,
! 1215: "bmsafemappl", &pool_allocator_nointr);
! 1216: pool_init(&allocdirect_pool, sizeof(struct allocdirect), 0, 0, 0,
! 1217: "allocdirectpl", &pool_allocator_nointr);
! 1218: pool_init(&indirdep_pool, sizeof(struct indirdep), 0, 0, 0,
! 1219: "indirdeppl", &pool_allocator_nointr);
! 1220: pool_init(&allocindir_pool, sizeof(struct allocindir), 0, 0, 0,
! 1221: "allocindirpl", &pool_allocator_nointr);
! 1222: pool_init(&freefrag_pool, sizeof(struct freefrag), 0, 0, 0,
! 1223: "freefragpl", &pool_allocator_nointr);
! 1224: pool_init(&freeblks_pool, sizeof(struct freeblks), 0, 0, 0,
! 1225: "freeblkspl", &pool_allocator_nointr);
! 1226: pool_init(&freefile_pool, sizeof(struct freefile), 0, 0, 0,
! 1227: "freefilepl", &pool_allocator_nointr);
! 1228: pool_init(&diradd_pool, sizeof(struct diradd), 0, 0, 0,
! 1229: "diraddpl", &pool_allocator_nointr);
! 1230: pool_init(&mkdir_pool, sizeof(struct mkdir), 0, 0, 0,
! 1231: "mkdirpl", &pool_allocator_nointr);
! 1232: pool_init(&dirrem_pool, sizeof(struct dirrem), 0, 0, 0,
! 1233: "dirrempl", &pool_allocator_nointr);
! 1234: pool_init(&newdirblk_pool, sizeof(struct newdirblk), 0, 0, 0,
! 1235: "newdirblkpl", &pool_allocator_nointr);
! 1236: }
! 1237:
! 1238: /*
! 1239: * Called at mount time to notify the dependency code that a
! 1240: * filesystem wishes to use it.
! 1241: */
int
softdep_mount(devvp, mp, fs, cred)
	struct vnode *devvp;	/* device vnode for the filesystem */
	struct mount *mp;	/* mount point (unused in this routine) */
	struct fs *fs;		/* superblock of filesystem being mounted */
	struct ucred *cred;	/* credentials used for cylinder group reads */
{
	struct csum_total cstotal;
	struct cg *cgp;
	struct buf *bp;
	int error, cyl;

	/*
	 * When doing soft updates, the counters in the
	 * superblock may have gotten out of sync, so we have
	 * to scan the cylinder groups and recalculate them.
	 */
	if ((fs->fs_flags & FS_UNCLEAN) == 0)
		return (0);
	bzero(&cstotal, sizeof cstotal);
	for (cyl = 0; cyl < fs->fs_ncg; cyl++) {
		if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)),
		    fs->fs_cgsize, cred, &bp)) != 0) {
			/* Release the buffer handed back by bread(). */
			brelse(bp);
			return (error);
		}
		cgp = (struct cg *)bp->b_data;
		/* Accumulate this cylinder group's counts. */
		cstotal.cs_nffree += cgp->cg_cs.cs_nffree;
		cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree;
		cstotal.cs_nifree += cgp->cg_cs.cs_nifree;
		cstotal.cs_ndir += cgp->cg_cs.cs_ndir;
		/* Refresh the in-core per-cg summary from the on-disk copy. */
		fs->fs_cs(fs, cyl) = cgp->cg_cs;
		brelse(bp);
	}
#ifdef DEBUG
	if (bcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal))
		printf("ffs_mountfs: superblock updated for soft updates\n");
#endif
	/* Install the recomputed totals in the superblock. */
	bcopy(&cstotal, &fs->fs_cstotal, sizeof cstotal);
	return (0);
}
! 1283:
! 1284: /*
! 1285: * Protecting the freemaps (or bitmaps).
! 1286: *
! 1287: * To eliminate the need to execute fsck before mounting a file system
! 1288: * after a power failure, one must (conservatively) guarantee that the
! 1289: * on-disk copy of the bitmaps never indicate that a live inode or block is
! 1290: * free. So, when a block or inode is allocated, the bitmap should be
! 1291: * updated (on disk) before any new pointers. When a block or inode is
! 1292: * freed, the bitmap should not be updated until all pointers have been
! 1293: * reset. The latter dependency is handled by the delayed de-allocation
! 1294: * approach described below for block and inode de-allocation. The former
! 1295: * dependency is handled by calling the following procedure when a block or
! 1296: * inode is allocated. When an inode is allocated an "inodedep" is created
! 1297: * with its DEPCOMPLETE flag cleared until its bitmap is written to disk.
! 1298: * Each "inodedep" is also inserted into the hash indexing structure so
! 1299: * that any additional link additions can be made dependent on the inode
! 1300: * allocation.
! 1301: *
! 1302: * The ufs file system maintains a number of free block counts (e.g., per
! 1303: * cylinder group, per cylinder and per <cylinder, rotational position> pair)
! 1304: * in addition to the bitmaps. These counts are used to improve efficiency
! 1305: * during allocation and therefore must be consistent with the bitmaps.
! 1306: * There is no convenient way to guarantee post-crash consistency of these
! 1307: * counts with simple update ordering, for two main reasons: (1) The counts
! 1308: * and bitmaps for a single cylinder group block are not in the same disk
! 1309: * sector. If a disk write is interrupted (e.g., by power failure), one may
! 1310: * be written and the other not. (2) Some of the counts are located in the
! 1311: * superblock rather than the cylinder group block. So, we focus our soft
! 1312: * updates implementation on protecting the bitmaps. When mounting a
! 1313: * filesystem, we recompute the auxiliary counts from the bitmaps.
! 1314: */
! 1315:
! 1316: /*
! 1317: * Called just after updating the cylinder group block to allocate an inode.
! 1318: */
void
softdep_setup_inomapdep(bp, ip, newinum)
	struct buf *bp;		/* buffer for cylgroup block with inode map */
	struct inode *ip;	/* inode related to allocation */
	ino_t newinum;		/* new inode number being allocated */
{
	struct inodedep *inodedep;
	struct bmsafemap *bmsafemap;

	/*
	 * Create a dependency for the newly allocated inode.
	 * Panic if it already exists as something is seriously wrong.
	 * Otherwise add it to the dependency list for the buffer holding
	 * the cylinder group map from which it was allocated.
	 */
	ACQUIRE_LOCK(&lk);
	if (inodedep_lookup(ip->i_fs, newinum, DEPALLOC | NODELAY, &inodedep)
	    != 0) {
		FREE_LOCK(&lk);
		panic("softdep_setup_inomapdep: found inode");
	}
	inodedep->id_buf = bp;
	/*
	 * Not DEPCOMPLETE until the cylinder group bitmap is written
	 * to disk (see "Protecting the freemaps" above).
	 */
	inodedep->id_state &= ~DEPCOMPLETE;
	bmsafemap = bmsafemap_lookup(bp);
	LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps);
	FREE_LOCK(&lk);
}
! 1346:
! 1347: /*
! 1348: * Called just after updating the cylinder group block to
! 1349: * allocate block or fragment.
! 1350: */
void
softdep_setup_blkmapdep(bp, fs, newblkno)
	struct buf *bp;		/* buffer for cylgroup block with block map */
	struct fs *fs;		/* filesystem doing allocation */
	daddr_t newblkno;	/* number of newly allocated block */
{
	struct newblk *newblk;
	struct bmsafemap *bmsafemap;

	/*
	 * Create a dependency for the newly allocated block.
	 * Add it to the dependency list for the buffer holding
	 * the cylinder group map from which it was allocated.
	 */
	if (newblk_lookup(fs, newblkno, DEPALLOC, &newblk) != 0)
		panic("softdep_setup_blkmapdep: found block");
	ACQUIRE_LOCK(&lk);
	/* Record which cylinder group buffer tracks this block's bitmap. */
	newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(bp);
	LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps);
	FREE_LOCK(&lk);
}
! 1372:
! 1373: /*
! 1374: * Find the bmsafemap associated with a cylinder group buffer.
! 1375: * If none exists, create one. The buffer must be locked when
! 1376: * this routine is called and this routine must be called with
! 1377: * splbio interrupts blocked.
! 1378: */
STATIC struct bmsafemap *
bmsafemap_lookup(bp)
	struct buf *bp;		/* locked cylinder group buffer */
{
	struct bmsafemap *bmsafemap;
	struct worklist *wk;

	splassert(IPL_BIO);

#ifdef DEBUG
	if (lk.lkt_held == -1)
		panic("bmsafemap_lookup: lock not held");
#endif
	/* Reuse an existing bmsafemap if one is already on the buffer. */
	LIST_FOREACH(wk, &bp->b_dep, wk_list)
		if (wk->wk_type == D_BMSAFEMAP)
			return (WK_BMSAFEMAP(wk));
	/*
	 * None found: allocate a fresh one.  The softdep lock is
	 * dropped around the (possibly sleeping) pool allocation and
	 * re-taken before linking the new structure onto the buffer;
	 * the caller holds the buffer locked (see header comment), so
	 * no competing bmsafemap can be attached in the window.
	 */
	FREE_LOCK(&lk);
	bmsafemap = pool_get(&bmsafemap_pool, PR_WAITOK);
	bmsafemap->sm_list.wk_type = D_BMSAFEMAP;
	bmsafemap->sm_list.wk_state = 0;
	bmsafemap->sm_buf = bp;
	LIST_INIT(&bmsafemap->sm_allocdirecthd);
	LIST_INIT(&bmsafemap->sm_allocindirhd);
	LIST_INIT(&bmsafemap->sm_inodedephd);
	LIST_INIT(&bmsafemap->sm_newblkhd);
	ACQUIRE_LOCK(&lk);
	WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list);
	return (bmsafemap);
}
! 1408:
! 1409: /*
! 1410: * Direct block allocation dependencies.
! 1411: *
! 1412: * When a new block is allocated, the corresponding disk locations must be
! 1413: * initialized (with zeros or new data) before the on-disk inode points to
! 1414: * them. Also, the freemap from which the block was allocated must be
! 1415: * updated (on disk) before the inode's pointer. These two dependencies are
! 1416: * independent of each other and are needed for all file blocks and indirect
! 1417: * blocks that are pointed to directly by the inode. Just before the
! 1418: * "in-core" version of the inode is updated with a newly allocated block
! 1419: * number, a procedure (below) is called to setup allocation dependency
! 1420: * structures. These structures are removed when the corresponding
! 1421: * dependencies are satisfied or when the block allocation becomes obsolete
! 1422: * (i.e., the file is deleted, the block is de-allocated, or the block is a
! 1423: * fragment that gets upgraded). All of these cases are handled in
! 1424: * procedures described later.
! 1425: *
! 1426: * When a file extension causes a fragment to be upgraded, either to a larger
! 1427: * fragment or to a full block, the on-disk location may change (if the
! 1428: * previous fragment could not simply be extended). In this case, the old
! 1429: * fragment must be de-allocated, but not until after the inode's pointer has
! 1430: * been updated. In most cases, this is handled by later procedures, which
! 1431: * will construct a "freefrag" structure to be added to the workitem queue
! 1432: * when the inode update is complete (or obsolete). The main exception to
! 1433: * this is when an allocation occurs while a pending allocation dependency
! 1434: * (for the same block pointer) remains. This case is handled in the main
! 1435: * allocation dependency setup procedure by immediately freeing the
! 1436: * unreferenced fragments.
! 1437: */
void
softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
	struct inode *ip;	/* inode to which block is being added */
	daddr64_t lbn;		/* block pointer within inode */
	daddr_t newblkno;	/* disk block number being added */
	daddr_t oldblkno;	/* previous block number, 0 unless frag */
	long newsize;		/* size of new block */
	long oldsize;		/* size of old block being replaced, if frag */
	struct buf *bp;		/* bp for allocated block */
{
	struct allocdirect *adp, *oldadp;
	struct allocdirectlst *adphead;
	struct bmsafemap *bmsafemap;
	struct inodedep *inodedep;
	struct pagedep *pagedep;
	struct newblk *newblk;

	/* Build the new dependency before taking the softdep lock. */
	adp = pool_get(&allocdirect_pool, PR_WAITOK);
	bzero(adp, sizeof(struct allocdirect));
	adp->ad_list.wk_type = D_ALLOCDIRECT;
	adp->ad_lbn = lbn;
	adp->ad_newblkno = newblkno;
	adp->ad_oldblkno = oldblkno;
	adp->ad_newsize = newsize;
	adp->ad_oldsize = oldsize;
	adp->ad_state = ATTACHED;
	LIST_INIT(&adp->ad_newdirblk);
	/* If a fragment is being replaced, arrange for it to be freed later. */
	if (newblkno == oldblkno)
		adp->ad_freefrag = NULL;
	else
		adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize);

	/* The bitmap dependency for this block must already exist. */
	if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0)
		panic("softdep_setup_allocdirect: lost block");

	ACQUIRE_LOCK(&lk);
	inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC | NODELAY, &inodedep);
	adp->ad_inodedep = inodedep;

	/*
	 * Transfer the bitmap dependency (if any) from the newblk to
	 * the allocdirect, then discard the now-redundant newblk.
	 */
	if (newblk->nb_state == DEPCOMPLETE) {
		adp->ad_state |= DEPCOMPLETE;
		adp->ad_buf = NULL;
	} else {
		bmsafemap = newblk->nb_bmsafemap;
		adp->ad_buf = bmsafemap->sm_buf;
		LIST_REMOVE(newblk, nb_deps);
		LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps);
	}
	LIST_REMOVE(newblk, nb_hash);
	pool_put(&newblk_pool, newblk);

	if (bp == NULL) {
		/*
		 * XXXUBC - Yes, I know how to fix this, but not right now.
		 */
		panic("softdep_setup_allocdirect: Bonk art in the head");
	}
	WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
	if (lbn >= NDADDR) {
		/* allocating an indirect block */
		if (oldblkno != 0) {
			FREE_LOCK(&lk);
			panic("softdep_setup_allocdirect: non-zero indir");
		}
	} else {
		/*
		 * Allocating a direct block.
		 *
		 * If we are allocating a directory block, then we must
		 * allocate an associated pagedep to track additions and
		 * deletions.
		 */
		if ((DIP(ip, mode) & IFMT) == IFDIR &&
		    pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
			WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
	}
	/*
	 * The list of allocdirects must be kept in sorted and ascending
	 * order so that the rollback routines can quickly determine the
	 * first uncommitted block (the size of the file stored on disk
	 * ends at the end of the lowest committed fragment, or if there
	 * are no fragments, at the end of the highest committed block).
	 * Since files generally grow, the typical case is that the new
	 * block is to be added at the end of the list. We speed this
	 * special case by checking against the last allocdirect in the
	 * list before laboriously traversing the list looking for the
	 * insertion point.
	 */
	adphead = &inodedep->id_newinoupdt;
	oldadp = TAILQ_LAST(adphead, allocdirectlst);
	if (oldadp == NULL || oldadp->ad_lbn <= lbn) {
		/* insert at end of list */
		TAILQ_INSERT_TAIL(adphead, adp, ad_next);
		if (oldadp != NULL && oldadp->ad_lbn == lbn)
			allocdirect_merge(adphead, adp, oldadp);
		FREE_LOCK(&lk);
		return;
	}
	TAILQ_FOREACH(oldadp, adphead, ad_next) {
		if (oldadp->ad_lbn >= lbn)
			break;
	}
	if (oldadp == NULL) {
		FREE_LOCK(&lk);
		panic("softdep_setup_allocdirect: lost entry");
	}
	/* insert in middle of list */
	TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
	if (oldadp->ad_lbn == lbn)
		allocdirect_merge(adphead, adp, oldadp);
	FREE_LOCK(&lk);
}
! 1550:
! 1551: /*
! 1552: * Replace an old allocdirect dependency with a newer one.
! 1553: * This routine must be called with splbio interrupts blocked.
! 1554: */
STATIC void
allocdirect_merge(adphead, newadp, oldadp)
	struct allocdirectlst *adphead;	/* head of list holding allocdirects */
	struct allocdirect *newadp;	/* allocdirect being added */
	struct allocdirect *oldadp;	/* existing allocdirect being checked */
{
	struct worklist *wk;
	struct freefrag *freefrag;
	struct newdirblk *newdirblk;

	splassert(IPL_BIO);

#ifdef DEBUG
	if (lk.lkt_held == -1)
		panic("allocdirect_merge: lock not held");
#endif
	/*
	 * The new dependency must be replacing exactly the block and
	 * size the old one allocated, and merging applies only to
	 * direct blocks (lbn < NDADDR).
	 */
	if (newadp->ad_oldblkno != oldadp->ad_newblkno ||
	    newadp->ad_oldsize != oldadp->ad_newsize ||
	    newadp->ad_lbn >= NDADDR) {
		FREE_LOCK(&lk);
		panic("allocdirect_merge: old %d != new %d || lbn %ld >= %d",
		    newadp->ad_oldblkno, oldadp->ad_newblkno, newadp->ad_lbn,
		    NDADDR);
	}
	/* The new dependency inherits the old one's "previous block". */
	newadp->ad_oldblkno = oldadp->ad_oldblkno;
	newadp->ad_oldsize = oldadp->ad_oldsize;
	/*
	 * If the old dependency had a fragment to free or had never
	 * previously had a block allocated, then the new dependency
	 * can immediately post its freefrag and adopt the old freefrag.
	 * This action is done by swapping the freefrag dependencies.
	 * The new dependency gains the old one's freefrag, and the
	 * old one gets the new one and then immediately puts it on
	 * the worklist when it is freed by free_allocdirect. It is
	 * not possible to do this swap when the old dependency had a
	 * non-zero size but no previous fragment to free. This condition
	 * arises when the new block is an extension of the old block.
	 * Here, the first part of the fragment allocated to the new
	 * dependency is part of the block currently claimed on disk by
	 * the old dependency, so cannot legitimately be freed until the
	 * conditions for the new dependency are fulfilled.
	 */
	if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) {
		freefrag = newadp->ad_freefrag;
		newadp->ad_freefrag = oldadp->ad_freefrag;
		oldadp->ad_freefrag = freefrag;
	}
	/*
	 * If we are tracking a new directory-block allocation,
	 * move it from the old allocdirect to the new allocdirect.
	 */
	if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) {
		newdirblk = WK_NEWDIRBLK(wk);
		WORKLIST_REMOVE(&newdirblk->db_list);
		if (LIST_FIRST(&oldadp->ad_newdirblk) != NULL)
			panic("allocdirect_merge: extra newdirblk");
		WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list);
	}
	/* The old dependency is now fully superseded; discard it. */
	free_allocdirect(adphead, oldadp, 0);
}
! 1615:
! 1616: /*
! 1617: * Allocate a new freefrag structure if needed.
! 1618: */
! 1619: STATIC struct freefrag *
! 1620: newfreefrag(ip, blkno, size)
! 1621: struct inode *ip;
! 1622: daddr_t blkno;
! 1623: long size;
! 1624: {
! 1625: struct freefrag *freefrag;
! 1626: struct fs *fs;
! 1627:
! 1628: if (blkno == 0)
! 1629: return (NULL);
! 1630: fs = ip->i_fs;
! 1631: if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag)
! 1632: panic("newfreefrag: frag size");
! 1633: freefrag = pool_get(&freefrag_pool, PR_WAITOK);
! 1634: freefrag->ff_list.wk_type = D_FREEFRAG;
! 1635: freefrag->ff_state = DIP(ip, uid) & ~ONWORKLIST; /* used below */
! 1636: freefrag->ff_inum = ip->i_number;
! 1637: freefrag->ff_mnt = ITOV(ip)->v_mount;
! 1638: freefrag->ff_devvp = ip->i_devvp;
! 1639: freefrag->ff_blkno = blkno;
! 1640: freefrag->ff_fragsize = size;
! 1641: return (freefrag);
! 1642: }
! 1643:
! 1644: /*
! 1645: * This workitem de-allocates fragments that were replaced during
! 1646: * file block allocation.
! 1647: */
! 1648: STATIC void
! 1649: handle_workitem_freefrag(freefrag)
! 1650: struct freefrag *freefrag;
! 1651: {
! 1652: struct inode tip;
! 1653: struct ufs1_dinode dtip1;
! 1654:
! 1655: tip.i_vnode = NULL;
! 1656: tip.i_din1 = &dtip1;
! 1657: tip.i_fs = VFSTOUFS(freefrag->ff_mnt)->um_fs;
! 1658: tip.i_ump = VFSTOUFS(freefrag->ff_mnt);
! 1659: tip.i_dev = freefrag->ff_devvp->v_rdev;
! 1660: tip.i_number = freefrag->ff_inum;
! 1661: tip.i_ffs1_uid = freefrag->ff_state & ~ONWORKLIST; /* set above */
! 1662: ffs_blkfree(&tip, freefrag->ff_blkno, freefrag->ff_fragsize);
! 1663: pool_put(&freefrag_pool, freefrag);
! 1664: }
! 1665:
! 1666: /*
! 1667: * Indirect block allocation dependencies.
! 1668: *
! 1669: * The same dependencies that exist for a direct block also exist when
! 1670: * a new block is allocated and pointed to by an entry in a block of
! 1671: * indirect pointers. The undo/redo states described above are also
! 1672: * used here. Because an indirect block contains many pointers that
! 1673: * may have dependencies, a second copy of the entire in-memory indirect
! 1674: * block is kept. The buffer cache copy is always completely up-to-date.
! 1675: * The second copy, which is used only as a source for disk writes,
! 1676: * contains only the safe pointers (i.e., those that have no remaining
! 1677: * update dependencies). The second copy is freed when all pointers
! 1678: * are safe. The cache is not allowed to replace indirect blocks with
! 1679: * pending update dependencies. If a buffer containing an indirect
! 1680: * block with dependencies is written, these routines will mark it
! 1681: * dirty again. It can only be successfully written once all the
! 1682: * dependencies are removed. The ffs_fsync routine in conjunction with
! 1683: * softdep_sync_metadata work together to get all the dependencies
! 1684: * removed so that a file can be successfully written to disk. Three
! 1685: * procedures are used when setting up indirect block pointer
! 1686: * dependencies. The division is necessary because of the organization
! 1687: * of the "balloc" routine and because of the distinction between file
! 1688: * pages and file metadata blocks.
! 1689: */
! 1690:
! 1691: /*
! 1692: * Allocate a new allocindir structure.
! 1693: */
! 1694: STATIC struct allocindir *
! 1695: newallocindir(ip, ptrno, newblkno, oldblkno)
! 1696: struct inode *ip; /* inode for file being extended */
! 1697: int ptrno; /* offset of pointer in indirect block */
! 1698: daddr_t newblkno; /* disk block number being added */
! 1699: daddr_t oldblkno; /* previous block number, 0 if none */
! 1700: {
! 1701: struct allocindir *aip;
! 1702:
! 1703: aip = pool_get(&allocindir_pool, PR_WAITOK);
! 1704: bzero(aip,sizeof(struct allocindir));
! 1705: aip->ai_list.wk_type = D_ALLOCINDIR;
! 1706: aip->ai_state = ATTACHED;
! 1707: aip->ai_offset = ptrno;
! 1708: aip->ai_newblkno = newblkno;
! 1709: aip->ai_oldblkno = oldblkno;
! 1710: aip->ai_freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize);
! 1711: return (aip);
! 1712: }
! 1713:
! 1714: /*
! 1715: * Called just before setting an indirect block pointer
! 1716: * to a newly allocated file page.
! 1717: */
! 1718: void
! 1719: softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp)
! 1720: struct inode *ip; /* inode for file being extended */
! 1721: daddr64_t lbn; /* allocated block number within file */
! 1722: struct buf *bp; /* buffer with indirect blk referencing page */
! 1723: int ptrno; /* offset of pointer in indirect block */
! 1724: daddr_t newblkno; /* disk block number being added */
! 1725: daddr_t oldblkno; /* previous block number, 0 if none */
! 1726: struct buf *nbp; /* buffer holding allocated page */
! 1727: {
! 1728: struct allocindir *aip;
! 1729: struct pagedep *pagedep;
! 1730:
! 1731: aip = newallocindir(ip, ptrno, newblkno, oldblkno);
! 1732: ACQUIRE_LOCK(&lk);
! 1733: /*
! 1734: * If we are allocating a directory page, then we must
! 1735: * allocate an associated pagedep to track additions and
! 1736: * deletions.
! 1737: */
! 1738: if ((DIP(ip, mode) & IFMT) == IFDIR &&
! 1739: pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
! 1740: WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list);
! 1741: if (nbp == NULL) {
! 1742: /*
! 1743: * XXXUBC - Yes, I know how to fix this, but not right now.
! 1744: */
! 1745: panic("softdep_setup_allocindir_page: Bonk art in the head");
! 1746: }
! 1747: WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
! 1748: FREE_LOCK(&lk);
! 1749: setup_allocindir_phase2(bp, ip, aip);
! 1750: }
! 1751:
! 1752: /*
! 1753: * Called just before setting an indirect block pointer to a
! 1754: * newly allocated indirect block.
! 1755: */
void
softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno)
	struct buf *nbp;	/* newly allocated indirect block */
	struct inode *ip;	/* inode for file being extended */
	struct buf *bp;		/* indirect block referencing allocated block */
	int ptrno;		/* offset of pointer in indirect block */
	daddr_t newblkno;	/* disk block number being added */
{
	struct allocindir *aip;

	/* oldblkno is 0: a new indirect block never replaces a fragment. */
	aip = newallocindir(ip, ptrno, newblkno, 0);
	ACQUIRE_LOCK(&lk);
	WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
	FREE_LOCK(&lk);
	/* Attach the allocindir to the parent indirect block's indirdep. */
	setup_allocindir_phase2(bp, ip, aip);
}
! 1772:
! 1773: /*
! 1774: * Called to finish the allocation of the "aip" allocated
! 1775: * by one of the two routines above.
! 1776: */
STATIC void
setup_allocindir_phase2(bp, ip, aip)
	struct buf *bp;		/* in-memory copy of the indirect block */
	struct inode *ip;	/* inode for file being extended */
	struct allocindir *aip;	/* allocindir allocated by the above routines */
{
	struct worklist *wk;
	struct indirdep *indirdep, *newindirdep;
	struct bmsafemap *bmsafemap;
	struct allocindir *oldaip;
	struct freefrag *freefrag;
	struct newblk *newblk;

	/* Indirect-block buffers carry negative logical block numbers. */
	if (bp->b_lblkno >= 0)
		panic("setup_allocindir_phase2: not indir blk");
	/*
	 * Loop until bp carries an indirdep.  First pass: look for an
	 * existing one.  If none is found, the lock is dropped, a fresh
	 * indirdep is built (which may sleep in pool_get/getblk), and the
	 * next iteration re-checks under the lock and either installs it
	 * or discards it when another thread raced one in.
	 */
	for (indirdep = NULL, newindirdep = NULL; ; ) {
		ACQUIRE_LOCK(&lk);
		LIST_FOREACH(wk, &bp->b_dep, wk_list) {
			if (wk->wk_type != D_INDIRDEP)
				continue;
			indirdep = WK_INDIRDEP(wk);
			break;
		}
		if (indirdep == NULL && newindirdep) {
			indirdep = newindirdep;
			WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list);
			newindirdep = NULL;
		}
		FREE_LOCK(&lk);
		if (indirdep) {
			/*
			 * Transfer the bitmap dependency (if any) from the
			 * newblk tracking structure onto the allocindir,
			 * then retire the newblk.
			 */
			if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0,
			    &newblk) == 0)
				panic("setup_allocindir: lost block");
			ACQUIRE_LOCK(&lk);
			if (newblk->nb_state == DEPCOMPLETE) {
				aip->ai_state |= DEPCOMPLETE;
				aip->ai_buf = NULL;
			} else {
				bmsafemap = newblk->nb_bmsafemap;
				aip->ai_buf = bmsafemap->sm_buf;
				LIST_REMOVE(newblk, nb_deps);
				LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd,
				    aip, ai_deps);
			}
			LIST_REMOVE(newblk, nb_hash);
			pool_put(&newblk_pool, newblk);
			aip->ai_indirdep = indirdep;
			/*
			 * Check to see if there is an existing dependency
			 * for this block. If there is, merge the old
			 * dependency into the new one.
			 */
			if (aip->ai_oldblkno == 0)
				oldaip = NULL;
			else

				LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next)
					if (oldaip->ai_offset == aip->ai_offset)
						break;
			freefrag = NULL;
			if (oldaip != NULL) {
				if (oldaip->ai_newblkno != aip->ai_oldblkno) {
					FREE_LOCK(&lk);
					panic("setup_allocindir_phase2: blkno");
				}
				/*
				 * Inherit the older dependency's original
				 * block and freefrag; our own freefrag is
				 * handed to handle_workitem_freefrag below.
				 */
				aip->ai_oldblkno = oldaip->ai_oldblkno;
				freefrag = aip->ai_freefrag;
				aip->ai_freefrag = oldaip->ai_freefrag;
				oldaip->ai_freefrag = NULL;
				free_allocindir(oldaip, NULL);
			}
			LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next);
			/*
			 * Roll back the pointer in the safe copy so an
			 * early write of the indirect block shows the old
			 * (already-valid) block number.
			 */
			if (ip->i_ump->um_fstype == UM_UFS1)
				((int32_t *)indirdep->ir_savebp->b_data)
				    [aip->ai_offset] = aip->ai_oldblkno;
			else
				((int64_t *)indirdep->ir_savebp->b_data)
				    [aip->ai_offset] = aip->ai_oldblkno;
			FREE_LOCK(&lk);
			if (freefrag != NULL)
				handle_workitem_freefrag(freefrag);
		}
		if (newindirdep) {
			/*
			 * We lost the race: discard the indirdep we built.
			 * NOTE(review): the guard tests indirdep->ir_savebp
			 * but releases newindirdep->ir_savebp; presumably
			 * benign since ir_savebp is always set by getblk()
			 * below, but it looks like it was meant to test
			 * newindirdep->ir_savebp — verify against upstream.
			 */
			if (indirdep->ir_savebp != NULL)
				brelse(newindirdep->ir_savebp);
			WORKITEM_FREE(newindirdep, D_INDIRDEP);
		}
		if (indirdep)
			break;
		/* No indirdep yet: build one and retry the lookup above. */
		newindirdep = pool_get(&indirdep_pool, PR_WAITOK);
		newindirdep->ir_list.wk_type = D_INDIRDEP;
		newindirdep->ir_state = ATTACHED;
		if (ip->i_ump->um_fstype == UM_UFS1)
			newindirdep->ir_state |= UFS1FMT;
		LIST_INIT(&newindirdep->ir_deplisthd);
		LIST_INIT(&newindirdep->ir_donehd);
		/* Resolve the physical address if bp has not been mapped. */
		if (bp->b_blkno == bp->b_lblkno) {
			VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno,
			    NULL);
		}
		/*
		 * The safe copy is keyed by physical address on the device
		 * vnode so indir_trunc can find it after the inode is gone.
		 */
		newindirdep->ir_savebp =
		    getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0);
#if 0
		BUF_KERNPROC(newindirdep->ir_savebp);
#endif
		bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount);
	}
}
! 1885:
! 1886: /*
! 1887: * Block de-allocation dependencies.
! 1888: *
! 1889: * When blocks are de-allocated, the on-disk pointers must be nullified before
! 1890: * the blocks are made available for use by other files. (The true
! 1891: * requirement is that old pointers must be nullified before new on-disk
! 1892: * pointers are set. We chose this slightly more stringent requirement to
! 1893: * reduce complexity.) Our implementation handles this dependency by updating
! 1894: * the inode (or indirect block) appropriately but delaying the actual block
! 1895: * de-allocation (i.e., freemap and free space count manipulation) until
! 1896: * after the updated versions reach stable storage. After the disk is
! 1897: * updated, the blocks can be safely de-allocated whenever it is convenient.
! 1898: * This implementation handles only the common case of reducing a file's
! 1899: * length to zero. Other cases are handled by the conventional synchronous
! 1900: * write approach.
! 1901: *
! 1902: * The ffs implementation with which we worked double-checks
! 1903: * the state of the block pointers and file size as it reduces
! 1904: * a file's length. Some of this code is replicated here in our
! 1905: * soft updates implementation. The freeblks->fb_chkcnt field is
! 1906: * used to transfer a part of this information to the procedure
! 1907: * that eventually de-allocates the blocks.
! 1908: *
! 1909: * This routine should be called from the routine that shortens
! 1910: * a file's length, before the inode's size or block pointers
! 1911: * are modified. It will save the block pointer information for
! 1912: * later release and zero the inode so that the calling routine
! 1913: * can release it.
! 1914: */
void
softdep_setup_freeblocks(ip, length)
	struct inode *ip;	/* The inode whose length is to be reduced */
	off_t length;		/* The new length for the file */
{
	struct freeblks *freeblks;
	struct inodedep *inodedep;
	struct allocdirect *adp;
	struct vnode *vp;
	struct buf *bp;
	struct fs *fs;
	int i, delay, error;

	fs = ip->i_fs;
	/* Soft updates only handles truncation to length zero. */
	if (length != 0)
		panic("softdep_setup_freeblocks: non-zero length");
	/*
	 * Record everything handle_workitem_freeblocks() will need once
	 * the zero'ed inode has made it to disk.
	 */
	freeblks = pool_get(&freeblks_pool, PR_WAITOK);
	bzero(freeblks, sizeof(struct freeblks));
	freeblks->fb_list.wk_type = D_FREEBLKS;
	freeblks->fb_state = ATTACHED;
	freeblks->fb_uid = DIP(ip, uid);
	freeblks->fb_previousinum = ip->i_number;
	freeblks->fb_devvp = ip->i_devvp;
	freeblks->fb_mnt = ITOV(ip)->v_mount;
	freeblks->fb_oldsize = DIP(ip, size);
	freeblks->fb_newsize = length;
	/* fb_chkcnt lets the de-allocator cross-check blocks released. */
	freeblks->fb_chkcnt = DIP(ip, blocks);

	/* Save the block pointers, then clear them in the inode. */
	for (i = 0; i < NDADDR; i++) {
		freeblks->fb_dblks[i] = DIP(ip, db[i]);
		DIP_ASSIGN(ip, db[i], 0);
	}

	for (i = 0; i < NIADDR; i++) {
		freeblks->fb_iblks[i] = DIP(ip, ib[i]);
		DIP_ASSIGN(ip, ib[i], 0);
	}

	DIP_ASSIGN(ip, blocks, 0);
	DIP_ASSIGN(ip, size, 0);

	/*
	 * Push the zero'ed inode to its disk buffer so that we are free
	 * to delete its dependencies below. Once the dependencies are gone
	 * the buffer can be safely released.
	 */
	if ((error = bread(ip->i_devvp,
	    fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
	    (int)fs->fs_bsize, NOCRED, &bp)) != 0)
		softdep_error("softdep_setup_freeblocks", error);

	/* Copy the (now zero'ed) in-core dinode into the inode block. */
	if (ip->i_ump->um_fstype == UM_UFS1)
		*((struct ufs1_dinode *) bp->b_data +
		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
	else
		*((struct ufs2_dinode *) bp->b_data +
		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;

	/*
	 * Find and eliminate any inode dependencies.
	 */
	ACQUIRE_LOCK(&lk);
	(void) inodedep_lookup(fs, ip->i_number, DEPALLOC, &inodedep);
	if ((inodedep->id_state & IOSTARTED) != 0) {
		FREE_LOCK(&lk);
		panic("softdep_setup_freeblocks: inode busy");
	}
	/*
	 * Add the freeblks structure to the list of operations that
	 * must await the zero'ed inode being written to disk. If we
	 * still have a bitmap dependency (delay == 0), then the inode
	 * has never been written to disk, so we can process the
	 * freeblks below once we have deleted the dependencies.
	 */
	delay = (inodedep->id_state & DEPCOMPLETE);
	if (delay)
		WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
	/*
	 * Because the file length has been truncated to zero, any
	 * pending block allocation dependency structures associated
	 * with this inode are obsolete and can simply be de-allocated.
	 * We must first merge the two dependency lists to get rid of
	 * any duplicate freefrag structures, then purge the merged list.
	 * If we still have a bitmap dependency, then the inode has never
	 * been written to disk, so we can free any fragments without delay.
	 */
	merge_inode_lists(inodedep);
	while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
		free_allocdirect(&inodedep->id_inoupdt, adp, delay);
	FREE_LOCK(&lk);
	bdwrite(bp);
	/*
	 * We must wait for any I/O in progress to finish so that
	 * all potential buffers on the dirty list will be visible.
	 * Once they are all there, walk the list and get rid of
	 * any dependencies.
	 */
	vp = ITOV(ip);
	ACQUIRE_LOCK(&lk);
	drain_output(vp, 1);
	while ((bp = LIST_FIRST(&vp->v_dirtyblkhd))) {
		if (!getdirtybuf(bp, MNT_WAIT))
			break;
		(void) inodedep_lookup(fs, ip->i_number, 0, &inodedep);
		deallocate_dependencies(bp, inodedep);
		/* The buffer's contents are obsolete; toss it. */
		bp->b_flags |= B_INVAL | B_NOCACHE;
		FREE_LOCK(&lk);
		brelse(bp);
		ACQUIRE_LOCK(&lk);
	}
	if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0)
		(void) free_inodedep(inodedep);

	if (delay) {
		freeblks->fb_state |= DEPCOMPLETE;
		/*
		 * If the inode with zeroed block pointers is now on disk we
		 * can start freeing blocks. Add freeblks to the worklist
		 * instead of calling handle_workitem_freeblocks() directly as
		 * it is more likely that additional IO is needed to complete
		 * the request than in the !delay case.
		 */
		if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE)
			add_to_worklist(&freeblks->fb_list);
	}

	FREE_LOCK(&lk);
	/*
	 * If the inode has never been written to disk (delay == 0),
	 * then we can process the freeblks now that we have deleted
	 * the dependencies.
	 */
	if (!delay)
		handle_workitem_freeblocks(freeblks);
}
! 2050:
! 2051: /*
! 2052: * Reclaim any dependency structures from a buffer that is about to
! 2053: * be reallocated to a new vnode. The buffer must be locked, thus,
! 2054: * no I/O completion operations can occur while we are manipulating
! 2055: * its associated dependencies. The mutex is held so that other I/O's
! 2056: * associated with related dependencies do not occur.
! 2057: */
STATIC void
deallocate_dependencies(bp, inodedep)
	struct buf *bp;		/* buffer whose dependencies are discarded */
	struct inodedep *inodedep; /* inodedep of the truncated file, or NULL */
{
	struct worklist *wk;
	struct indirdep *indirdep;
	struct allocindir *aip;
	struct pagedep *pagedep;
	struct dirrem *dirrem;
	struct diradd *dap;
	int i;

	/* Called with the softdep lock held; see comment above. */
	while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
		switch (wk->wk_type) {

		case D_INDIRDEP:
			indirdep = WK_INDIRDEP(wk);
			/*
			 * None of the indirect pointers will ever be visible,
			 * so they can simply be tossed. GOINGAWAY ensures
			 * that allocated pointers will be saved in the buffer
			 * cache until they are freed. Note that they will
			 * only be able to be found by their physical address
			 * since the inode mapping the logical address will
			 * be gone. The save buffer used for the safe copy
			 * was allocated in setup_allocindir_phase2 using
			 * the physical address so it could be used for this
			 * purpose. Hence we swap the safe copy with the real
			 * copy, allowing the safe copy to be freed and holding
			 * on to the real copy for later use in indir_trunc.
			 */
			if (indirdep->ir_state & GOINGAWAY) {
				FREE_LOCK(&lk);
				panic("deallocate_dependencies: already gone");
			}
			indirdep->ir_state |= GOINGAWAY;
			while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0)
				free_allocindir(aip, inodedep);
			if (bp->b_lblkno >= 0 ||
			    bp->b_blkno != indirdep->ir_savebp->b_lblkno) {
				FREE_LOCK(&lk);
				panic("deallocate_dependencies: not indir");
			}
			bcopy(bp->b_data, indirdep->ir_savebp->b_data,
			    bp->b_bcount);
			/* Move the indirdep onto the save buffer's list. */
			WORKLIST_REMOVE(wk);
			WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, wk);
			continue;

		case D_PAGEDEP:
			pagedep = WK_PAGEDEP(wk);
			/*
			 * None of the directory additions will ever be
			 * visible, so they can simply be tossed.
			 */
			for (i = 0; i < DAHASHSZ; i++)
				while ((dap =
				    LIST_FIRST(&pagedep->pd_diraddhd[i])))
					free_diradd(dap);
			while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != 0)
				free_diradd(dap);
			/*
			 * Copy any directory remove dependencies to the list
			 * to be processed after the zero'ed inode is written.
			 * If the inode has already been written, then they
			 * can be dumped directly onto the work list.
			 */
			while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd))) {
				LIST_REMOVE(dirrem, dm_next);
				dirrem->dm_dirinum = pagedep->pd_ino;
				if (inodedep == NULL ||
				    (inodedep->id_state & ALLCOMPLETE) ==
				    ALLCOMPLETE)
					add_to_worklist(&dirrem->dm_list);
				else
					WORKLIST_INSERT(&inodedep->id_bufwait,
					    &dirrem->dm_list);
			}
			if ((pagedep->pd_state & NEWBLOCK) != 0) {
				/*
				 * Find and free the newdirblk that tracked
				 * this pagedep.  Note: wk is reused here;
				 * the outer loop re-fetches from bp->b_dep,
				 * which is safe because the pagedep is
				 * removed from it below.
				 */
				LIST_FOREACH(wk, &inodedep->id_bufwait, wk_list)
					if (wk->wk_type == D_NEWDIRBLK &&
					    WK_NEWDIRBLK(wk)->db_pagedep ==
					      pagedep)
						break;
				if (wk != NULL) {
					WORKLIST_REMOVE(wk);
					free_newdirblk(WK_NEWDIRBLK(wk));
				} else {
					FREE_LOCK(&lk);
					panic("deallocate_dependencies: "
					      "lost pagedep");
				}
			}
			WORKLIST_REMOVE(&pagedep->pd_list);
			LIST_REMOVE(pagedep, pd_hash);
			WORKITEM_FREE(pagedep, D_PAGEDEP);
			continue;

		case D_ALLOCINDIR:
			free_allocindir(WK_ALLOCINDIR(wk), inodedep);
			continue;

		case D_ALLOCDIRECT:
		case D_INODEDEP:
			FREE_LOCK(&lk);
			panic("deallocate_dependencies: Unexpected type %s",
			    TYPENAME(wk->wk_type));
			/* NOTREACHED */

		default:
			FREE_LOCK(&lk);
			panic("deallocate_dependencies: Unknown type %s",
			    TYPENAME(wk->wk_type));
			/* NOTREACHED */
		}
	}
}
! 2176:
! 2177: /*
! 2178: * Free an allocdirect. Generate a new freefrag work request if appropriate.
! 2179: * This routine must be called with splbio interrupts blocked.
! 2180: */
STATIC void
free_allocdirect(adphead, adp, delay)
	struct allocdirectlst *adphead;	/* list the allocdirect is on */
	struct allocdirect *adp;	/* allocdirect to be freed */
	int delay;			/* nonzero: defer freefrag/newdirblk
					   until the inode reaches disk */
{
	struct newdirblk *newdirblk;
	struct worklist *wk;

	splassert(IPL_BIO);

#ifdef DEBUG
	if (lk.lkt_held == -1)
		panic("free_allocdirect: lock not held");
#endif
	/* Detach from the bitmap dependency list if still pending. */
	if ((adp->ad_state & DEPCOMPLETE) == 0)
		LIST_REMOVE(adp, ad_deps);
	TAILQ_REMOVE(adphead, adp, ad_next);
	if ((adp->ad_state & COMPLETE) == 0)
		WORKLIST_REMOVE(&adp->ad_list);
	/*
	 * Release (or defer releasing) any fragment the allocation
	 * replaced, per the delay argument.
	 */
	if (adp->ad_freefrag != NULL) {
		if (delay)
			WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
			    &adp->ad_freefrag->ff_list);
		else
			add_to_worklist(&adp->ad_freefrag->ff_list);
	}
	/* At most one newdirblk may be attached; anything more is a bug. */
	if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) {
		newdirblk = WK_NEWDIRBLK(wk);
		WORKLIST_REMOVE(&newdirblk->db_list);
		if (LIST_FIRST(&adp->ad_newdirblk) != NULL)
			panic("free_allocdirect: extra newdirblk");
		if (delay)
			WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
			    &newdirblk->db_list);
		else
			free_newdirblk(newdirblk);
	}
	WORKITEM_FREE(adp, D_ALLOCDIRECT);
}
! 2221:
! 2222: /*
! 2223: * Free a newdirblk. Clear the NEWBLOCK flag on its associated pagedep.
! 2224: * This routine must be called with splbio interrupts blocked.
! 2225: */
void
free_newdirblk(newdirblk)
	struct newdirblk *newdirblk;	/* tracking structure to release */
{
	struct pagedep *pagedep;
	struct diradd *dap;
	int i;

	splassert(IPL_BIO);

#ifdef DEBUG
	if (lk.lkt_held == -1)
		panic("free_newdirblk: lock not held");
#endif
	/*
	 * If the pagedep is still linked onto the directory buffer
	 * dependency chain, then some of the entries on the
	 * pd_pendinghd list may not be committed to disk yet. In
	 * this case, we will simply clear the NEWBLOCK flag and
	 * let the pd_pendinghd list be processed when the pagedep
	 * is next written. If the pagedep is no longer on the buffer
	 * dependency chain, then all the entries on the pd_pendinghd
	 * list are committed to disk and we can free them here.
	 */
	pagedep = newdirblk->db_pagedep;
	pagedep->pd_state &= ~NEWBLOCK;
	if ((pagedep->pd_state & ONWORKLIST) == 0)
		while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
			free_diradd(dap);
	/*
	 * If no dependencies remain, the pagedep will be freed.
	 */
	for (i = 0; i < DAHASHSZ; i++)
		if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL)
			break;
	if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0) {
		LIST_REMOVE(pagedep, pd_hash);
		WORKITEM_FREE(pagedep, D_PAGEDEP);
	}
	WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
}
! 2267:
! 2268: /*
! 2269: * Prepare an inode to be freed. The actual free operation is not
! 2270: * done until the zero'ed inode has been written to disk.
! 2271: */
void
softdep_freefile(pvp, ino, mode)
	struct vnode *pvp;	/* vnode whose inode supplies fs/devvp */
	ino_t ino;		/* inode number being freed */
	mode_t mode;		/* mode of the inode being freed */
{
	struct inode *ip = VTOI(pvp);
	struct inodedep *inodedep;
	struct freefile *freefile;

	/*
	 * This sets up the inode de-allocation dependency.
	 */
	freefile = pool_get(&freefile_pool, PR_WAITOK);
	freefile->fx_list.wk_type = D_FREEFILE;
	freefile->fx_list.wk_state = 0;
	freefile->fx_mode = mode;
	freefile->fx_oldinum = ino;
	freefile->fx_devvp = ip->i_devvp;
	freefile->fx_mnt = ITOV(ip)->v_mount;

	/*
	 * If the inodedep does not exist, then the zero'ed inode has
	 * been written to disk. If the allocated inode has never been
	 * written to disk, then the on-disk inode is zero'ed. In either
	 * case we can free the file immediately.
	 */
	ACQUIRE_LOCK(&lk);
	if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0 ||
	    check_inode_unwritten(inodedep)) {
		FREE_LOCK(&lk);
		handle_workitem_freefile(freefile);
		return;
	}
	/* Otherwise defer the free until the inode reaches disk. */
	WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
	FREE_LOCK(&lk);
}
! 2309:
! 2310: /*
! 2311: * Check to see if an inode has never been written to disk. If
! 2312: * so free the inodedep and return success, otherwise return failure.
! 2313: * This routine must be called with splbio interrupts blocked.
! 2314: *
! 2315: * If we still have a bitmap dependency, then the inode has never
! 2316: * been written to disk. Drop the dependency as it is no longer
! 2317: * necessary since the inode is being deallocated. We set the
! 2318: * ALLCOMPLETE flags since the bitmap now properly shows that the
! 2319: * inode is not allocated. Even if the inode is actively being
! 2320: * written, it has been rolled back to its zero'ed state, so we
! 2321: * are ensured that a zero inode is what is on the disk. For short
! 2322: * lived files, this change will usually result in removing all the
! 2323: * dependencies from the inode so that it can be freed immediately.
! 2324: */
STATIC int
check_inode_unwritten(inodedep)
	struct inodedep *inodedep;	/* inodedep to examine and maybe free */
{
	splassert(IPL_BIO);

	/*
	 * Any completed state, queued dependency, or pending link-count
	 * adjustment means the inode has been (or is being) written:
	 * report failure.
	 */
	if ((inodedep->id_state & DEPCOMPLETE) != 0 ||
	    LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
	    LIST_FIRST(&inodedep->id_bufwait) != NULL ||
	    LIST_FIRST(&inodedep->id_inowait) != NULL ||
	    TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
	    TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
	    inodedep->id_nlinkdelta != 0)
		return (0);
	/* Drop the bitmap dependency; the on-disk inode is zero. */
	inodedep->id_state |= ALLCOMPLETE;
	LIST_REMOVE(inodedep, id_deps);
	inodedep->id_buf = NULL;
	if (inodedep->id_state & ONWORKLIST)
		WORKLIST_REMOVE(&inodedep->id_list);
	if (inodedep->id_savedino1 != NULL) {
		FREE(inodedep->id_savedino1, M_INODEDEP);
		inodedep->id_savedino1 = NULL;
	}
	/* All dependencies were cleared above, so this must succeed. */
	if (free_inodedep(inodedep) == 0) {
		FREE_LOCK(&lk);
		panic("check_inode_unwritten: busy inode");
	}
	return (1);
}
! 2354:
! 2355: /*
! 2356: * Try to free an inodedep structure. Return 1 if it could be freed.
! 2357: */
! 2358: STATIC int
! 2359: free_inodedep(inodedep)
! 2360: struct inodedep *inodedep;
! 2361: {
! 2362:
! 2363: if ((inodedep->id_state & ONWORKLIST) != 0 ||
! 2364: (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
! 2365: LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
! 2366: LIST_FIRST(&inodedep->id_bufwait) != NULL ||
! 2367: LIST_FIRST(&inodedep->id_inowait) != NULL ||
! 2368: TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
! 2369: TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
! 2370: inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL)
! 2371: return (0);
! 2372: LIST_REMOVE(inodedep, id_hash);
! 2373: WORKITEM_FREE(inodedep, D_INODEDEP);
! 2374: num_inodedep -= 1;
! 2375: return (1);
! 2376: }
! 2377:
! 2378: /*
! 2379: * This workitem routine performs the block de-allocation.
! 2380: * The workitem is added to the pending list after the updated
! 2381: * inode block has been written to disk. As mentioned above,
! 2382: * checks regarding the number of blocks de-allocated (compared
! 2383: * to the number of blocks allocated for the file) are also
! 2384: * performed in this function.
! 2385: */
STATIC void
handle_workitem_freeblocks(freeblks)
	struct freeblks *freeblks;	/* work item describing blocks to free */
{
	struct inode tip;	/* throw-away inode; the real one is gone */
	daddr_t bn;
	union {
		struct ufs1_dinode di1;
		struct ufs2_dinode di2;
	} di;
	struct fs *fs;
	int i, level, bsize;
	long nblocks, blocksreleased = 0;
	int error, allerror = 0;
	daddr64_t baselbns[NIADDR], tmpval;

	/*
	 * Build a minimal stack-resident inode from the data saved in
	 * the freeblks so ffs_blkfree()/indir_trunc() have what they need.
	 */
	if (VFSTOUFS(freeblks->fb_mnt)->um_fstype == UM_UFS1)
		tip.i_din1 = &di.di1;
	else
		tip.i_din2 = &di.di2;

	tip.i_fs = fs = VFSTOUFS(freeblks->fb_mnt)->um_fs;
	tip.i_number = freeblks->fb_previousinum;
	tip.i_ump = VFSTOUFS(freeblks->fb_mnt);
	tip.i_dev = freeblks->fb_devvp->v_rdev;
	DIP_ASSIGN(&tip, size, freeblks->fb_oldsize);
	DIP_ASSIGN(&tip, uid, freeblks->fb_uid);
	tip.i_vnode = NULL;
	/* baselbns[i]: first logical block covered by indirect level i. */
	tmpval = 1;
	baselbns[0] = NDADDR;
	for (i = 1; i < NIADDR; i++) {
		tmpval *= NINDIR(fs);
		baselbns[i] = baselbns[i - 1] + tmpval;
	}
	nblocks = btodb(fs->fs_bsize);
	blocksreleased = 0;
	/*
	 * Indirect blocks first.
	 */
	for (level = (NIADDR - 1); level >= 0; level--) {
		if ((bn = freeblks->fb_iblks[level]) == 0)
			continue;
		if ((error = indir_trunc(&tip, fsbtodb(fs, bn), level,
		    baselbns[level], &blocksreleased)) != 0)
			allerror = error;
		ffs_blkfree(&tip, bn, fs->fs_bsize);
		blocksreleased += nblocks;
	}
	/*
	 * All direct blocks or frags.
	 */
	for (i = (NDADDR - 1); i >= 0; i--) {
		if ((bn = freeblks->fb_dblks[i]) == 0)
			continue;
		/* The last direct block may be a fragment. */
		bsize = blksize(fs, &tip, i);
		ffs_blkfree(&tip, bn, bsize);
		blocksreleased += btodb(bsize);
	}

#ifdef DIAGNOSTIC
	if (freeblks->fb_chkcnt != blocksreleased)
		printf("handle_workitem_freeblocks: block count\n");
	if (allerror)
		softdep_error("handle_workitem_freeblks", allerror);
#endif /* DIAGNOSTIC */
	WORKITEM_FREE(freeblks, D_FREEBLKS);
}
! 2453:
! 2454: /*
! 2455: * Release blocks associated with the inode ip and stored in the indirect
! 2456: * block dbn. If level is greater than SINGLE, the block is an indirect block
! 2457: * and recursive calls to indirtrunc must be used to cleanse other indirect
! 2458: * blocks.
! 2459: */
STATIC int
indir_trunc(ip, dbn, level, lbn, countp)
	struct inode *ip;	/* inode (may be a fake from freeblks) */
	daddr_t dbn;		/* disk address of the indirect block */
	int level;		/* indirection level (0 == lowest) */
	daddr64_t lbn;		/* first logical block this indir covers */
	long *countp;		/* accumulates dev blocks released */
{
	struct buf *bp;
	int32_t *bap1 = NULL;
	int64_t nb, *bap2 = NULL;
	struct fs *fs;
	struct worklist *wk;
	struct indirdep *indirdep;
	int i, lbnadd, nblocks, ufs1fmt;
	int error, allerror = 0;

	fs = ip->i_fs;
	/* lbnadd: logical blocks spanned by one pointer at this level. */
	lbnadd = 1;
	for (i = level; i > 0; i--)
		lbnadd *= NINDIR(fs);
	/*
	 * Get buffer of block pointers to be freed. This routine is not
	 * called until the zero'ed inode has been written, so it is safe
	 * to free blocks as they are encountered. Because the inode has
	 * been zero'ed, calls to bmap on these blocks will fail. So, we
	 * have to use the on-disk address and the block device for the
	 * filesystem to look them up. If the file was deleted before its
	 * indirect blocks were all written to disk, the routine that set
	 * us up (deallocate_dependencies) will have arranged to leave
	 * a complete copy of the indirect block in memory for our use.
	 * Otherwise we have to read the blocks in from the disk.
	 */
	ACQUIRE_LOCK(&lk);
	if ((bp = incore(ip->i_devvp, dbn)) != NULL &&
	    (wk = LIST_FIRST(&bp->b_dep)) != NULL) {
		/* Must be exactly the GOINGAWAY save buffer we left. */
		if (wk->wk_type != D_INDIRDEP ||
		    (indirdep = WK_INDIRDEP(wk))->ir_savebp != bp ||
		    (indirdep->ir_state & GOINGAWAY) == 0) {
			FREE_LOCK(&lk);
			panic("indir_trunc: lost indirdep");
		}
		WORKLIST_REMOVE(wk);
		WORKITEM_FREE(indirdep, D_INDIRDEP);
		if (LIST_FIRST(&bp->b_dep) != NULL) {
			FREE_LOCK(&lk);
			panic("indir_trunc: dangling dep");
		}
		FREE_LOCK(&lk);
	} else {
		FREE_LOCK(&lk);
		error = bread(ip->i_devvp, dbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error)
			return (error);
	}
	/*
	 * Recursively free indirect blocks.
	 */
	if (ip->i_ump->um_fstype == UM_UFS1) {
		ufs1fmt = 1;
		bap1 = (int32_t *)bp->b_data;
	} else {
		ufs1fmt = 0;
		bap2 = (int64_t *)bp->b_data;
	}
	nblocks = btodb(fs->fs_bsize);
	for (i = NINDIR(fs) - 1; i >= 0; i--) {
		if (ufs1fmt)
			nb = bap1[i];
		else
			nb = bap2[i];
		if (nb == 0)
			continue;
		if (level != 0) {
			/* Free the sub-tree first, then the block itself. */
			if ((error = indir_trunc(ip, fsbtodb(fs, nb),
			    level - 1, lbn + (i * lbnadd), countp)) != 0)
				allerror = error;
		}
		ffs_blkfree(ip, nb, fs->fs_bsize);
		*countp += nblocks;
	}
	/* Contents are dead; make sure the buffer is not cached. */
	bp->b_flags |= B_INVAL | B_NOCACHE;
	brelse(bp);
	return (allerror);
}
! 2545:
! 2546: /*
! 2547: * Free an allocindir.
! 2548: * This routine must be called with splbio interrupts blocked.
! 2549: */
STATIC void
free_allocindir(aip, inodedep)
	struct allocindir *aip;		/* allocindir to release */
	struct inodedep *inodedep;	/* if non-NULL, defer freefrag until
					   this inode reaches disk */
{
	struct freefrag *freefrag;

	splassert(IPL_BIO);

#ifdef DEBUG
	if (lk.lkt_held == -1)
		panic("free_allocindir: lock not held");
#endif
	/* Unhook from the bitmap dependency list if still pending. */
	if ((aip->ai_state & DEPCOMPLETE) == 0)
		LIST_REMOVE(aip, ai_deps);
	if (aip->ai_state & ONWORKLIST)
		WORKLIST_REMOVE(&aip->ai_list);
	LIST_REMOVE(aip, ai_next);
	/*
	 * Schedule any replaced fragment for release, immediately or
	 * after the inode is written, per the inodedep argument.
	 */
	if ((freefrag = aip->ai_freefrag) != NULL) {
		if (inodedep == NULL)
			add_to_worklist(&freefrag->ff_list);
		else
			WORKLIST_INSERT(&inodedep->id_bufwait,
			    &freefrag->ff_list);
	}
	WORKITEM_FREE(aip, D_ALLOCINDIR);
}
! 2577:
! 2578: /*
! 2579: * Directory entry addition dependencies.
! 2580: *
! 2581: * When adding a new directory entry, the inode (with its incremented link
! 2582: * count) must be written to disk before the directory entry's pointer to it.
! 2583: * Also, if the inode is newly allocated, the corresponding freemap must be
! 2584: * updated (on disk) before the directory entry's pointer. These requirements
! 2585: * are met via undo/redo on the directory entry's pointer, which consists
! 2586: * simply of the inode number.
! 2587: *
! 2588: * As directory entries are added and deleted, the free space within a
! 2589: * directory block can become fragmented. The ufs file system will compact
! 2590: * a fragmented directory block to make space for a new entry. When this
! 2591: * occurs, the offsets of previously added entries change. Any "diradd"
! 2592: * dependency structures corresponding to these entries must be updated with
! 2593: * the new offsets.
! 2594: */
! 2595:
! 2596: /*
! 2597: * This routine is called after the in-memory inode's link
! 2598: * count has been incremented, but before the directory entry's
! 2599: * pointer to the inode has been set.
! 2600: */
int
softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
	struct buf *bp;		/* buffer containing directory block */
	struct inode *dp;	/* inode for directory */
	off_t diroffset;	/* offset of new entry in directory */
	long newinum;		/* inode referenced by new directory entry */
	struct buf *newdirbp;	/* non-NULL => contents of new mkdir */
	int isnewblk;		/* entry is in a newly allocated block */
{
	int offset;		/* offset of new entry within directory block */
	daddr64_t lbn;		/* block in directory containing new entry */
	struct fs *fs;
	struct diradd *dap;
	struct allocdirect *adp;
	struct pagedep *pagedep;
	struct inodedep *inodedep;
	struct newdirblk *newdirblk = NULL;
	struct mkdir *mkdir1, *mkdir2;


	fs = dp->i_fs;
	lbn = lblkno(fs, diroffset);
	offset = blkoff(fs, diroffset);
	dap = pool_get(&diradd_pool, PR_WAITOK);
	bzero(dap,sizeof(struct diradd));
	dap->da_list.wk_type = D_DIRADD;
	dap->da_offset = offset;
	dap->da_newinum = newinum;
	dap->da_state = ATTACHED;
	/*
	 * Only the first fragment of a newly allocated direct block gets
	 * a newdirblk structure; it is hooked onto the block's allocdirect
	 * further below so writes of the block can be tracked.
	 */
	if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) {
		newdirblk = pool_get(&newdirblk_pool, PR_WAITOK);
		newdirblk->db_list.wk_type = D_NEWDIRBLK;
		newdirblk->db_state = 0;
	}
	if (newdirbp == NULL) {
		/* Plain entry: no mkdir dependencies are needed. */
		dap->da_state |= DEPCOMPLETE;
		ACQUIRE_LOCK(&lk);
	} else {
		/*
		 * mkdir: the entry cannot be committed until both the new
		 * directory's "." and ".." block (MKDIR_BODY) and the parent
		 * inode with its increased link count (MKDIR_PARENT) have
		 * been written.
		 */
		dap->da_state |= MKDIR_BODY | MKDIR_PARENT;
		mkdir1 = pool_get(&mkdir_pool, PR_WAITOK);
		mkdir1->md_list.wk_type = D_MKDIR;
		mkdir1->md_state = MKDIR_BODY;
		mkdir1->md_diradd = dap;
		mkdir2 = pool_get(&mkdir_pool, PR_WAITOK);
		mkdir2->md_list.wk_type = D_MKDIR;
		mkdir2->md_state = MKDIR_PARENT;
		mkdir2->md_diradd = dap;
		/*
		 * Dependency on "." and ".." being written to disk.
		 */
		mkdir1->md_buf = newdirbp;
		ACQUIRE_LOCK(&lk);
		LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs);
		WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list);
		FREE_LOCK(&lk);
		bdwrite(newdirbp);
		/*
		 * Dependency on link count increase for parent directory
		 */
		ACQUIRE_LOCK(&lk);
		if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0
		    || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
			/* Parent inode is already safe on disk. */
			dap->da_state &= ~MKDIR_PARENT;
			WORKITEM_FREE(mkdir2, D_MKDIR);
		} else {
			LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs);
			WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list);
		}
	}
	/*
	 * Link into parent directory pagedep to await its being written.
	 */
	if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0)
		WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
	dap->da_pagedep = pagedep;
	LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap,
	    da_pdlist);
	/*
	 * Link into its inodedep. Put it on the id_bufwait list if the inode
	 * is not yet written. If it is written, do the post-inode write
	 * processing to put it on the id_pendinghd list.
	 */
	(void) inodedep_lookup(fs, newinum, DEPALLOC, &inodedep);
	if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE)
		diradd_inode_written(dap, inodedep);
	else
		WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
	if (isnewblk) {
		/*
		 * Directories growing into indirect blocks are rare
		 * enough and the frequency of new block allocation
		 * in those cases even more rare, that we choose not
		 * to bother tracking them. Rather we simply force the
		 * new directory entry to disk.
		 */
		if (lbn >= NDADDR) {
			FREE_LOCK(&lk);
			/*
			 * We only have a new allocation when at the
			 * beginning of a new block, not when we are
			 * expanding into an existing block.
			 * Returning 1 tells the caller to force the
			 * entry to disk.
			 */
			if (blkoff(fs, diroffset) == 0)
				return (1);
			return (0);
		}
		/*
		 * We only have a new allocation when at the beginning
		 * of a new fragment, not when we are expanding into an
		 * existing fragment. Also, there is nothing to do if we
		 * are already tracking this block.
		 */
		if (fragoff(fs, diroffset) != 0) {
			FREE_LOCK(&lk);
			return (0);
		}

		if ((pagedep->pd_state & NEWBLOCK) != 0) {
			/* Block already tracked; discard the spare. */
			WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
			FREE_LOCK(&lk);
			return (0);
		}
		/*
		 * Find our associated allocdirect and have it track us.
		 */
		if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0)
			panic("softdep_setup_directory_add: lost inodedep");
		adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst);
		if (adp == NULL || adp->ad_lbn != lbn) {
			FREE_LOCK(&lk);
			panic("softdep_setup_directory_add: lost entry");
		}
		pagedep->pd_state |= NEWBLOCK;
		newdirblk->db_pagedep = pagedep;
		WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list);
	}
	FREE_LOCK(&lk);
	return (0);
}
! 2740:
! 2741: /*
! 2742: * This procedure is called to change the offset of a directory
! 2743: * entry when compacting a directory block which must be owned
! 2744: * exclusively by the caller. Note that the actual entry movement
! 2745: * must be done in this procedure to ensure that no I/O completions
! 2746: * occur while the move is in progress.
! 2747: */
void
softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize)
	struct inode *dp;	/* inode for directory */
	caddr_t base;		/* address of dp->i_offset */
	caddr_t oldloc;		/* address of old directory location */
	caddr_t newloc;		/* address of new directory location */
	int entrysize;		/* size of directory entry */
{
	int offset, oldoffset, newoffset;
	struct pagedep *pagedep;
	struct diradd *dap;
	daddr64_t lbn;

	ACQUIRE_LOCK(&lk);
	lbn = lblkno(dp->i_fs, dp->i_offset);
	offset = blkoff(dp->i_fs, dp->i_offset);
	/* No pagedep means no diradd to relocate; just move the entry. */
	if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0)
		goto done;
	oldoffset = offset + (oldloc - base);
	newoffset = offset + (newloc - base);

	/*
	 * Look for a matching diradd on the uncommitted hash chain and
	 * rehash it if its new offset lands in a different bucket.
	 */
	LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(oldoffset)], da_pdlist) {
		if (dap->da_offset != oldoffset)
			continue;
		dap->da_offset = newoffset;
		if (DIRADDHASH(newoffset) == DIRADDHASH(oldoffset))
			break;
		LIST_REMOVE(dap, da_pdlist);
		LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(newoffset)],
		    dap, da_pdlist);
		break;
	}
	if (dap == NULL) {
		/* Not hashed; check the pending (unhashed) list instead. */
		LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) {
			if (dap->da_offset == oldoffset) {
				dap->da_offset = newoffset;
				break;
			}
		}
	}
done:
	/* Move the entry itself while still holding the lock, so no I/O
	 * completion can observe a half-updated state. */
	bcopy(oldloc, newloc, entrysize);
	FREE_LOCK(&lk);
}
! 2793:
! 2794: /*
! 2795: * Free a diradd dependency structure. This routine must be called
! 2796: * with splbio interrupts blocked.
! 2797: */
STATIC void
free_diradd(dap)
	struct diradd *dap;
{
	struct dirrem *dirrem;
	struct pagedep *pagedep;
	struct inodedep *inodedep;
	struct mkdir *mkdir, *nextmd;

	splassert(IPL_BIO);

#ifdef DEBUG
	if (lk.lkt_held == -1)
		panic("free_diradd: lock not held");
#endif
	/* Unhook from whatever worklist and pagedep list hold it. */
	WORKLIST_REMOVE(&dap->da_list);
	LIST_REMOVE(dap, da_pdlist);
	if ((dap->da_state & DIRCHG) == 0) {
		pagedep = dap->da_pagedep;
	} else {
		/*
		 * Entry replaced another: the superseded dirrem can now
		 * be queued for processing.
		 */
		dirrem = dap->da_previous;
		pagedep = dirrem->dm_pagedep;
		dirrem->dm_dirinum = pagedep->pd_ino;
		add_to_worklist(&dirrem->dm_list);
	}
	/* Drop the inodedep if this was its last dependency. */
	if (inodedep_lookup(VFSTOUFS(pagedep->pd_mnt)->um_fs, dap->da_newinum,
	    0, &inodedep) != 0)
		(void) free_inodedep(inodedep);
	if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
		/* Release any mkdir structures still referencing us. */
		for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) {
			nextmd = LIST_NEXT(mkdir, md_mkdirs);
			if (mkdir->md_diradd != dap)
				continue;
			dap->da_state &= ~mkdir->md_state;
			WORKLIST_REMOVE(&mkdir->md_list);
			LIST_REMOVE(mkdir, md_mkdirs);
			WORKITEM_FREE(mkdir, D_MKDIR);
		}
		if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
			FREE_LOCK(&lk);
			panic("free_diradd: unfound ref");
		}
	}
	WORKITEM_FREE(dap, D_DIRADD);
}
! 2843:
! 2844: /*
! 2845: * Directory entry removal dependencies.
! 2846: *
! 2847: * When removing a directory entry, the entry's inode pointer must be
! 2848: * zero'ed on disk before the corresponding inode's link count is decremented
! 2849: * (possibly freeing the inode for re-use). This dependency is handled by
! 2850: * updating the directory entry but delaying the inode count reduction until
! 2851: * after the directory block has been written to disk. After this point, the
! 2852: * inode count can be decremented whenever it is convenient.
! 2853: */
! 2854:
! 2855: /*
! 2856: * This routine should be called immediately after removing
! 2857: * a directory entry. The inode's link count should not be
! 2858: * decremented by the calling procedure -- the soft updates
! 2859: * code will do this task when it is safe.
! 2860: */
void
softdep_setup_remove(bp, dp, ip, isrmdir)
	struct buf *bp;		/* buffer containing directory block */
	struct inode *dp;	/* inode for the directory being modified */
	struct inode *ip;	/* inode for directory entry being removed */
	int isrmdir;		/* indicates if doing RMDIR */
{
	struct dirrem *dirrem, *prevdirrem;

	/*
	 * Allocate a new dirrem if appropriate and ACQUIRE_LOCK.
	 * (newdirrem() returns with the lock held; we release it below.)
	 */
	dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);

	/*
	 * If the COMPLETE flag is clear, then there were no active
	 * entries and we want to roll back to a zeroed entry until
	 * the new inode is committed to disk. If the COMPLETE flag is
	 * set then we have deleted an entry that never made it to
	 * disk. If the entry we deleted resulted from a name change,
	 * then the old name still resides on disk. We cannot delete
	 * its inode (returned to us in prevdirrem) until the zeroed
	 * directory entry gets to disk. The new inode has never been
	 * referenced on the disk, so can be deleted immediately.
	 */
	if ((dirrem->dm_state & COMPLETE) == 0) {
		LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
		    dm_next);
		FREE_LOCK(&lk);
	} else {
		if (prevdirrem != NULL)
			LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd,
			    prevdirrem, dm_next);
		dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
		FREE_LOCK(&lk);
		/* Safe to drop the link count immediately. */
		handle_workitem_remove(dirrem);
	}
}
! 2899:
! 2900: /*
! 2901: * Allocate a new dirrem if appropriate and return it along with
! 2902: * its associated pagedep. Called without a lock, returns with lock.
! 2903: */
STATIC long num_dirrem;		/* number of dirrem allocated */
STATIC struct dirrem *
newdirrem(bp, dp, ip, isrmdir, prevdirremp)
	struct buf *bp;		/* buffer containing directory block */
	struct inode *dp;	/* inode for the directory being modified */
	struct inode *ip;	/* inode for directory entry being removed */
	int isrmdir;		/* indicates if doing RMDIR */
	struct dirrem **prevdirremp; /* previously referenced inode, if any */
{
	int offset;
	daddr64_t lbn;
	struct diradd *dap;
	struct dirrem *dirrem;
	struct pagedep *pagedep;

	/*
	 * Whiteouts have no deletion dependencies.
	 */
	if (ip == NULL)
		panic("newdirrem: whiteout");
	/*
	 * If we are over our limit, try to improve the situation.
	 * Limiting the number of dirrem structures will also limit
	 * the number of freefile and freeblks structures.
	 */
	if (num_dirrem > max_softdeps / 2)
		(void) request_cleanup(FLUSH_REMOVE, 0);
	num_dirrem += 1;
	dirrem = pool_get(&dirrem_pool, PR_WAITOK);
	bzero(dirrem,sizeof(struct dirrem));
	dirrem->dm_list.wk_type = D_DIRREM;
	dirrem->dm_state = isrmdir ? RMDIR : 0;
	dirrem->dm_mnt = ITOV(ip)->v_mount;
	dirrem->dm_oldinum = ip->i_number;
	*prevdirremp = NULL;

	/* From here on we return with the lock still held. */
	ACQUIRE_LOCK(&lk);
	lbn = lblkno(dp->i_fs, dp->i_offset);
	offset = blkoff(dp->i_fs, dp->i_offset);
	if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0)
		WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
	dirrem->dm_pagedep = pagedep;
	/*
	 * Check for a diradd dependency for the same directory entry.
	 * If present, then both dependencies become obsolete and can
	 * be de-allocated. Check for an entry on both the pd_diraddhd
	 * list and the pd_pendinghd list.
	 */

	LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist)
		if (dap->da_offset == offset)
			break;
	if (dap == NULL) {
		/* Not hashed; check the pending (unhashed) list too. */
		LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist)
			if (dap->da_offset == offset)
				break;
		if (dap == NULL)
			return (dirrem);
	}
	/*
	 * Must be ATTACHED at this point.
	 */
	if ((dap->da_state & ATTACHED) == 0) {
		FREE_LOCK(&lk);
		panic("newdirrem: not ATTACHED");
	}
	if (dap->da_newinum != ip->i_number) {
		FREE_LOCK(&lk);
		panic("newdirrem: inum %d should be %d",
		    ip->i_number, dap->da_newinum);
	}
	/*
	 * If we are deleting a changed name that never made it to disk,
	 * then return the dirrem describing the previous inode (which
	 * represents the inode currently referenced from this entry on disk).
	 */
	if ((dap->da_state & DIRCHG) != 0) {
		*prevdirremp = dap->da_previous;
		dap->da_state &= ~DIRCHG;
		dap->da_pagedep = pagedep;
	}
	/*
	 * We are deleting an entry that never made it to disk.
	 * Mark it COMPLETE so we can delete its inode immediately.
	 */
	dirrem->dm_state |= COMPLETE;
	free_diradd(dap);
	return (dirrem);
}
! 2994:
! 2995: /*
! 2996: * Directory entry change dependencies.
! 2997: *
! 2998: * Changing an existing directory entry requires that an add operation
! 2999: * be completed first followed by a deletion. The semantics for the addition
! 3000: * are identical to the description of adding a new entry above except
! 3001: * that the rollback is to the old inode number rather than zero. Once
! 3002: * the addition dependency is completed, the removal is done as described
! 3003: * in the removal routine above.
! 3004: */
! 3005:
! 3006: /*
! 3007: * This routine should be called immediately after changing
! 3008: * a directory entry. The inode's link count should not be
! 3009: * decremented by the calling procedure -- the soft updates
! 3010: * code will perform this task when it is safe.
! 3011: */
void
softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
	struct buf *bp;		/* buffer containing directory block */
	struct inode *dp;	/* inode for the directory being modified */
	struct inode *ip;	/* inode for directory entry being removed */
	long newinum;		/* new inode number for changed entry */
	int isrmdir;		/* indicates if doing RMDIR */
{
	int offset;
	struct diradd *dap = NULL;
	struct dirrem *dirrem, *prevdirrem;
	struct pagedep *pagedep;
	struct inodedep *inodedep;

	/* Build the diradd for the replacement entry before locking. */
	offset = blkoff(dp->i_fs, dp->i_offset);
	dap = pool_get(&diradd_pool, PR_WAITOK);
	bzero(dap,sizeof(struct diradd));
	dap->da_list.wk_type = D_DIRADD;
	dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE;
	dap->da_offset = offset;
	dap->da_newinum = newinum;

	/*
	 * Allocate a new dirrem and ACQUIRE_LOCK.
	 * (newdirrem() returns with the lock held; we release it below.)
	 */
	dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
	pagedep = dirrem->dm_pagedep;
	/*
	 * The possible values for isrmdir:
	 *	0 - non-directory file rename
	 *	1 - directory rename within same directory
	 *   inum - directory rename to new directory of given inode number
	 * When renaming to a new directory, we are both deleting and
	 * creating a new directory entry, so the link count on the new
	 * directory should not change. Thus we do not need the followup
	 * dirrem which is usually done in handle_workitem_remove. We set
	 * the DIRCHG flag to tell handle_workitem_remove to skip the
	 * followup dirrem.
	 */
	if (isrmdir > 1)
		dirrem->dm_state |= DIRCHG;

	/*
	 * If the COMPLETE flag is clear, then there were no active
	 * entries and we want to roll back to the previous inode until
	 * the new inode is committed to disk. If the COMPLETE flag is
	 * set, then we have deleted an entry that never made it to disk.
	 * If the entry we deleted resulted from a name change, then the old
	 * inode reference still resides on disk. Any rollback that we do
	 * needs to be to that old inode (returned to us in prevdirrem). If
	 * the entry we deleted resulted from a create, then there is
	 * no entry on the disk, so we want to roll back to zero rather
	 * than the uncommitted inode. In either of the COMPLETE cases we
	 * want to immediately free the unwritten and unreferenced inode.
	 */
	if ((dirrem->dm_state & COMPLETE) == 0) {
		dap->da_previous = dirrem;
	} else {
		if (prevdirrem != NULL) {
			dap->da_previous = prevdirrem;
		} else {
			dap->da_state &= ~DIRCHG;
			dap->da_pagedep = pagedep;
		}
		dirrem->dm_dirinum = pagedep->pd_ino;
		add_to_worklist(&dirrem->dm_list);
	}
	/*
	 * Link into its inodedep. Put it on the id_bufwait list if the inode
	 * is not yet written. If it is written, do the post-inode write
	 * processing to put it on the id_pendinghd list.
	 */
	if (inodedep_lookup(dp->i_fs, newinum, DEPALLOC, &inodedep) == 0 ||
	    (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
		dap->da_state |= COMPLETE;
		LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
		WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
	} else {
		LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)],
		    dap, da_pdlist);
		WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
	}
	FREE_LOCK(&lk);
}
! 3096:
! 3097: /*
! 3098: * Called whenever the link count on an inode is changed.
! 3099: * It creates an inode dependency so that the new reference(s)
! 3100: * to the inode cannot be committed to disk until the updated
! 3101: * inode has been written.
! 3102: */
! 3103: void
! 3104: softdep_change_linkcnt(ip, nodelay)
! 3105: struct inode *ip; /* the inode with the increased link count */
! 3106: int nodelay; /* do background work or not */
! 3107: {
! 3108: struct inodedep *inodedep;
! 3109: int flags;
! 3110:
! 3111: /*
! 3112: * If requested, do not allow background work to happen.
! 3113: */
! 3114: flags = DEPALLOC;
! 3115: if (nodelay)
! 3116: flags |= NODELAY;
! 3117:
! 3118: ACQUIRE_LOCK(&lk);
! 3119:
! 3120: (void) inodedep_lookup(ip->i_fs, ip->i_number, flags, &inodedep);
! 3121: if (DIP(ip, nlink) < ip->i_effnlink) {
! 3122: FREE_LOCK(&lk);
! 3123: panic("softdep_change_linkcnt: bad delta");
! 3124: }
! 3125:
! 3126: inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;
! 3127:
! 3128: FREE_LOCK(&lk);
! 3129: }
! 3130:
! 3131: /*
! 3132: * This workitem decrements the inode's link count.
! 3133: * If the link count reaches zero, the file is removed.
! 3134: */
STATIC void
handle_workitem_remove(dirrem)
	struct dirrem *dirrem;
{
	struct proc *p = CURPROC;	/* XXX */
	struct inodedep *inodedep;
	struct vnode *vp;
	struct inode *ip;
	ino_t oldinum;
	int error;

	/* Get the in-core inode whose link count is to be dropped. */
	if ((error = VFS_VGET(dirrem->dm_mnt, dirrem->dm_oldinum, &vp)) != 0) {
		softdep_error("handle_workitem_remove: vget", error);
		return;
	}
	ip = VTOI(vp);
	ACQUIRE_LOCK(&lk);
	if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep))
	    == 0) {
		FREE_LOCK(&lk);
		panic("handle_workitem_remove: lost inodedep");
	}
	/*
	 * Normal file deletion.
	 */
	if ((dirrem->dm_state & RMDIR) == 0) {
		DIP_ADD(ip, nlink, -1);
		ip->i_flag |= IN_CHANGE;
		if (DIP(ip, nlink) < ip->i_effnlink) {
			FREE_LOCK(&lk);
			panic("handle_workitem_remove: bad file delta");
		}
		inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;
		FREE_LOCK(&lk);
		vput(vp);
		num_dirrem -= 1;
		WORKITEM_FREE(dirrem, D_DIRREM);
		return;
	}
	/*
	 * Directory deletion. Decrement reference count for both the
	 * just deleted parent directory entry and the reference for ".".
	 * Next truncate the directory to length zero. When the
	 * truncation completes, arrange to have the reference count on
	 * the parent decremented to account for the loss of "..".
	 */
	DIP_ADD(ip, nlink, -2);
	ip->i_flag |= IN_CHANGE;
	if (DIP(ip, nlink) < ip->i_effnlink)
		panic("handle_workitem_remove: bad dir delta");
	inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;
	FREE_LOCK(&lk);
	if ((error = UFS_TRUNCATE(ip, (off_t)0, 0, p->p_ucred)) != 0)
		softdep_error("handle_workitem_remove: truncate", error);
	/*
	 * Rename a directory to a new parent. Since we are both deleting
	 * and creating a new directory entry, the link count on the new
	 * directory should not change. Thus we skip the followup dirrem.
	 */
	if (dirrem->dm_state & DIRCHG) {
		vput(vp);
		num_dirrem -= 1;
		WORKITEM_FREE(dirrem, D_DIRREM);
		return;
	}
	/*
	 * If the inodedep does not exist, then the zero'ed inode has
	 * been written to disk. If the allocated inode has never been
	 * written to disk, then the on-disk inode is zero'ed. In either
	 * case we can remove the file immediately.
	 * The dirrem is re-targeted at the parent (for the lost "..")
	 * and this function recurses to process it.
	 */
	ACQUIRE_LOCK(&lk);
	dirrem->dm_state = 0;
	oldinum = dirrem->dm_oldinum;
	dirrem->dm_oldinum = dirrem->dm_dirinum;
	if (inodedep_lookup(ip->i_fs, oldinum, 0, &inodedep) == 0 ||
	    check_inode_unwritten(inodedep)) {
		FREE_LOCK(&lk);
		vput(vp);
		handle_workitem_remove(dirrem);
		return;
	}
	/* Otherwise defer the parent's decrement until the inode is written. */
	WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
	FREE_LOCK(&lk);
	ip->i_flag |= IN_CHANGE;
	UFS_UPDATE(VTOI(vp), 0);
	vput(vp);
}
! 3223:
! 3224: /*
! 3225: * Inode de-allocation dependencies.
! 3226: *
! 3227: * When an inode's link count is reduced to zero, it can be de-allocated. We
! 3228: * found it convenient to postpone de-allocation until after the inode is
! 3229: * written to disk with its new link count (zero). At this point, all of the
! 3230: * on-disk inode's block pointers are nullified and, with careful dependency
! 3231: * list ordering, all dependencies related to the inode will be satisfied and
! 3232: * the corresponding dependency structures de-allocated. So, if/when the
! 3233: * inode is reused, there will be no mixing of old dependencies with new
! 3234: * ones. This artificial dependency is set up by the block de-allocation
! 3235: * procedure above (softdep_setup_freeblocks) and completed by the
! 3236: * following procedure.
! 3237: */
! 3238: STATIC void
! 3239: handle_workitem_freefile(freefile)
! 3240: struct freefile *freefile;
! 3241: {
! 3242: struct fs *fs;
! 3243: struct vnode vp;
! 3244: struct inode tip;
! 3245: #ifdef DEBUG
! 3246: struct inodedep *idp;
! 3247: #endif
! 3248: int error;
! 3249:
! 3250: fs = VFSTOUFS(freefile->fx_mnt)->um_fs;
! 3251: #ifdef DEBUG
! 3252: ACQUIRE_LOCK(&lk);
! 3253: error = inodedep_lookup(fs, freefile->fx_oldinum, 0, &idp);
! 3254: FREE_LOCK(&lk);
! 3255: if (error)
! 3256: panic("handle_workitem_freefile: inodedep survived");
! 3257: #endif
! 3258: tip.i_ump = VFSTOUFS(freefile->fx_mnt);
! 3259: tip.i_dev = freefile->fx_devvp->v_rdev;
! 3260: tip.i_fs = fs;
! 3261: tip.i_vnode = &vp;
! 3262: vp.v_data = &tip;
! 3263:
! 3264: if ((error = ffs_freefile(&tip, freefile->fx_oldinum,
! 3265: freefile->fx_mode)) != 0) {
! 3266: softdep_error("handle_workitem_freefile", error);
! 3267: }
! 3268: WORKITEM_FREE(freefile, D_FREEFILE);
! 3269: }
! 3270:
! 3271: /*
! 3272: * Disk writes.
! 3273: *
! 3274: * The dependency structures constructed above are most actively used when file
! 3275: * system blocks are written to disk. No constraints are placed on when a
! 3276: * block can be written, but unsatisfied update dependencies are made safe by
! 3277: * modifying (or replacing) the source memory for the duration of the disk
! 3278: * write. When the disk write completes, the memory block is again brought
! 3279: * up-to-date.
! 3280: *
! 3281: * In-core inode structure reclamation.
! 3282: *
! 3283: * Because there are a finite number of "in-core" inode structures, they are
! 3284: * reused regularly. By transferring all inode-related dependencies to the
! 3285: * in-memory inode block and indexing them separately (via "inodedep"s), we
! 3286: * can allow "in-core" inode structures to be reused at any time and avoid
! 3287: * any increase in contention.
! 3288: *
! 3289: * Called just before entering the device driver to initiate a new disk I/O.
! 3290: * The buffer must be locked, thus, no I/O completion operations can occur
! 3291: * while we are manipulating its associated dependencies.
! 3292: */
void
softdep_disk_io_initiation(bp)
	struct buf *bp;		/* structure describing disk write to occur */
{
	struct worklist *wk, *nextwk;
	struct indirdep *indirdep;
	struct inodedep *inodedep;
	struct buf *sbp;

	/*
	 * We only care about write operations. There should never
	 * be dependencies for reads.
	 */
	if (bp->b_flags & B_READ)
		panic("softdep_disk_io_initiation: read");

	ACQUIRE_LOCK(&lk);

	/*
	 * Do any necessary pre-I/O processing.
	 * The lock is dropped and reacquired inside some cases, so the
	 * next worklist entry is captured before each dispatch.
	 */
	for (wk = LIST_FIRST(&bp->b_dep); wk; wk = nextwk) {
		nextwk = LIST_NEXT(wk, wk_list);
		switch (wk->wk_type) {

		case D_PAGEDEP:
			initiate_write_filepage(WK_PAGEDEP(wk), bp);
			continue;

		case D_INODEDEP:
			inodedep = WK_INODEDEP(wk);
			if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC)
				initiate_write_inodeblock_ufs1(inodedep, bp);
#ifdef FFS2
			else
				initiate_write_inodeblock_ufs2(inodedep, bp);
#endif
			continue;

		case D_INDIRDEP:
			indirdep = WK_INDIRDEP(wk);
			if (indirdep->ir_state & GOINGAWAY)
				panic("disk_io_initiation: indirdep gone");
			/*
			 * If there are no remaining dependencies, this
			 * will be writing the real pointers, so the
			 * dependency can be freed.
			 */
			if (LIST_FIRST(&indirdep->ir_deplisthd) == NULL) {
				sbp = indirdep->ir_savebp;
				sbp->b_flags |= B_INVAL | B_NOCACHE;
				/* inline expand WORKLIST_REMOVE(wk); */
				wk->wk_state &= ~ONWORKLIST;
				LIST_REMOVE(wk, wk_list);
				WORKITEM_FREE(indirdep, D_INDIRDEP);
				FREE_LOCK(&lk);
				brelse(sbp);
				ACQUIRE_LOCK(&lk);
				continue;
			}
			/*
			 * Replace up-to-date version with safe version.
			 * (The lock is dropped for the sleeping malloc.)
			 */
			FREE_LOCK(&lk);
			indirdep->ir_saveddata = malloc(bp->b_bcount,
			    M_INDIRDEP, M_WAITOK);
			ACQUIRE_LOCK(&lk);
			indirdep->ir_state &= ~ATTACHED;
			indirdep->ir_state |= UNDONE;
			bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount);
			bcopy(indirdep->ir_savebp->b_data, bp->b_data,
			    bp->b_bcount);
			continue;

		case D_MKDIR:
		case D_BMSAFEMAP:
		case D_ALLOCDIRECT:
		case D_ALLOCINDIR:
			/* No pre-write processing needed for these. */
			continue;

		default:
			FREE_LOCK(&lk);
			panic("handle_disk_io_initiation: Unexpected type %s",
			    TYPENAME(wk->wk_type));
			/* NOTREACHED */
		}
	}

	FREE_LOCK(&lk);
}
! 3383:
! 3384: /*
! 3385: * Called from within the procedure above to deal with unsatisfied
! 3386: * allocation dependencies in a directory. The buffer must be locked,
! 3387: * thus, no I/O completion operations can occur while we are
! 3388: * manipulating its associated dependencies.
! 3389: */
STATIC void
initiate_write_filepage(pagedep, bp)
	struct pagedep *pagedep;
	struct buf *bp;
{
	struct diradd *dap;
	struct direct *ep;
	int i;

	if (pagedep->pd_state & IOSTARTED) {
		/*
		 * This can only happen if there is a driver that does not
		 * understand chaining. Here biodone will reissue the call
		 * to strategy for the incomplete buffers.
		 */
		printf("initiate_write_filepage: already started\n");
		return;
	}
	pagedep->pd_state |= IOSTARTED;
	/*
	 * Roll back every uncommitted entry: the on-disk image must not
	 * point at an inode that has not itself been committed. Each
	 * entry reverts to its previous inode (name change) or to zero
	 * (new entry), and is marked UNDONE so completion can redo it.
	 */
	for (i = 0; i < DAHASHSZ; i++) {
		LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
			ep = (struct direct *)
			    ((char *)bp->b_data + dap->da_offset);
			if (ep->d_ino != dap->da_newinum) {
				FREE_LOCK(&lk);
				panic("%s: dir inum %d != new %d",
				    "initiate_write_filepage",
				    ep->d_ino, dap->da_newinum);
			}
			if (dap->da_state & DIRCHG)
				ep->d_ino = dap->da_previous->dm_oldinum;
			else
				ep->d_ino = 0;
			dap->da_state &= ~ATTACHED;
			dap->da_state |= UNDONE;
		}
	}
}
! 3428:
! 3429: /*
! 3430: * Called from within the procedure above to deal with unsatisfied
! 3431: * allocation dependencies in an inodeblock. The buffer must be
! 3432: * locked, thus, no I/O completion operations can occur while we
! 3433: * are manipulating its associated dependencies.
! 3434: */
STATIC void
initiate_write_inodeblock_ufs1(inodedep, bp)
	struct inodedep *inodedep;
	struct buf *bp;			/* The inode block */
{
	struct allocdirect *adp, *lastadp;
	struct ufs1_dinode *dp;
	struct fs *fs;
#ifdef DIAGNOSTIC
	daddr64_t prevlbn = 0;
	int32_t d1, d2;
#endif
	int i, deplist;

	/* Refuse to start I/O on an inode block that is already in flight. */
	if (inodedep->id_state & IOSTARTED) {
		FREE_LOCK(&lk);
		panic("initiate_write_inodeblock: already started");
	}
	inodedep->id_state |= IOSTARTED;
	fs = inodedep->id_fs;
	/* Locate this inode's dinode within the buffer's inode block. */
	dp = (struct ufs1_dinode *)bp->b_data +
	    ino_to_fsbo(fs, inodedep->id_ino);
	/*
	 * If the bitmap is not yet written, then the allocated
	 * inode cannot be written to disk.  Save the real contents
	 * and zero the on-disk copy so this write claims nothing;
	 * handle_written_inodeblock() restores it afterwards.
	 */
	if ((inodedep->id_state & DEPCOMPLETE) == 0) {
		if (inodedep->id_savedino1 != NULL) {
			FREE_LOCK(&lk);
			panic("initiate_write_inodeblock: already doing I/O");
		}
		/* Drop the softdep lock: MALLOC with M_WAITOK may sleep. */
		FREE_LOCK(&lk);
		MALLOC(inodedep->id_savedino1, struct ufs1_dinode *,
		    sizeof(struct ufs1_dinode), M_INODEDEP, M_WAITOK);
		ACQUIRE_LOCK(&lk);
		*inodedep->id_savedino1 = *dp;
		bzero((caddr_t)dp, sizeof(struct ufs1_dinode));
		return;
	}
	/*
	 * If no dependencies, then there is nothing to roll back.
	 */
	inodedep->id_savedsize = dp->di_size;
	if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL)
		return;
	/*
	 * Set the dependencies to busy (ATTACHED -> UNDONE) while the
	 * block is being written; deplist records which lbns have deps.
	 */
	for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
	    adp = TAILQ_NEXT(adp, ad_next)) {
#ifdef DIAGNOSTIC
		/* id_inoupdt must be kept sorted by logical block number. */
		if (deplist != 0 && prevlbn >= adp->ad_lbn) {
			FREE_LOCK(&lk);
			panic("softdep_write_inodeblock: lbn order");
		}
		prevlbn = adp->ad_lbn;
		if (adp->ad_lbn < NDADDR &&
		    (d1 = dp->di_db[adp->ad_lbn]) != (d2 = adp->ad_newblkno)) {
			FREE_LOCK(&lk);
			panic("%s: direct pointer #%ld mismatch %d != %d",
			    "softdep_write_inodeblock", adp->ad_lbn, d1, d2);
		}
		if (adp->ad_lbn >= NDADDR &&
		    (d1 = dp->di_ib[adp->ad_lbn - NDADDR]) !=
		    (d2 = adp->ad_newblkno)) {
			FREE_LOCK(&lk);
			panic("%s: indirect pointer #%ld mismatch %d != %d",
			    "softdep_write_inodeblock", adp->ad_lbn - NDADDR,
			    d1, d2);
		}
		deplist |= 1 << adp->ad_lbn;
		if ((adp->ad_state & ATTACHED) == 0) {
			FREE_LOCK(&lk);
			panic("softdep_write_inodeblock: Unknown state 0x%x",
			    adp->ad_state);
		}
#endif /* DIAGNOSTIC */
		adp->ad_state &= ~ATTACHED;
		adp->ad_state |= UNDONE;
	}
	/*
	 * The on-disk inode cannot claim to be any larger than the last
	 * fragment that has been written. Otherwise, the on-disk inode
	 * might have fragments that were not the last block in the file
	 * which would corrupt the filesystem.  Roll direct pointers back
	 * to their old values; on hitting a fragment rollback, truncate
	 * the on-disk size to end at that fragment and zero everything
	 * beyond it.
	 */
	for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
	    lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
		if (adp->ad_lbn >= NDADDR)
			break;
		dp->di_db[adp->ad_lbn] = adp->ad_oldblkno;
		/* keep going until hitting a rollback to a frag */
		if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
			continue;
		dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
		/* Zero all later direct pointers ... */
		for (i = adp->ad_lbn + 1; i < NDADDR; i++) {
#ifdef DIAGNOSTIC
			if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) {
				FREE_LOCK(&lk);
				panic("softdep_write_inodeblock: lost dep1");
			}
#endif /* DIAGNOSTIC */
			dp->di_db[i] = 0;
		}
		/* ... and all indirect pointers past the fragment. */
		for (i = 0; i < NIADDR; i++) {
#ifdef DIAGNOSTIC
			if (dp->di_ib[i] != 0 &&
			    (deplist & ((1 << NDADDR) << i)) == 0) {
				FREE_LOCK(&lk);
				panic("softdep_write_inodeblock: lost dep2");
			}
#endif /* DIAGNOSTIC */
			dp->di_ib[i] = 0;
		}
		return;
	}
	/*
	 * If we have zero'ed out the last allocated block of the file,
	 * roll back the size to the last currently allocated block.
	 * We know that this last allocated block is full-sized, as
	 * we already checked for fragments in the loop above.
	 */
	if (lastadp != NULL &&
	    dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
		for (i = lastadp->ad_lbn; i >= 0; i--)
			if (dp->di_db[i] != 0)
				break;
		dp->di_size = (i + 1) * fs->fs_bsize;
	}
	/*
	 * The only dependencies are for indirect blocks.
	 *
	 * The file size for indirect block additions is not guaranteed.
	 * Such a guarantee would be non-trivial to achieve. The conventional
	 * synchronous write implementation also does not make this guarantee.
	 * Fsck should catch and fix discrepancies. Arguably, the file size
	 * can be over-estimated without destroying integrity when the file
	 * moves into the indirect blocks (i.e., is large). If we want to
	 * postpone fsck, we are stuck with this argument.
	 */
	for (; adp; adp = TAILQ_NEXT(adp, ad_next))
		dp->di_ib[adp->ad_lbn - NDADDR] = 0;
}
! 3578:
! 3579: #ifdef FFS2
! 3580: /*
! 3581: * Version of initiate_write_inodeblock that handles FFS2 dinodes.
! 3582: */
! 3583: STATIC void
! 3584: initiate_write_inodeblock_ufs2(inodedep, bp)
! 3585: struct inodedep *inodedep;
! 3586: struct buf *bp; /* The inode block */
! 3587: {
! 3588: struct allocdirect *adp, *lastadp;
! 3589: struct ufs2_dinode *dp;
! 3590: struct fs *fs = inodedep->id_fs;
! 3591: #ifdef DIAGNOSTIC
! 3592: daddr64_t prevlbn = -1, d1, d2;
! 3593: #endif
! 3594: int deplist, i;
! 3595:
! 3596: if (inodedep->id_state & IOSTARTED)
! 3597: panic("initiate_write_inodeblock_ufs2: already started");
! 3598: inodedep->id_state |= IOSTARTED;
! 3599: fs = inodedep->id_fs;
! 3600: dp = (struct ufs2_dinode *)bp->b_data +
! 3601: ino_to_fsbo(fs, inodedep->id_ino);
! 3602: /*
! 3603: * If the bitmap is not yet written, then the allocated
! 3604: * inode cannot be written to disk.
! 3605: */
! 3606: if ((inodedep->id_state & DEPCOMPLETE) == 0) {
! 3607: if (inodedep->id_savedino2 != NULL)
! 3608: panic("initiate_write_inodeblock_ufs2: I/O underway");
! 3609: MALLOC(inodedep->id_savedino2, struct ufs2_dinode *,
! 3610: sizeof(struct ufs2_dinode), M_INODEDEP, M_WAITOK);
! 3611: *inodedep->id_savedino2 = *dp;
! 3612: bzero((caddr_t)dp, sizeof(struct ufs2_dinode));
! 3613: return;
! 3614: }
! 3615: /*
! 3616: * If no dependencies, then there is nothing to roll back.
! 3617: */
! 3618: inodedep->id_savedsize = dp->di_size;
! 3619: if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL)
! 3620: return;
! 3621:
! 3622: #ifdef notyet
! 3623: inodedep->id_savedextsize = dp->di_extsize;
! 3624: if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL &&
! 3625: TAILQ_FIRST(&inodedep->id_extupdt) == NULL)
! 3626: return;
! 3627: /*
! 3628: * Set the ext data dependencies to busy.
! 3629: */
! 3630: for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp;
! 3631: adp = TAILQ_NEXT(adp, ad_next)) {
! 3632: #ifdef DIAGNOSTIC
! 3633: if (deplist != 0 && prevlbn >= adp->ad_lbn) {
! 3634: FREE_LOCK(&lk);
! 3635: panic("softdep_write_inodeblock: lbn order");
! 3636: }
! 3637: prevlbn = adp->ad_lbn;
! 3638: if ((d1 = dp->di_extb[adp->ad_lbn]) !=
! 3639: (d2 = adp->ad_newblkno)) {
! 3640: FREE_LOCK(&lk);
! 3641: panic("%s: direct pointer #%ld mismatch %ld != %ld",
! 3642: "softdep_write_inodeblock", adp->ad_lbn, d1, d2);
! 3643: }
! 3644: deplist |= 1 << adp->ad_lbn;
! 3645: if ((adp->ad_state & ATTACHED) == 0) {
! 3646: FREE_LOCK(&lk);
! 3647: panic("softdep_write_inodeblock: Unknown state 0x%x",
! 3648: adp->ad_state);
! 3649: }
! 3650: #endif /* DIAGNOSTIC */
! 3651: adp->ad_state &= ~ATTACHED;
! 3652: adp->ad_state |= UNDONE;
! 3653: }
! 3654: /*
! 3655: * The on-disk inode cannot claim to be any larger than the last
! 3656: * fragment that has been written. Otherwise, the on-disk inode
! 3657: * might have fragments that were not the last block in the ext
! 3658: * data which would corrupt the filesystem.
! 3659: */
! 3660: for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp;
! 3661: lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
! 3662: dp->di_extb[adp->ad_lbn] = adp->ad_oldblkno;
! 3663: /* keep going until hitting a rollback to a frag */
! 3664: if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
! 3665: continue;
! 3666: dp->di_extsize = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
! 3667: for (i = adp->ad_lbn + 1; i < NXADDR; i++) {
! 3668: #ifdef DIAGNOSTIC
! 3669: if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) {
! 3670: FREE_LOCK(&lk);
! 3671: panic("softdep_write_inodeblock: lost dep1");
! 3672: }
! 3673: #endif /* DIAGNOSTIC */
! 3674: dp->di_extb[i] = 0;
! 3675: }
! 3676: lastadp = NULL;
! 3677: break;
! 3678: }
! 3679: /*
! 3680: * If we have zero'ed out the last allocated block of the ext
! 3681: * data, roll back the size to the last currently allocated block.
! 3682: * We know that this last allocated block is a full-sized as
! 3683: * we already checked for fragments in the loop above.
! 3684: */
! 3685: if (lastadp != NULL &&
! 3686: dp->di_extsize <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
! 3687: for (i = lastadp->ad_lbn; i >= 0; i--)
! 3688: if (dp->di_extb[i] != 0)
! 3689: break;
! 3690: dp->di_extsize = (i + 1) * fs->fs_bsize;
! 3691: }
! 3692: #endif /* notyet */
! 3693:
! 3694: /*
! 3695: * Set the file data dependencies to busy.
! 3696: */
! 3697: for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
! 3698: adp = TAILQ_NEXT(adp, ad_next)) {
! 3699: #ifdef DIAGNOSTIC
! 3700: if (deplist != 0 && prevlbn >= adp->ad_lbn) {
! 3701: FREE_LOCK(&lk);
! 3702: panic("softdep_write_inodeblock: lbn order");
! 3703: }
! 3704: prevlbn = adp->ad_lbn;
! 3705: if (adp->ad_lbn < NDADDR &&
! 3706: (d1 = dp->di_db[adp->ad_lbn]) != (d2 = adp->ad_newblkno)) {
! 3707: FREE_LOCK(&lk);
! 3708: panic("%s: direct pointer #%ld mismatch %ld != %ld",
! 3709: "softdep_write_inodeblock", adp->ad_lbn, d1, d2);
! 3710: }
! 3711: if (adp->ad_lbn >= NDADDR &&
! 3712: (d1 = dp->di_ib[adp->ad_lbn - NDADDR]) !=
! 3713: (d2 = adp->ad_newblkno)) {
! 3714: FREE_LOCK(&lk);
! 3715: panic("%s: indirect pointer #%ld mismatch %ld != %ld",
! 3716: "softdep_write_inodeblock", adp->ad_lbn - NDADDR,
! 3717: d1, d2);
! 3718: }
! 3719: deplist |= 1 << adp->ad_lbn;
! 3720: if ((adp->ad_state & ATTACHED) == 0) {
! 3721: FREE_LOCK(&lk);
! 3722: panic("softdep_write_inodeblock: Unknown state 0x%x",
! 3723: adp->ad_state);
! 3724: }
! 3725: #endif /* DIAGNOSTIC */
! 3726: adp->ad_state &= ~ATTACHED;
! 3727: adp->ad_state |= UNDONE;
! 3728: }
! 3729: /*
! 3730: * The on-disk inode cannot claim to be any larger than the last
! 3731: * fragment that has been written. Otherwise, the on-disk inode
! 3732: * might have fragments that were not the last block in the file
! 3733: * which would corrupt the filesystem.
! 3734: */
! 3735: for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
! 3736: lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
! 3737: if (adp->ad_lbn >= NDADDR)
! 3738: break;
! 3739: dp->di_db[adp->ad_lbn] = adp->ad_oldblkno;
! 3740: /* keep going until hitting a rollback to a frag */
! 3741: if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
! 3742: continue;
! 3743: dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
! 3744: for (i = adp->ad_lbn + 1; i < NDADDR; i++) {
! 3745: #ifdef DIAGNOSTIC
! 3746: if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) {
! 3747: FREE_LOCK(&lk);
! 3748: panic("softdep_write_inodeblock: lost dep2");
! 3749: }
! 3750: #endif /* DIAGNOSTIC */
! 3751: dp->di_db[i] = 0;
! 3752: }
! 3753: for (i = 0; i < NIADDR; i++) {
! 3754: #ifdef DIAGNOSTIC
! 3755: if (dp->di_ib[i] != 0 &&
! 3756: (deplist & ((1 << NDADDR) << i)) == 0) {
! 3757: FREE_LOCK(&lk);
! 3758: panic("softdep_write_inodeblock: lost dep3");
! 3759: }
! 3760: #endif /* DIAGNOSTIC */
! 3761: dp->di_ib[i] = 0;
! 3762: }
! 3763: return;
! 3764: }
! 3765: /*
! 3766: * If we have zero'ed out the last allocated block of the file,
! 3767: * roll back the size to the last currently allocated block.
! 3768: * We know that this last allocated block is a full-sized as
! 3769: * we already checked for fragments in the loop above.
! 3770: */
! 3771: if (lastadp != NULL &&
! 3772: dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
! 3773: for (i = lastadp->ad_lbn; i >= 0; i--)
! 3774: if (dp->di_db[i] != 0)
! 3775: break;
! 3776: dp->di_size = (i + 1) * fs->fs_bsize;
! 3777: }
! 3778: /*
! 3779: * The only dependencies are for indirect blocks.
! 3780: *
! 3781: * The file size for indirect block additions is not guaranteed.
! 3782: * Such a guarantee would be non-trivial to achieve. The conventional
! 3783: * synchronous write implementation also does not make this guarantee.
! 3784: * Fsck should catch and fix discrepancies. Arguably, the file size
! 3785: * can be over-estimated without destroying integrity when the file
! 3786: * moves into the indirect blocks (i.e., is large). If we want to
! 3787: * postpone fsck, we are stuck with this argument.
! 3788: */
! 3789: for (; adp; adp = TAILQ_NEXT(adp, ad_next))
! 3790: dp->di_ib[adp->ad_lbn - NDADDR] = 0;
! 3791: }
! 3792: #endif /* FFS2 */
! 3793:
! 3794: /*
! 3795: * This routine is called during the completion interrupt
! 3796: * service routine for a disk write (from the procedure called
! 3797: * by the device driver to inform the file system caches of
! 3798: * a request completion). It should be called early in this
! 3799: * procedure, before the block is made available to other
! 3800: * processes or other routines are called.
! 3801: */
void
softdep_disk_write_complete(bp)
	struct buf *bp;	/* describes the completed disk write */
{
	struct worklist *wk;
	struct workhead reattach;
	struct newblk *newblk;
	struct allocindir *aip;
	struct allocdirect *adp;
	struct indirdep *indirdep;
	struct inodedep *inodedep;
	struct bmsafemap *bmsafemap;

	/*
	 * If an error occurred while doing the write, then the data
	 * has not hit the disk and the dependencies cannot be unrolled.
	 */
	if ((bp->b_flags & B_ERROR) && !(bp->b_flags & B_INVAL))
		return;

#ifdef DEBUG
	/* -2 marks the lock as "held at interrupt level" (no process). */
	if (lk.lkt_held != -1)
		panic("softdep_disk_write_complete: lock is held");
	lk.lkt_held = -2;
#endif
	LIST_INIT(&reattach);
	/*
	 * Drain the buffer's dependency list, dispatching on the type of
	 * each work item.  Items whose handler reports remaining rollbacks
	 * are collected on `reattach' and put back on the buffer below.
	 */
	while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
		WORKLIST_REMOVE(wk);
		switch (wk->wk_type) {

		case D_PAGEDEP:
			/* Directory page written. */
			if (handle_written_filepage(WK_PAGEDEP(wk), bp))
				WORKLIST_INSERT(&reattach, wk);
			continue;

		case D_INODEDEP:
			/* Inode block written. */
			if (handle_written_inodeblock(WK_INODEDEP(wk), bp))
				WORKLIST_INSERT(&reattach, wk);
			continue;

		case D_BMSAFEMAP:
			/*
			 * Cylinder group bitmap written: every allocation
			 * that was waiting on it now has DEPCOMPLETE set
			 * and is detached from the bitmap buffer.
			 */
			bmsafemap = WK_BMSAFEMAP(wk);
			while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkhd))) {
				newblk->nb_state |= DEPCOMPLETE;
				newblk->nb_bmsafemap = NULL;
				LIST_REMOVE(newblk, nb_deps);
			}
			while ((adp =
			    LIST_FIRST(&bmsafemap->sm_allocdirecthd))) {
				adp->ad_state |= DEPCOMPLETE;
				adp->ad_buf = NULL;
				LIST_REMOVE(adp, ad_deps);
				handle_allocdirect_partdone(adp);
			}
			while ((aip =
			    LIST_FIRST(&bmsafemap->sm_allocindirhd))) {
				aip->ai_state |= DEPCOMPLETE;
				aip->ai_buf = NULL;
				LIST_REMOVE(aip, ai_deps);
				handle_allocindir_partdone(aip);
			}
			while ((inodedep =
			    LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) {
				inodedep->id_state |= DEPCOMPLETE;
				LIST_REMOVE(inodedep, id_deps);
				inodedep->id_buf = NULL;
			}
			WORKITEM_FREE(bmsafemap, D_BMSAFEMAP);
			continue;

		case D_MKDIR:
			handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY);
			continue;

		case D_ALLOCDIRECT:
			/* The newly allocated block hit the disk. */
			adp = WK_ALLOCDIRECT(wk);
			adp->ad_state |= COMPLETE;
			handle_allocdirect_partdone(adp);
			continue;

		case D_ALLOCINDIR:
			aip = WK_ALLOCINDIR(wk);
			aip->ai_state |= COMPLETE;
			handle_allocindir_partdone(aip);
			continue;

		case D_INDIRDEP:
			/*
			 * Indirect block written with rolled-back pointers:
			 * restore the up-to-date copy, commit the done
			 * entries, and redirty the buffer so the real
			 * contents eventually reach the disk.
			 */
			indirdep = WK_INDIRDEP(wk);
			if (indirdep->ir_state & GOINGAWAY)
				panic("disk_write_complete: indirdep gone");
			bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
			free(indirdep->ir_saveddata, M_INDIRDEP);
			indirdep->ir_saveddata = 0;
			indirdep->ir_state &= ~UNDONE;
			indirdep->ir_state |= ATTACHED;
			while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) {
				handle_allocindir_partdone(aip);
				if (aip == LIST_FIRST(&indirdep->ir_donehd))
					panic("disk_write_complete: not gone");
			}
			WORKLIST_INSERT(&reattach, wk);
			if ((bp->b_flags & B_DELWRI) == 0)
				stat_indir_blk_ptrs++;
			buf_dirty(bp);
			continue;

		default:
			panic("handle_disk_write_complete: Unknown type %s",
			    TYPENAME(wk->wk_type));
			/* NOTREACHED */
		}
	}
	/*
	 * Reattach any requests that must be redone.
	 */
	while ((wk = LIST_FIRST(&reattach)) != NULL) {
		WORKLIST_REMOVE(wk);
		WORKLIST_INSERT(&bp->b_dep, wk);
	}
#ifdef DEBUG
	if (lk.lkt_held != -2)
		panic("softdep_disk_write_complete: lock lost");
	lk.lkt_held = -1;
#endif
}
! 3927:
! 3928: /*
! 3929: * Called from within softdep_disk_write_complete above. Note that
! 3930: * this routine is always called from interrupt level with further
! 3931: * splbio interrupts blocked.
! 3932: */
STATIC void
handle_allocdirect_partdone(adp)
	struct allocdirect *adp;	/* the completed allocdirect */
{
	struct allocdirect *listadp;
	struct inodedep *inodedep;
	long bsize, delay;

	splassert(IPL_BIO);

	/* Both the new block and the bitmap must be on disk first. */
	if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
		return;
	if (adp->ad_buf != NULL)
		panic("handle_allocdirect_partdone: dangling dep");

	/*
	 * The on-disk inode cannot claim to be any larger than the last
	 * fragment that has been written. Otherwise, the on-disk inode
	 * might have fragments that were not the last block in the file
	 * which would corrupt the filesystem. Thus, we cannot free any
	 * allocdirects after one whose ad_oldblkno claims a fragment as
	 * these blocks must be rolled back to zero before writing the inode.
	 * We check the currently active set of allocdirects in id_inoupdt.
	 */
	inodedep = adp->ad_inodedep;
	bsize = inodedep->id_fs->fs_bsize;
	TAILQ_FOREACH(listadp, &inodedep->id_inoupdt, ad_next) {
		/* found our block */
		if (listadp == adp)
			break;
		/* continue if ad_oldlbn is not a fragment */
		if (listadp->ad_oldsize == 0 ||
		    listadp->ad_oldsize == bsize)
			continue;
		/* hit a fragment */
		return;
	}
	/*
	 * If we have reached the end of the current list without
	 * finding the just finished dependency, then it must be
	 * on the future dependency list. Future dependencies cannot
	 * be freed until they are moved to the current list.
	 */
	if (listadp == NULL) {
#ifdef DEBUG
		/* Sanity check: the dep must be on id_newinoupdt then. */
		TAILQ_FOREACH(listadp, &inodedep->id_newinoupdt, ad_next)
			/* found our block */
			if (listadp == adp)
				break;
		if (listadp == NULL)
			panic("handle_allocdirect_partdone: lost dep");
#endif /* DEBUG */
		return;
	}
	/*
	 * If we have found the just finished dependency, then free
	 * it along with anything that follows it that is complete.
	 * If the inode still has a bitmap dependency, then it has
	 * never been written to disk, hence the on-disk inode cannot
	 * reference the old fragment so we can free it without delay.
	 */
	delay = (inodedep->id_state & DEPCOMPLETE);
	for (; adp; adp = listadp) {
		listadp = TAILQ_NEXT(adp, ad_next);
		if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
			return;
		free_allocdirect(&inodedep->id_inoupdt, adp, delay);
	}
}
! 4002:
! 4003: /*
! 4004: * Called from within softdep_disk_write_complete above. Note that
! 4005: * this routine is always called from interrupt level with further
! 4006: * splbio interrupts blocked.
! 4007: */
! 4008: STATIC void
! 4009: handle_allocindir_partdone(aip)
! 4010: struct allocindir *aip; /* the completed allocindir */
! 4011: {
! 4012: struct indirdep *indirdep;
! 4013:
! 4014: splassert(IPL_BIO);
! 4015:
! 4016: if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE)
! 4017: return;
! 4018: if (aip->ai_buf != NULL)
! 4019: panic("handle_allocindir_partdone: dangling dependency");
! 4020: indirdep = aip->ai_indirdep;
! 4021: if (indirdep->ir_state & UNDONE) {
! 4022: LIST_REMOVE(aip, ai_next);
! 4023: LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next);
! 4024: return;
! 4025: }
! 4026: if (indirdep->ir_state & UFS1FMT)
! 4027: ((int32_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
! 4028: aip->ai_newblkno;
! 4029: else
! 4030: ((int64_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
! 4031: aip->ai_newblkno;
! 4032: LIST_REMOVE(aip, ai_next);
! 4033: if (aip->ai_freefrag != NULL)
! 4034: add_to_worklist(&aip->ai_freefrag->ff_list);
! 4035: WORKITEM_FREE(aip, D_ALLOCINDIR);
! 4036: }
! 4037:
! 4038: /*
! 4039: * Called from within softdep_disk_write_complete above to restore
! 4040: * in-memory inode block contents to their most up-to-date state. Note
! 4041: * that this routine is always called from interrupt level with further
! 4042: * splbio interrupts blocked.
! 4043: */
STATIC int
handle_written_inodeblock(inodedep, bp)
	struct inodedep *inodedep;
	struct buf *bp;		/* buffer containing the inode block */
{
	struct worklist *wk, *filefree;
	struct allocdirect *adp, *nextadp;
	struct ufs1_dinode *dp1 = NULL;
	struct ufs2_dinode *dp2 = NULL;
	int hadchanges, fstype;

	splassert(IPL_BIO);

	if ((inodedep->id_state & IOSTARTED) == 0)
		panic("handle_written_inodeblock: not started");
	inodedep->id_state &= ~IOSTARTED;

	/* Locate this inode's dinode within the just-written block. */
	if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC) {
		fstype = UM_UFS1;
		dp1 = (struct ufs1_dinode *) bp->b_data +
		    ino_to_fsbo(inodedep->id_fs, inodedep->id_ino);
	} else {
		fstype = UM_UFS2;
		dp2 = (struct ufs2_dinode *) bp->b_data +
		    ino_to_fsbo(inodedep->id_fs, inodedep->id_ino);
	}

	/*
	 * If we had to rollback the inode allocation because of
	 * bitmaps being incomplete, then simply restore it.
	 * Keep the block dirty so that it will not be reclaimed until
	 * all associated dependencies have been cleared and the
	 * corresponding updates written to disk.
	 *
	 * NOTE(review): id_savedino1 is tested for both formats —
	 * presumably id_savedino1/id_savedino2 share storage; confirm
	 * against the inodedep declaration.
	 */
	if (inodedep->id_savedino1 != NULL) {
		if (fstype == UM_UFS1)
			*dp1 = *inodedep->id_savedino1;
		else
			*dp2 = *inodedep->id_savedino2;
		FREE(inodedep->id_savedino1, M_INODEDEP);
		inodedep->id_savedino1 = NULL;
		if ((bp->b_flags & B_DELWRI) == 0)
			stat_inode_bitmap++;
		buf_dirty(bp);
		return (1);
	}
	inodedep->id_state |= COMPLETE;
	/*
	 * Roll forward anything that had to be rolled back before
	 * the inode could be updated: re-install the new block numbers
	 * that initiate_write_inodeblock_ufs[12]() replaced with the
	 * old ones.
	 */
	hadchanges = 0;
	for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = nextadp) {
		nextadp = TAILQ_NEXT(adp, ad_next);
		if (adp->ad_state & ATTACHED)
			panic("handle_written_inodeblock: new entry");
		if (fstype == UM_UFS1) {
			if (adp->ad_lbn < NDADDR) {
				if (dp1->di_db[adp->ad_lbn] != adp->ad_oldblkno)
					panic("%s: %s #%ld mismatch %d != %d",
					    "handle_written_inodeblock",
					    "direct pointer", adp->ad_lbn,
					    dp1->di_db[adp->ad_lbn],
					    adp->ad_oldblkno);
				dp1->di_db[adp->ad_lbn] = adp->ad_newblkno;
			} else {
				if (dp1->di_ib[adp->ad_lbn - NDADDR] != 0)
					panic("%s: %s #%ld allocated as %d",
					    "handle_written_inodeblock",
					    "indirect pointer",
					    adp->ad_lbn - NDADDR,
					    dp1->di_ib[adp->ad_lbn - NDADDR]);
				dp1->di_ib[adp->ad_lbn - NDADDR] =
				    adp->ad_newblkno;
			}
		} else {
			if (adp->ad_lbn < NDADDR) {
				if (dp2->di_db[adp->ad_lbn] != adp->ad_oldblkno)
					panic("%s: %s #%ld mismatch %d != %d",
					    "handle_written_inodeblock",
					    "direct pointer", adp->ad_lbn,
					    dp2->di_db[adp->ad_lbn],
					    adp->ad_oldblkno);
				dp2->di_db[adp->ad_lbn] = adp->ad_newblkno;
			} else {
				if (dp2->di_ib[adp->ad_lbn - NDADDR] != 0)
					panic("%s: %s #%ld allocated as %d",
					    "handle_written_inodeblock",
					    "indirect pointer",
					    adp->ad_lbn - NDADDR,
					    dp2->di_ib[adp->ad_lbn - NDADDR]);
				dp2->di_ib[adp->ad_lbn - NDADDR] =
				    adp->ad_newblkno;
			}
		}
		adp->ad_state &= ~UNDONE;
		adp->ad_state |= ATTACHED;
		hadchanges = 1;
	}
	if (hadchanges && (bp->b_flags & B_DELWRI) == 0)
		stat_direct_blk_ptrs++;
	/*
	 * Reset the file size to its most up-to-date value.
	 */
	if (inodedep->id_savedsize == -1)
		panic("handle_written_inodeblock: bad size");

	if (fstype == UM_UFS1) {
		if (dp1->di_size != inodedep->id_savedsize) {
			dp1->di_size = inodedep->id_savedsize;
			hadchanges = 1;
		}
	} else {
		if (dp2->di_size != inodedep->id_savedsize) {
			dp2->di_size = inodedep->id_savedsize;
			hadchanges = 1;
		}
	}
	inodedep->id_savedsize = -1;
	/*
	 * If there were any rollbacks in the inode block, then it must be
	 * marked dirty so that it will eventually get written back in
	 * its correct form.
	 */
	if (hadchanges)
		buf_dirty(bp);
	/*
	 * Process any allocdirects that completed during the update.
	 */
	if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
		handle_allocdirect_partdone(adp);
	/*
	 * Process deallocations that were held pending until the
	 * inode had been written to disk. Freeing of the inode
	 * is delayed until after all blocks have been freed to
	 * avoid creation of new <vfsid, inum, lbn> triples
	 * before the old ones have been deleted.
	 */
	filefree = NULL;
	while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) {
		WORKLIST_REMOVE(wk);
		switch (wk->wk_type) {

		case D_FREEFILE:
			/*
			 * We defer adding filefree to the worklist until
			 * all other additions have been made to ensure
			 * that it will be done after all the old blocks
			 * have been freed.
			 */
			if (filefree != NULL)
				panic("handle_written_inodeblock: filefree");
			filefree = wk;
			continue;

		case D_MKDIR:
			handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT);
			continue;

		case D_DIRADD:
			diradd_inode_written(WK_DIRADD(wk), inodedep);
			continue;

		case D_FREEBLKS:
			wk->wk_state |= COMPLETE;
			if ((wk->wk_state & ALLCOMPLETE) != ALLCOMPLETE)
				continue;
			/* FALLTHROUGH */
		case D_FREEFRAG:
		case D_DIRREM:
			add_to_worklist(wk);
			continue;

		case D_NEWDIRBLK:
			free_newdirblk(WK_NEWDIRBLK(wk));
			continue;

		default:
			panic("handle_written_inodeblock: Unknown type %s",
			    TYPENAME(wk->wk_type));
			/* NOTREACHED */
		}
	}
	if (filefree != NULL) {
		if (free_inodedep(inodedep) == 0)
			panic("handle_written_inodeblock: live inodedep");
		add_to_worklist(filefree);
		return (0);
	}

	/*
	 * If no outstanding dependencies, free it.
	 */
	if (free_inodedep(inodedep) || TAILQ_FIRST(&inodedep->id_inoupdt) == 0)
		return (0);
	/* Nonzero tells the caller to reattach this work item to bp. */
	return (hadchanges);
}
! 4241:
! 4242: /*
! 4243: * Process a diradd entry after its dependent inode has been written.
! 4244: * This routine must be called with splbio interrupts blocked.
! 4245: */
! 4246: STATIC void
! 4247: diradd_inode_written(dap, inodedep)
! 4248: struct diradd *dap;
! 4249: struct inodedep *inodedep;
! 4250: {
! 4251: struct pagedep *pagedep;
! 4252:
! 4253: splassert(IPL_BIO);
! 4254:
! 4255: dap->da_state |= COMPLETE;
! 4256: if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
! 4257: if (dap->da_state & DIRCHG)
! 4258: pagedep = dap->da_previous->dm_pagedep;
! 4259: else
! 4260: pagedep = dap->da_pagedep;
! 4261: LIST_REMOVE(dap, da_pdlist);
! 4262: LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
! 4263: }
! 4264: WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
! 4265: }
! 4266:
! 4267: /*
! 4268: * Handle the completion of a mkdir dependency.
! 4269: */
! 4270: STATIC void
! 4271: handle_written_mkdir(mkdir, type)
! 4272: struct mkdir *mkdir;
! 4273: int type;
! 4274: {
! 4275: struct diradd *dap;
! 4276: struct pagedep *pagedep;
! 4277:
! 4278: splassert(IPL_BIO);
! 4279:
! 4280: if (mkdir->md_state != type)
! 4281: panic("handle_written_mkdir: bad type");
! 4282: dap = mkdir->md_diradd;
! 4283: dap->da_state &= ~type;
! 4284: if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0)
! 4285: dap->da_state |= DEPCOMPLETE;
! 4286: if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
! 4287: if (dap->da_state & DIRCHG)
! 4288: pagedep = dap->da_previous->dm_pagedep;
! 4289: else
! 4290: pagedep = dap->da_pagedep;
! 4291: LIST_REMOVE(dap, da_pdlist);
! 4292: LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
! 4293: }
! 4294: LIST_REMOVE(mkdir, md_mkdirs);
! 4295: WORKITEM_FREE(mkdir, D_MKDIR);
! 4296: }
! 4297:
! 4298: /*
! 4299: * Called from within softdep_disk_write_complete above.
! 4300: * A write operation was just completed. Removed inodes can
! 4301: * now be freed and associated block pointers may be committed.
! 4302: * Note that this routine is always called from interrupt level
! 4303: * with further splbio interrupts blocked.
! 4304: */
STATIC int
handle_written_filepage(pagedep, bp)
	struct pagedep *pagedep;
	struct buf *bp;		/* buffer containing the written page */
{
	struct dirrem *dirrem;
	struct diradd *dap, *nextdap;
	struct direct *ep;
	int i, chgs;

	splassert(IPL_BIO);

	if ((pagedep->pd_state & IOSTARTED) == 0)
		panic("handle_written_filepage: not started");
	pagedep->pd_state &= ~IOSTARTED;
	/*
	 * Process any directory removals that have been committed.
	 */
	while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)) != NULL) {
		LIST_REMOVE(dirrem, dm_next);
		dirrem->dm_dirinum = pagedep->pd_ino;
		add_to_worklist(&dirrem->dm_list);
	}
	/*
	 * Free any directory additions that have been committed.
	 * If it is a newly allocated block, we have to wait until
	 * the on-disk directory inode claims the new block.
	 */
	if ((pagedep->pd_state & NEWBLOCK) == 0)
		while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
			free_diradd(dap);
	/*
	 * Uncommitted directory entries must be restored: put back the
	 * new inode numbers that were rolled back when the write began.
	 */
	for (chgs = 0, i = 0; i < DAHASHSZ; i++) {
		for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap;
		     dap = nextdap) {
			nextdap = LIST_NEXT(dap, da_pdlist);
			if (dap->da_state & ATTACHED)
				panic("handle_written_filepage: attached");
			ep = (struct direct *)
			    ((char *)bp->b_data + dap->da_offset);
			ep->d_ino = dap->da_newinum;
			dap->da_state &= ~UNDONE;
			dap->da_state |= ATTACHED;
			chgs = 1;
			/*
			 * If the inode referenced by the directory has
			 * been written out, then the dependency can be
			 * moved to the pending list.
			 */
			if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
				LIST_REMOVE(dap, da_pdlist);
				LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap,
				    da_pdlist);
			}
		}
	}
	/*
	 * If there were any rollbacks in the directory, then it must be
	 * marked dirty so that it will eventually get written back in
	 * its correct form.
	 */
	if (chgs) {
		if ((bp->b_flags & B_DELWRI) == 0)
			stat_dir_entry++;
		buf_dirty(bp);
		return (1);
	}
	/*
	 * If we are not waiting for a new directory block to be
	 * claimed by its inode, then the pagedep will be freed.
	 * Otherwise it will remain to track any new entries on
	 * the page in case they are fsync'ed.
	 */
	if ((pagedep->pd_state & NEWBLOCK) == 0) {
		LIST_REMOVE(pagedep, pd_hash);
		WORKITEM_FREE(pagedep, D_PAGEDEP);
	}
	return (0);
}
! 4386:
! 4387: /*
! 4388: * Writing back in-core inode structures.
! 4389: *
! 4390: * The file system only accesses an inode's contents when it occupies an
! 4391: * "in-core" inode structure. These "in-core" structures are separate from
! 4392: * the page frames used to cache inode blocks. Only the latter are
! 4393: * transferred to/from the disk. So, when the updated contents of the
! 4394: * "in-core" inode structure are copied to the corresponding in-memory inode
! 4395: * block, the dependencies are also transferred. The following procedure is
! 4396: * called when copying a dirty "in-core" inode to a cached inode block.
! 4397: */
! 4398:
! 4399: /*
! 4400: * Called when an inode is loaded from disk. If the effective link count
! 4401: * differed from the actual link count when it was last flushed, then we
! 4402: * need to ensure that the correct effective link count is put back.
! 4403: */
! 4404: void
! 4405: softdep_load_inodeblock(ip)
! 4406: struct inode *ip; /* the "in_core" copy of the inode */
! 4407: {
! 4408: struct inodedep *inodedep;
! 4409:
! 4410: /*
! 4411: * Check for alternate nlink count.
! 4412: */
! 4413: ip->i_effnlink = DIP(ip, nlink);
! 4414: ACQUIRE_LOCK(&lk);
! 4415: if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) {
! 4416: FREE_LOCK(&lk);
! 4417: return;
! 4418: }
! 4419: ip->i_effnlink -= inodedep->id_nlinkdelta;
! 4420: FREE_LOCK(&lk);
! 4421: }
! 4422:
! 4423: /*
! 4424: * This routine is called just before the "in-core" inode
! 4425: * information is to be copied to the in-memory inode block.
! 4426: * Recall that an inode block contains several inodes. If
! 4427: * the force flag is set, then the dependencies will be
! 4428: * cleared so that the update can always be made. Note that
! 4429: * the buffer is locked when this routine is called, so we
! 4430: * will never be in the middle of writing the inode block
! 4431: * to disk.
! 4432: */
! 4433: void
! 4434: softdep_update_inodeblock(ip, bp, waitfor)
! 4435: struct inode *ip; /* the "in_core" copy of the inode */
! 4436: struct buf *bp; /* the buffer containing the inode block */
! 4437: int waitfor; /* nonzero => update must be allowed */
! 4438: {
! 4439: struct inodedep *inodedep;
! 4440: struct worklist *wk;
! 4441: int error, gotit;
! 4442:
! 4443: /*
! 4444: * If the effective link count is not equal to the actual link
! 4445: * count, then we must track the difference in an inodedep while
! 4446: * the inode is (potentially) tossed out of the cache. Otherwise,
! 4447: * if there is no existing inodedep, then there are no dependencies
! 4448: * to track.
! 4449: */
! 4450: ACQUIRE_LOCK(&lk);
! 4451: if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) {
! 4452: FREE_LOCK(&lk);
! 4453: if (ip->i_effnlink != DIP(ip, nlink))
! 4454: panic("softdep_update_inodeblock: bad link count");
! 4455: return;
! 4456: }
! 4457: if (inodedep->id_nlinkdelta != DIP(ip, nlink) - ip->i_effnlink) {
! 4458: FREE_LOCK(&lk);
! 4459: panic("softdep_update_inodeblock: bad delta");
! 4460: }
! 4461: /*
! 4462: * Changes have been initiated. Anything depending on these
! 4463: * changes cannot occur until this inode has been written.
! 4464: */
! 4465: inodedep->id_state &= ~COMPLETE;
! 4466: if ((inodedep->id_state & ONWORKLIST) == 0)
! 4467: WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list);
! 4468: /*
! 4469: * Any new dependencies associated with the incore inode must
! 4470: * now be moved to the list associated with the buffer holding
! 4471: * the in-memory copy of the inode. Once merged process any
! 4472: * allocdirects that are completed by the merger.
! 4473: */
! 4474: merge_inode_lists(inodedep);
! 4475: if (TAILQ_FIRST(&inodedep->id_inoupdt) != NULL)
! 4476: handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt));
! 4477: /*
! 4478: * Now that the inode has been pushed into the buffer, the
! 4479: * operations dependent on the inode being written to disk
! 4480: * can be moved to the id_bufwait so that they will be
! 4481: * processed when the buffer I/O completes.
! 4482: */
! 4483: while ((wk = LIST_FIRST(&inodedep->id_inowait)) != NULL) {
! 4484: WORKLIST_REMOVE(wk);
! 4485: WORKLIST_INSERT(&inodedep->id_bufwait, wk);
! 4486: }
! 4487: /*
! 4488: * Newly allocated inodes cannot be written until the bitmap
! 4489: * that allocates them have been written (indicated by
! 4490: * DEPCOMPLETE being set in id_state). If we are doing a
! 4491: * forced sync (e.g., an fsync on a file), we force the bitmap
! 4492: * to be written so that the update can be done.
! 4493: */
! 4494: if ((inodedep->id_state & DEPCOMPLETE) != 0 || waitfor == 0) {
! 4495: FREE_LOCK(&lk);
! 4496: return;
! 4497: }
! 4498: bp = inodedep->id_buf;
! 4499: gotit = getdirtybuf(bp, MNT_WAIT);
! 4500: FREE_LOCK(&lk);
! 4501: if (gotit && (error = bwrite(bp)) != 0)
! 4502: softdep_error("softdep_update_inodeblock: bwrite", error);
! 4503: if ((inodedep->id_state & DEPCOMPLETE) == 0)
! 4504: panic("softdep_update_inodeblock: update failed");
! 4505: }
! 4506:
! 4507: /*
! 4508: * Merge the new inode dependency list (id_newinoupdt) into the old
! 4509: * inode dependency list (id_inoupdt). This routine must be called
! 4510: * with splbio interrupts blocked.
! 4511: */
! 4512: STATIC void
! 4513: merge_inode_lists(inodedep)
! 4514: struct inodedep *inodedep;
! 4515: {
! 4516: struct allocdirect *listadp, *newadp;
! 4517:
! 4518: splassert(IPL_BIO);
! 4519:
! 4520: newadp = TAILQ_FIRST(&inodedep->id_newinoupdt);
! 4521: for (listadp = TAILQ_FIRST(&inodedep->id_inoupdt); listadp && newadp;) {
! 4522: if (listadp->ad_lbn < newadp->ad_lbn) {
! 4523: listadp = TAILQ_NEXT(listadp, ad_next);
! 4524: continue;
! 4525: }
! 4526: TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next);
! 4527: TAILQ_INSERT_BEFORE(listadp, newadp, ad_next);
! 4528: if (listadp->ad_lbn == newadp->ad_lbn) {
! 4529: allocdirect_merge(&inodedep->id_inoupdt, newadp,
! 4530: listadp);
! 4531: listadp = newadp;
! 4532: }
! 4533: newadp = TAILQ_FIRST(&inodedep->id_newinoupdt);
! 4534: }
! 4535: while ((newadp = TAILQ_FIRST(&inodedep->id_newinoupdt)) != NULL) {
! 4536: TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next);
! 4537: TAILQ_INSERT_TAIL(&inodedep->id_inoupdt, newadp, ad_next);
! 4538: }
! 4539: }
! 4540:
! 4541: /*
! 4542: * If we are doing an fsync, then we must ensure that any directory
! 4543: * entries for the inode have been written after the inode gets to disk.
! 4544: */
! 4545: int
! 4546: softdep_fsync(vp)
! 4547: struct vnode *vp; /* the "in_core" copy of the inode */
! 4548: {
! 4549: struct inodedep *inodedep;
! 4550: struct pagedep *pagedep;
! 4551: struct worklist *wk;
! 4552: struct diradd *dap;
! 4553: struct mount *mnt;
! 4554: struct vnode *pvp;
! 4555: struct inode *ip;
! 4556: struct inode *pip;
! 4557: struct buf *bp;
! 4558: struct fs *fs;
! 4559: struct proc *p = CURPROC; /* XXX */
! 4560: int error, flushparent;
! 4561: ino_t parentino;
! 4562: daddr64_t lbn;
! 4563:
! 4564: ip = VTOI(vp);
! 4565: fs = ip->i_fs;
! 4566: ACQUIRE_LOCK(&lk);
! 4567: if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) {
! 4568: FREE_LOCK(&lk);
! 4569: return (0);
! 4570: }
! 4571: if (LIST_FIRST(&inodedep->id_inowait) != NULL ||
! 4572: LIST_FIRST(&inodedep->id_bufwait) != NULL ||
! 4573: TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
! 4574: TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL) {
! 4575: FREE_LOCK(&lk);
! 4576: panic("softdep_fsync: pending ops");
! 4577: }
! 4578: for (error = 0, flushparent = 0; ; ) {
! 4579: if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL)
! 4580: break;
! 4581: if (wk->wk_type != D_DIRADD) {
! 4582: FREE_LOCK(&lk);
! 4583: panic("softdep_fsync: Unexpected type %s",
! 4584: TYPENAME(wk->wk_type));
! 4585: }
! 4586: dap = WK_DIRADD(wk);
! 4587: /*
! 4588: * Flush our parent if this directory entry has a MKDIR_PARENT
! 4589: * dependency or is contained in a newly allocated block.
! 4590: */
! 4591: if (dap->da_state & DIRCHG)
! 4592: pagedep = dap->da_previous->dm_pagedep;
! 4593: else
! 4594: pagedep = dap->da_pagedep;
! 4595: mnt = pagedep->pd_mnt;
! 4596: parentino = pagedep->pd_ino;
! 4597: lbn = pagedep->pd_lbn;
! 4598: if ((dap->da_state & (MKDIR_BODY | COMPLETE)) != COMPLETE) {
! 4599: FREE_LOCK(&lk);
! 4600: panic("softdep_fsync: dirty");
! 4601: }
! 4602: if ((dap->da_state & MKDIR_PARENT) ||
! 4603: (pagedep->pd_state & NEWBLOCK))
! 4604: flushparent = 1;
! 4605: else
! 4606: flushparent = 0;
! 4607: /*
! 4608: * If we are being fsync'ed as part of vgone'ing this vnode,
! 4609: * then we will not be able to release and recover the
! 4610: * vnode below, so we just have to give up on writing its
! 4611: * directory entry out. It will eventually be written, just
! 4612: * not now, but then the user was not asking to have it
! 4613: * written, so we are not breaking any promises.
! 4614: */
! 4615: if (vp->v_flag & VXLOCK)
! 4616: break;
! 4617: /*
! 4618: * We prevent deadlock by always fetching inodes from the
! 4619: * root, moving down the directory tree. Thus, when fetching
! 4620: * our parent directory, we must unlock ourselves before
! 4621: * requesting the lock on our parent. See the comment in
! 4622: * ufs_lookup for details on possible races.
! 4623: */
! 4624: FREE_LOCK(&lk);
! 4625: VOP_UNLOCK(vp, 0, p);
! 4626: error = VFS_VGET(mnt, parentino, &pvp);
! 4627: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
! 4628: if (error != 0)
! 4629: return (error);
! 4630: /*
! 4631: * All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps
! 4632: * that are contained in direct blocks will be resolved by
! 4633: * doing a UFS_UPDATE. Pagedeps contained in indirect blocks
! 4634: * may require a complete sync'ing of the directory. So, we
! 4635: * try the cheap and fast UFS_UPDATE first, and if that fails,
! 4636: * then we do the slower VOP_FSYNC of the directory.
! 4637: */
! 4638: pip = VTOI(pvp);
! 4639: if (flushparent) {
! 4640: error = UFS_UPDATE(pip, MNT_WAIT);
! 4641: if (error) {
! 4642: vput(pvp);
! 4643: return (error);
! 4644: }
! 4645: if (pagedep->pd_state & NEWBLOCK) {
! 4646: error = VOP_FSYNC(pvp, p->p_ucred, MNT_WAIT, p);
! 4647: if (error) {
! 4648: vput(pvp);
! 4649: return (error);
! 4650: }
! 4651: }
! 4652: }
! 4653: /*
! 4654: * Flush directory page containing the inode's name.
! 4655: */
! 4656: error = bread(pvp, lbn, fs->fs_bsize, p->p_ucred, &bp);
! 4657: if (error == 0) {
! 4658: bp->b_bcount = blksize(fs, pip, lbn);
! 4659: error = bwrite(bp);
! 4660: } else
! 4661: brelse(bp);
! 4662: vput(pvp);
! 4663: if (error != 0)
! 4664: return (error);
! 4665: ACQUIRE_LOCK(&lk);
! 4666: if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0)
! 4667: break;
! 4668: }
! 4669: FREE_LOCK(&lk);
! 4670: return (0);
! 4671: }
! 4672:
! 4673: /*
! 4674: * Flush all the dirty bitmaps associated with the block device
! 4675: * before flushing the rest of the dirty blocks so as to reduce
! 4676: * the number of dependencies that will have to be rolled back.
! 4677: */
! 4678: void
! 4679: softdep_fsync_mountdev(vp, waitfor)
! 4680: struct vnode *vp;
! 4681: int waitfor;
! 4682: {
! 4683: struct buf *bp, *nbp;
! 4684: struct worklist *wk;
! 4685:
! 4686: if (!vn_isdisk(vp, NULL))
! 4687: panic("softdep_fsync_mountdev: vnode not a disk");
! 4688: ACQUIRE_LOCK(&lk);
! 4689: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
! 4690: nbp = LIST_NEXT(bp, b_vnbufs);
! 4691: /*
! 4692: * If it is already scheduled, skip to the next buffer.
! 4693: */
! 4694: if (bp->b_flags & B_BUSY)
! 4695: continue;
! 4696: bp->b_flags |= B_BUSY;
! 4697:
! 4698: if ((bp->b_flags & B_DELWRI) == 0) {
! 4699: FREE_LOCK(&lk);
! 4700: panic("softdep_fsync_mountdev: not dirty");
! 4701: }
! 4702: /*
! 4703: * We are only interested in bitmaps with outstanding
! 4704: * dependencies.
! 4705: */
! 4706: if ((wk = LIST_FIRST(&bp->b_dep)) == NULL ||
! 4707: wk->wk_type != D_BMSAFEMAP) {
! 4708: bp->b_flags &= ~B_BUSY;
! 4709: continue;
! 4710: }
! 4711: bremfree(bp);
! 4712: FREE_LOCK(&lk);
! 4713: (void) bawrite(bp);
! 4714: ACQUIRE_LOCK(&lk);
! 4715: /*
! 4716: * Since we may have slept during the I/O, we need
! 4717: * to start from a known point.
! 4718: */
! 4719: nbp = LIST_FIRST(&vp->v_dirtyblkhd);
! 4720: }
! 4721: if (waitfor == MNT_WAIT)
! 4722: drain_output(vp, 1);
! 4723: FREE_LOCK(&lk);
! 4724: }
! 4725:
! 4726: /*
! 4727: * This routine is called when we are trying to synchronously flush a
! 4728: * file. This routine must eliminate any filesystem metadata dependencies
! 4729: * so that the syncing routine can succeed by pushing the dirty blocks
! 4730: * associated with the file. If any I/O errors occur, they are returned.
! 4731: */
! 4732: int
! 4733: softdep_sync_metadata(ap)
! 4734: struct vop_fsync_args /* {
! 4735: struct vnode *a_vp;
! 4736: struct ucred *a_cred;
! 4737: int a_waitfor;
! 4738: struct proc *a_p;
! 4739: } */ *ap;
! 4740: {
! 4741: struct vnode *vp = ap->a_vp;
! 4742: struct pagedep *pagedep;
! 4743: struct allocdirect *adp;
! 4744: struct allocindir *aip;
! 4745: struct buf *bp, *nbp;
! 4746: struct worklist *wk;
! 4747: int i, error, waitfor;
! 4748:
! 4749: /*
! 4750: * Check whether this vnode is involved in a filesystem
! 4751: * that is doing soft dependency processing.
! 4752: */
! 4753: if (!vn_isdisk(vp, NULL)) {
! 4754: if (!DOINGSOFTDEP(vp))
! 4755: return (0);
! 4756: } else
! 4757: if (vp->v_specmountpoint == NULL ||
! 4758: (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP) == 0)
! 4759: return (0);
! 4760: /*
! 4761: * Ensure that any direct block dependencies have been cleared.
! 4762: */
! 4763: ACQUIRE_LOCK(&lk);
! 4764: if ((error = flush_inodedep_deps(VTOI(vp)->i_fs, VTOI(vp)->i_number))) {
! 4765: FREE_LOCK(&lk);
! 4766: return (error);
! 4767: }
! 4768: /*
! 4769: * For most files, the only metadata dependencies are the
! 4770: * cylinder group maps that allocate their inode or blocks.
! 4771: * The block allocation dependencies can be found by traversing
! 4772: * the dependency lists for any buffers that remain on their
! 4773: * dirty buffer list. The inode allocation dependency will
! 4774: * be resolved when the inode is updated with MNT_WAIT.
! 4775: * This work is done in two passes. The first pass grabs most
! 4776: * of the buffers and begins asynchronously writing them. The
! 4777: * only way to wait for these asynchronous writes is to sleep
! 4778: * on the filesystem vnode which may stay busy for a long time
! 4779: * if the filesystem is active. So, instead, we make a second
! 4780: * pass over the dependencies blocking on each write. In the
! 4781: * usual case we will be blocking against a write that we
! 4782: * initiated, so when it is done the dependency will have been
! 4783: * resolved. Thus the second pass is expected to end quickly.
! 4784: */
! 4785: waitfor = MNT_NOWAIT;
! 4786: top:
! 4787: /*
! 4788: * We must wait for any I/O in progress to finish so that
! 4789: * all potential buffers on the dirty list will be visible.
! 4790: */
! 4791: drain_output(vp, 1);
! 4792: bp = LIST_FIRST(&vp->v_dirtyblkhd);
! 4793: if (getdirtybuf(bp, MNT_WAIT) == 0) {
! 4794: FREE_LOCK(&lk);
! 4795: return (0);
! 4796: }
! 4797: loop:
! 4798: /*
! 4799: * As we hold the buffer locked, none of its dependencies
! 4800: * will disappear.
! 4801: */
! 4802: LIST_FOREACH(wk, &bp->b_dep, wk_list) {
! 4803: switch (wk->wk_type) {
! 4804:
! 4805: case D_ALLOCDIRECT:
! 4806: adp = WK_ALLOCDIRECT(wk);
! 4807: if (adp->ad_state & DEPCOMPLETE)
! 4808: break;
! 4809: nbp = adp->ad_buf;
! 4810: if (getdirtybuf(nbp, waitfor) == 0)
! 4811: break;
! 4812: FREE_LOCK(&lk);
! 4813: if (waitfor == MNT_NOWAIT) {
! 4814: bawrite(nbp);
! 4815: } else if ((error = VOP_BWRITE(nbp)) != 0) {
! 4816: bawrite(bp);
! 4817: return (error);
! 4818: }
! 4819: ACQUIRE_LOCK(&lk);
! 4820: break;
! 4821:
! 4822: case D_ALLOCINDIR:
! 4823: aip = WK_ALLOCINDIR(wk);
! 4824: if (aip->ai_state & DEPCOMPLETE)
! 4825: break;
! 4826: nbp = aip->ai_buf;
! 4827: if (getdirtybuf(nbp, waitfor) == 0)
! 4828: break;
! 4829: FREE_LOCK(&lk);
! 4830: if (waitfor == MNT_NOWAIT) {
! 4831: bawrite(nbp);
! 4832: } else if ((error = VOP_BWRITE(nbp)) != 0) {
! 4833: bawrite(bp);
! 4834: return (error);
! 4835: }
! 4836: ACQUIRE_LOCK(&lk);
! 4837: break;
! 4838:
! 4839: case D_INDIRDEP:
! 4840: restart:
! 4841:
! 4842: LIST_FOREACH(aip, &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) {
! 4843: if (aip->ai_state & DEPCOMPLETE)
! 4844: continue;
! 4845: nbp = aip->ai_buf;
! 4846: if (getdirtybuf(nbp, MNT_WAIT) == 0)
! 4847: goto restart;
! 4848: FREE_LOCK(&lk);
! 4849: if ((error = VOP_BWRITE(nbp)) != 0) {
! 4850: bawrite(bp);
! 4851: return (error);
! 4852: }
! 4853: ACQUIRE_LOCK(&lk);
! 4854: goto restart;
! 4855: }
! 4856: break;
! 4857:
! 4858: case D_INODEDEP:
! 4859: if ((error = flush_inodedep_deps(WK_INODEDEP(wk)->id_fs,
! 4860: WK_INODEDEP(wk)->id_ino)) != 0) {
! 4861: FREE_LOCK(&lk);
! 4862: bawrite(bp);
! 4863: return (error);
! 4864: }
! 4865: break;
! 4866:
! 4867: case D_PAGEDEP:
! 4868: /*
! 4869: * We are trying to sync a directory that may
! 4870: * have dependencies on both its own metadata
! 4871: * and/or dependencies on the inodes of any
! 4872: * recently allocated files. We walk its diradd
! 4873: * lists pushing out the associated inode.
! 4874: */
! 4875: pagedep = WK_PAGEDEP(wk);
! 4876: for (i = 0; i < DAHASHSZ; i++) {
! 4877: if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0)
! 4878: continue;
! 4879: if ((error =
! 4880: flush_pagedep_deps(vp, pagedep->pd_mnt,
! 4881: &pagedep->pd_diraddhd[i]))) {
! 4882: FREE_LOCK(&lk);
! 4883: bawrite(bp);
! 4884: return (error);
! 4885: }
! 4886: }
! 4887: break;
! 4888:
! 4889: case D_MKDIR:
! 4890: /*
! 4891: * This case should never happen if the vnode has
! 4892: * been properly sync'ed. However, if this function
! 4893: * is used at a place where the vnode has not yet
! 4894: * been sync'ed, this dependency can show up. So,
! 4895: * rather than panic, just flush it.
! 4896: */
! 4897: nbp = WK_MKDIR(wk)->md_buf;
! 4898: if (getdirtybuf(nbp, waitfor) == 0)
! 4899: break;
! 4900: FREE_LOCK(&lk);
! 4901: if (waitfor == MNT_NOWAIT) {
! 4902: bawrite(nbp);
! 4903: } else if ((error = VOP_BWRITE(nbp)) != 0) {
! 4904: bawrite(bp);
! 4905: return (error);
! 4906: }
! 4907: ACQUIRE_LOCK(&lk);
! 4908: break;
! 4909:
! 4910: case D_BMSAFEMAP:
! 4911: /*
! 4912: * This case should never happen if the vnode has
! 4913: * been properly sync'ed. However, if this function
! 4914: * is used at a place where the vnode has not yet
! 4915: * been sync'ed, this dependency can show up. So,
! 4916: * rather than panic, just flush it.
! 4917: */
! 4918: nbp = WK_BMSAFEMAP(wk)->sm_buf;
! 4919: if (getdirtybuf(nbp, waitfor) == 0)
! 4920: break;
! 4921: FREE_LOCK(&lk);
! 4922: if (waitfor == MNT_NOWAIT) {
! 4923: bawrite(nbp);
! 4924: } else if ((error = VOP_BWRITE(nbp)) != 0) {
! 4925: bawrite(bp);
! 4926: return (error);
! 4927: }
! 4928: ACQUIRE_LOCK(&lk);
! 4929: break;
! 4930:
! 4931: default:
! 4932: FREE_LOCK(&lk);
! 4933: panic("softdep_sync_metadata: Unknown type %s",
! 4934: TYPENAME(wk->wk_type));
! 4935: /* NOTREACHED */
! 4936: }
! 4937: }
! 4938: nbp = LIST_NEXT(bp, b_vnbufs);
! 4939: getdirtybuf(nbp, MNT_WAIT);
! 4940: FREE_LOCK(&lk);
! 4941: bawrite(bp);
! 4942: ACQUIRE_LOCK(&lk);
! 4943: if (nbp != NULL) {
! 4944: bp = nbp;
! 4945: goto loop;
! 4946: }
! 4947: /*
! 4948: * The brief unlock is to allow any pent up dependency
! 4949: * processing to be done. Then proceed with the second pass.
! 4950: */
! 4951: if (waitfor == MNT_NOWAIT) {
! 4952: waitfor = MNT_WAIT;
! 4953: FREE_LOCK(&lk);
! 4954: ACQUIRE_LOCK(&lk);
! 4955: goto top;
! 4956: }
! 4957:
! 4958: /*
! 4959: * If we have managed to get rid of all the dirty buffers,
! 4960: * then we are done. For certain directories and block
! 4961: * devices, we may need to do further work.
! 4962: *
! 4963: * We must wait for any I/O in progress to finish so that
! 4964: * all potential buffers on the dirty list will be visible.
! 4965: */
! 4966: drain_output(vp, 1);
! 4967: if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
! 4968: FREE_LOCK(&lk);
! 4969: return (0);
! 4970: }
! 4971:
! 4972: FREE_LOCK(&lk);
! 4973: /*
! 4974: * If we are trying to sync a block device, some of its buffers may
! 4975: * contain metadata that cannot be written until the contents of some
! 4976: * partially written files have been written to disk. The only easy
! 4977: * way to accomplish this is to sync the entire filesystem (luckily
! 4978: * this happens rarely).
! 4979: */
! 4980: if (vn_isdisk(vp, NULL) &&
! 4981: vp->v_specmountpoint && !VOP_ISLOCKED(vp) &&
! 4982: (error = VFS_SYNC(vp->v_specmountpoint, MNT_WAIT, ap->a_cred,
! 4983: ap->a_p)) != 0)
! 4984: return (error);
! 4985: return (0);
! 4986: }
! 4987:
! 4988: /*
! 4989: * Flush the dependencies associated with an inodedep.
! 4990: * Called with splbio blocked.
! 4991: */
! 4992: STATIC int
! 4993: flush_inodedep_deps(fs, ino)
! 4994: struct fs *fs;
! 4995: ino_t ino;
! 4996: {
! 4997: struct inodedep *inodedep;
! 4998: struct allocdirect *adp;
! 4999: int error, waitfor;
! 5000: struct buf *bp;
! 5001:
! 5002: splassert(IPL_BIO);
! 5003:
! 5004: /*
! 5005: * This work is done in two passes. The first pass grabs most
! 5006: * of the buffers and begins asynchronously writing them. The
! 5007: * only way to wait for these asynchronous writes is to sleep
! 5008: * on the filesystem vnode which may stay busy for a long time
! 5009: * if the filesystem is active. So, instead, we make a second
! 5010: * pass over the dependencies blocking on each write. In the
! 5011: * usual case we will be blocking against a write that we
! 5012: * initiated, so when it is done the dependency will have been
! 5013: * resolved. Thus the second pass is expected to end quickly.
! 5014: * We give a brief window at the top of the loop to allow
! 5015: * any pending I/O to complete.
! 5016: */
! 5017: for (waitfor = MNT_NOWAIT; ; ) {
! 5018: FREE_LOCK(&lk);
! 5019: ACQUIRE_LOCK(&lk);
! 5020: if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
! 5021: return (0);
! 5022: TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) {
! 5023: if (adp->ad_state & DEPCOMPLETE)
! 5024: continue;
! 5025: bp = adp->ad_buf;
! 5026: if (getdirtybuf(bp, waitfor) == 0) {
! 5027: if (waitfor == MNT_NOWAIT)
! 5028: continue;
! 5029: break;
! 5030: }
! 5031: FREE_LOCK(&lk);
! 5032: if (waitfor == MNT_NOWAIT) {
! 5033: bawrite(bp);
! 5034: } else if ((error = VOP_BWRITE(bp)) != 0) {
! 5035: ACQUIRE_LOCK(&lk);
! 5036: return (error);
! 5037: }
! 5038: ACQUIRE_LOCK(&lk);
! 5039: break;
! 5040: }
! 5041: if (adp != NULL)
! 5042: continue;
! 5043: TAILQ_FOREACH(adp, &inodedep->id_newinoupdt, ad_next) {
! 5044: if (adp->ad_state & DEPCOMPLETE)
! 5045: continue;
! 5046: bp = adp->ad_buf;
! 5047: if (getdirtybuf(bp, waitfor) == 0) {
! 5048: if (waitfor == MNT_NOWAIT)
! 5049: continue;
! 5050: break;
! 5051: }
! 5052: FREE_LOCK(&lk);
! 5053: if (waitfor == MNT_NOWAIT) {
! 5054: bawrite(bp);
! 5055: } else if ((error = VOP_BWRITE(bp)) != 0) {
! 5056: ACQUIRE_LOCK(&lk);
! 5057: return (error);
! 5058: }
! 5059: ACQUIRE_LOCK(&lk);
! 5060: break;
! 5061: }
! 5062: if (adp != NULL)
! 5063: continue;
! 5064: /*
! 5065: * If pass2, we are done, otherwise do pass 2.
! 5066: */
! 5067: if (waitfor == MNT_WAIT)
! 5068: break;
! 5069: waitfor = MNT_WAIT;
! 5070: }
! 5071: /*
! 5072: * Try freeing inodedep in case all dependencies have been removed.
! 5073: */
! 5074: if (inodedep_lookup(fs, ino, 0, &inodedep) != 0)
! 5075: (void) free_inodedep(inodedep);
! 5076: return (0);
! 5077: }
! 5078:
! 5079: /*
! 5080: * Eliminate a pagedep dependency by flushing out all its diradd dependencies.
! 5081: * Called with splbio blocked.
! 5082: */
! 5083: STATIC int
! 5084: flush_pagedep_deps(pvp, mp, diraddhdp)
! 5085: struct vnode *pvp;
! 5086: struct mount *mp;
! 5087: struct diraddhd *diraddhdp;
! 5088: {
! 5089: struct proc *p = CURPROC; /* XXX */
! 5090: struct worklist *wk;
! 5091: struct inodedep *inodedep;
! 5092: struct ufsmount *ump;
! 5093: struct diradd *dap;
! 5094: struct vnode *vp;
! 5095: int gotit, error = 0;
! 5096: struct buf *bp;
! 5097: ino_t inum;
! 5098:
! 5099: splassert(IPL_BIO);
! 5100:
! 5101: ump = VFSTOUFS(mp);
! 5102: while ((dap = LIST_FIRST(diraddhdp)) != NULL) {
! 5103: /*
! 5104: * Flush ourselves if this directory entry
! 5105: * has a MKDIR_PARENT dependency.
! 5106: */
! 5107: if (dap->da_state & MKDIR_PARENT) {
! 5108: FREE_LOCK(&lk);
! 5109: if ((error = UFS_UPDATE(VTOI(pvp), MNT_WAIT)))
! 5110: break;
! 5111: ACQUIRE_LOCK(&lk);
! 5112: /*
! 5113: * If that cleared dependencies, go on to next.
! 5114: */
! 5115: if (dap != LIST_FIRST(diraddhdp))
! 5116: continue;
! 5117: if (dap->da_state & MKDIR_PARENT) {
! 5118: FREE_LOCK(&lk);
! 5119: panic("flush_pagedep_deps: MKDIR_PARENT");
! 5120: }
! 5121: }
! 5122: /*
! 5123: * A newly allocated directory must have its "." and
! 5124: * ".." entries written out before its name can be
! 5125: * committed in its parent. We do not want or need
! 5126: * the full semantics of a synchronous VOP_FSYNC as
! 5127: * that may end up here again, once for each directory
! 5128: * level in the filesystem. Instead, we push the blocks
! 5129: * and wait for them to clear. We have to fsync twice
! 5130: * because the first call may choose to defer blocks
! 5131: * that still have dependencies, but deferral will
! 5132: * happen at most once.
! 5133: */
! 5134: inum = dap->da_newinum;
! 5135: if (dap->da_state & MKDIR_BODY) {
! 5136: FREE_LOCK(&lk);
! 5137: if ((error = VFS_VGET(mp, inum, &vp)) != 0)
! 5138: break;
! 5139: if ((error=VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)) ||
! 5140: (error=VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) {
! 5141: vput(vp);
! 5142: break;
! 5143: }
! 5144: drain_output(vp, 0);
! 5145: /*
! 5146: * If first block is still dirty with a D_MKDIR
! 5147: * dependency then it needs to be written now.
! 5148: */
! 5149: for (;;) {
! 5150: error = 0;
! 5151: ACQUIRE_LOCK(&lk);
! 5152: bp = incore(vp, 0);
! 5153: if (bp == NULL) {
! 5154: FREE_LOCK(&lk);
! 5155: break;
! 5156: }
! 5157: LIST_FOREACH(wk, &bp->b_dep, wk_list)
! 5158: if (wk->wk_type == D_MKDIR)
! 5159: break;
! 5160: if (wk) {
! 5161: gotit = getdirtybuf(bp, MNT_WAIT);
! 5162: FREE_LOCK(&lk);
! 5163: if (gotit && (error = bwrite(bp)) != 0)
! 5164: break;
! 5165: } else
! 5166: FREE_LOCK(&lk);
! 5167: break;
! 5168: }
! 5169: vput(vp);
! 5170: /* Flushing of first block failed */
! 5171: if (error)
! 5172: break;
! 5173: ACQUIRE_LOCK(&lk);
! 5174: /*
! 5175: * If that cleared dependencies, go on to next.
! 5176: */
! 5177: if (dap != LIST_FIRST(diraddhdp))
! 5178: continue;
! 5179: if (dap->da_state & MKDIR_BODY) {
! 5180: FREE_LOCK(&lk);
! 5181: panic("flush_pagedep_deps: MKDIR_BODY");
! 5182: }
! 5183: }
! 5184: /*
! 5185: * Flush the inode on which the directory entry depends.
! 5186: * Having accounted for MKDIR_PARENT and MKDIR_BODY above,
! 5187: * the only remaining dependency is that the updated inode
! 5188: * count must get pushed to disk. The inode has already
! 5189: * been pushed into its inode buffer (via VOP_UPDATE) at
! 5190: * the time of the reference count change. So we need only
! 5191: * locate that buffer, ensure that there will be no rollback
! 5192: * caused by a bitmap dependency, then write the inode buffer.
! 5193: */
! 5194: if (inodedep_lookup(ump->um_fs, inum, 0, &inodedep) == 0) {
! 5195: FREE_LOCK(&lk);
! 5196: panic("flush_pagedep_deps: lost inode");
! 5197: }
! 5198: /*
! 5199: * If the inode still has bitmap dependencies,
! 5200: * push them to disk.
! 5201: */
! 5202: if ((inodedep->id_state & DEPCOMPLETE) == 0) {
! 5203: bp = inodedep->id_buf;
! 5204: gotit = getdirtybuf(bp, MNT_WAIT);
! 5205: FREE_LOCK(&lk);
! 5206: if (gotit && (error = bwrite(bp)) != 0)
! 5207: break;
! 5208: ACQUIRE_LOCK(&lk);
! 5209: if (dap != LIST_FIRST(diraddhdp))
! 5210: continue;
! 5211: }
! 5212: /*
! 5213: * If the inode is still sitting in a buffer waiting
! 5214: * to be written, push it to disk.
! 5215: */
! 5216: FREE_LOCK(&lk);
! 5217: if ((error = bread(ump->um_devvp,
! 5218: fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)),
! 5219: (int)ump->um_fs->fs_bsize, NOCRED, &bp)) != 0) {
! 5220: brelse(bp);
! 5221: break;
! 5222: }
! 5223: if ((error = bwrite(bp)) != 0)
! 5224: break;
! 5225: ACQUIRE_LOCK(&lk);
! 5226: /*
! 5227: * If we have failed to get rid of all the dependencies
! 5228: * then something is seriously wrong.
! 5229: */
! 5230: if (dap == LIST_FIRST(diraddhdp)) {
! 5231: FREE_LOCK(&lk);
! 5232: panic("flush_pagedep_deps: flush failed");
! 5233: }
! 5234: }
! 5235: if (error)
! 5236: ACQUIRE_LOCK(&lk);
! 5237: return (error);
! 5238: }
! 5239:
! 5240: /*
! 5241: * A large burst of file addition or deletion activity can drive the
! 5242: * memory load excessively high. First attempt to slow things down
! 5243: * using the techniques below. If that fails, this routine requests
! 5244: * the offending operations to fall back to running synchronously
! 5245: * until the memory load returns to a reasonable level.
! 5246: */
! 5247: int
! 5248: softdep_slowdown(vp)
! 5249: struct vnode *vp;
! 5250: {
! 5251: int max_softdeps_hard;
! 5252:
! 5253: max_softdeps_hard = max_softdeps * 11 / 10;
! 5254: if (num_dirrem < max_softdeps_hard / 2 &&
! 5255: num_inodedep < max_softdeps_hard)
! 5256: return (0);
! 5257: stat_sync_limit_hit += 1;
! 5258: return (1);
! 5259: }
! 5260:
! 5261: /*
! 5262: * If memory utilization has gotten too high, deliberately slow things
! 5263: * down and speed up the I/O processing.
! 5264: */
! 5265: STATIC int
! 5266: request_cleanup(resource, islocked)
! 5267: int resource;
! 5268: int islocked;
! 5269: {
! 5270: struct proc *p = CURPROC;
! 5271: int s;
! 5272:
! 5273: /*
! 5274: * We never hold up the filesystem syncer process.
! 5275: */
! 5276: if (p == filesys_syncer || (p->p_flag & P_SOFTDEP))
! 5277: return (0);
! 5278: /*
! 5279: * First check to see if the work list has gotten backlogged.
! 5280: * If it has, co-opt this process to help clean up two entries.
! 5281: * Because this process may hold inodes locked, we cannot
! 5282: * handle any remove requests that might block on a locked
! 5283: * inode as that could lead to deadlock. We set P_SOFTDEP
! 5284: * to avoid recursively processing the worklist.
! 5285: */
! 5286: if (num_on_worklist > max_softdeps / 10) {
! 5287: atomic_setbits_int(&p->p_flag, P_SOFTDEP);
! 5288: if (islocked)
! 5289: FREE_LOCK(&lk);
! 5290: process_worklist_item(NULL, LK_NOWAIT);
! 5291: process_worklist_item(NULL, LK_NOWAIT);
! 5292: atomic_clearbits_int(&p->p_flag, P_SOFTDEP);
! 5293: stat_worklist_push += 2;
! 5294: if (islocked)
! 5295: ACQUIRE_LOCK(&lk);
! 5296: return(1);
! 5297: }
! 5298: /*
! 5299: * Next, we attempt to speed up the syncer process. If that
! 5300: * is successful, then we allow the process to continue.
! 5301: */
! 5302: if (speedup_syncer())
! 5303: return(0);
! 5304: /*
! 5305: * If we are resource constrained on inode dependencies, try
! 5306: * flushing some dirty inodes. Otherwise, we are constrained
! 5307: * by file deletions, so try accelerating flushes of directories
! 5308: * with removal dependencies. We would like to do the cleanup
! 5309: * here, but we probably hold an inode locked at this point and
! 5310: * that might deadlock against one that we try to clean. So,
! 5311: * the best that we can do is request the syncer daemon to do
! 5312: * the cleanup for us.
! 5313: */
! 5314: switch (resource) {
! 5315:
! 5316: case FLUSH_INODES:
! 5317: stat_ino_limit_push += 1;
! 5318: req_clear_inodedeps += 1;
! 5319: stat_countp = &stat_ino_limit_hit;
! 5320: break;
! 5321:
! 5322: case FLUSH_REMOVE:
! 5323: stat_blk_limit_push += 1;
! 5324: req_clear_remove += 1;
! 5325: stat_countp = &stat_blk_limit_hit;
! 5326: break;
! 5327:
! 5328: default:
! 5329: if (islocked)
! 5330: FREE_LOCK(&lk);
! 5331: panic("request_cleanup: unknown type");
! 5332: }
! 5333: /*
! 5334: * Hopefully the syncer daemon will catch up and awaken us.
! 5335: * We wait at most tickdelay before proceeding in any case.
! 5336: */
! 5337: if (islocked == 0)
! 5338: ACQUIRE_LOCK(&lk);
! 5339: proc_waiting += 1;
! 5340: if (!timeout_pending(&proc_waiting_timeout))
! 5341: timeout_add(&proc_waiting_timeout, tickdelay > 2 ? tickdelay : 2);
! 5342:
! 5343: s = FREE_LOCK_INTERLOCKED(&lk);
! 5344: (void) tsleep((caddr_t)&proc_waiting, PPAUSE, "softupdate", 0);
! 5345: ACQUIRE_LOCK_INTERLOCKED(&lk, s);
! 5346: proc_waiting -= 1;
! 5347: if (islocked == 0)
! 5348: FREE_LOCK(&lk);
! 5349: return (1);
! 5350: }
! 5351:
! 5352: /*
! 5353: * Awaken processes pausing in request_cleanup and clear proc_waiting
! 5354: * to indicate that there is no longer a timer running.
! 5355: */
! 5356: void
! 5357: pause_timer(arg)
! 5358: void *arg;
! 5359: {
! 5360:
! 5361: *stat_countp += 1;
! 5362: wakeup_one(&proc_waiting);
! 5363: if (proc_waiting > 0)
! 5364: timeout_add(&proc_waiting_timeout, tickdelay > 2 ? tickdelay : 2);
! 5365: }
! 5366:
! 5367: /*
! 5368: * Flush out a directory with at least one removal dependency in an effort to
! 5369: * reduce the number of dirrem, freefile, and freeblks dependency structures.
! 5370: */
! 5371: STATIC void
! 5372: clear_remove(p)
! 5373: struct proc *p;
! 5374: {
! 5375: struct pagedep_hashhead *pagedephd;
! 5376: struct pagedep *pagedep;
! 5377: static int next = 0;
! 5378: struct mount *mp;
! 5379: struct vnode *vp;
! 5380: int error, cnt;
! 5381: ino_t ino;
! 5382:
! 5383: ACQUIRE_LOCK(&lk);
! 5384: for (cnt = 0; cnt < pagedep_hash; cnt++) {
! 5385: pagedephd = &pagedep_hashtbl[next++];
! 5386: if (next >= pagedep_hash)
! 5387: next = 0;
! 5388: LIST_FOREACH(pagedep, pagedephd, pd_hash) {
! 5389: if (LIST_FIRST(&pagedep->pd_dirremhd) == NULL)
! 5390: continue;
! 5391: mp = pagedep->pd_mnt;
! 5392: ino = pagedep->pd_ino;
! 5393: #if 0
! 5394: if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
! 5395: continue;
! 5396: #endif
! 5397: FREE_LOCK(&lk);
! 5398: if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
! 5399: softdep_error("clear_remove: vget", error);
! 5400: #if 0
! 5401: vn_finished_write(mp);
! 5402: #endif
! 5403: return;
! 5404: }
! 5405: if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)))
! 5406: softdep_error("clear_remove: fsync", error);
! 5407: drain_output(vp, 0);
! 5408: vput(vp);
! 5409: #if 0
! 5410: vn_finished_write(mp);
! 5411: #endif
! 5412: return;
! 5413: }
! 5414: }
! 5415: FREE_LOCK(&lk);
! 5416: }
! 5417:
! 5418: /*
! 5419: * Clear out a block of dirty inodes in an effort to reduce
! 5420: * the number of inodedep dependency structures.
! 5421: */
! 5422: STATIC void
! 5423: clear_inodedeps(p)
! 5424: struct proc *p;
! 5425: {
! 5426: struct inodedep_hashhead *inodedephd;
! 5427: struct inodedep *inodedep;
! 5428: static int next = 0;
! 5429: struct mount *mp;
! 5430: struct vnode *vp;
! 5431: struct fs *fs;
! 5432: int error, cnt;
! 5433: ino_t firstino, lastino, ino;
! 5434:
! 5435: ACQUIRE_LOCK(&lk);
! 5436: /*
! 5437: * Pick a random inode dependency to be cleared.
! 5438: * We will then gather up all the inodes in its block
! 5439: * that have dependencies and flush them out.
! 5440: */
! 5441: for (cnt = 0; cnt < inodedep_hash; cnt++) {
! 5442: inodedephd = &inodedep_hashtbl[next++];
! 5443: if (next >= inodedep_hash)
! 5444: next = 0;
! 5445: if ((inodedep = LIST_FIRST(inodedephd)) != NULL)
! 5446: break;
! 5447: }
! 5448: if (inodedep == NULL) {
! 5449: FREE_LOCK(&lk);
! 5450: return;
! 5451: }
! 5452: /*
! 5453: * Ugly code to find mount point given pointer to superblock.
! 5454: */
! 5455: fs = inodedep->id_fs;
! 5456: CIRCLEQ_FOREACH(mp, &mountlist, mnt_list)
! 5457: if ((mp->mnt_flag & MNT_SOFTDEP) && fs == VFSTOUFS(mp)->um_fs)
! 5458: break;
! 5459: /*
! 5460: * Find the last inode in the block with dependencies.
! 5461: */
! 5462: firstino = inodedep->id_ino & ~(INOPB(fs) - 1);
! 5463: for (lastino = firstino + INOPB(fs) - 1; lastino > firstino; lastino--)
! 5464: if (inodedep_lookup(fs, lastino, 0, &inodedep) != 0)
! 5465: break;
! 5466: /*
! 5467: * Asynchronously push all but the last inode with dependencies.
! 5468: * Synchronously push the last inode with dependencies to ensure
! 5469: * that the inode block gets written to free up the inodedeps.
! 5470: */
! 5471: for (ino = firstino; ino <= lastino; ino++) {
! 5472: if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
! 5473: continue;
! 5474: FREE_LOCK(&lk);
! 5475: #if 0
! 5476: if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
! 5477: continue;
! 5478: #endif
! 5479: if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
! 5480: softdep_error("clear_inodedeps: vget", error);
! 5481: #if 0
! 5482: vn_finished_write(mp);
! 5483: #endif
! 5484: return;
! 5485: }
! 5486: if (ino == lastino) {
! 5487: if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p)))
! 5488: softdep_error("clear_inodedeps: fsync1", error);
! 5489: } else {
! 5490: if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)))
! 5491: softdep_error("clear_inodedeps: fsync2", error);
! 5492: drain_output(vp, 0);
! 5493: }
! 5494: vput(vp);
! 5495: #if 0
! 5496: vn_finished_write(mp);
! 5497: #endif
! 5498: ACQUIRE_LOCK(&lk);
! 5499: }
! 5500: FREE_LOCK(&lk);
! 5501: }
! 5502:
! 5503: /*
! 5504: * Function to determine if the buffer has outstanding dependencies
! 5505: * that will cause a roll-back if the buffer is written. If wantcount
! 5506: * is set, return number of dependencies, otherwise just yes or no.
! 5507: */
! 5508: int
! 5509: softdep_count_dependencies(bp, wantcount, islocked)
! 5510: struct buf *bp;
! 5511: int wantcount;
! 5512: int islocked;
! 5513: {
! 5514: struct worklist *wk;
! 5515: struct inodedep *inodedep;
! 5516: struct indirdep *indirdep;
! 5517: struct allocindir *aip;
! 5518: struct pagedep *pagedep;
! 5519: struct diradd *dap;
! 5520: int i, retval;
! 5521:
! 5522: retval = 0;
! 5523: if (!islocked)
! 5524: ACQUIRE_LOCK(&lk);
! 5525: LIST_FOREACH(wk, &bp->b_dep, wk_list) {
! 5526: switch (wk->wk_type) {
! 5527:
! 5528: case D_INODEDEP:
! 5529: inodedep = WK_INODEDEP(wk);
! 5530: if ((inodedep->id_state & DEPCOMPLETE) == 0) {
! 5531: /* bitmap allocation dependency */
! 5532: retval += 1;
! 5533: if (!wantcount)
! 5534: goto out;
! 5535: }
! 5536: if (TAILQ_FIRST(&inodedep->id_inoupdt)) {
! 5537: /* direct block pointer dependency */
! 5538: retval += 1;
! 5539: if (!wantcount)
! 5540: goto out;
! 5541: }
! 5542: continue;
! 5543:
! 5544: case D_INDIRDEP:
! 5545: indirdep = WK_INDIRDEP(wk);
! 5546:
! 5547: LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) {
! 5548: /* indirect block pointer dependency */
! 5549: retval += 1;
! 5550: if (!wantcount)
! 5551: goto out;
! 5552: }
! 5553: continue;
! 5554:
! 5555: case D_PAGEDEP:
! 5556: pagedep = WK_PAGEDEP(wk);
! 5557: for (i = 0; i < DAHASHSZ; i++) {
! 5558:
! 5559: LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
! 5560: /* directory entry dependency */
! 5561: retval += 1;
! 5562: if (!wantcount)
! 5563: goto out;
! 5564: }
! 5565: }
! 5566: continue;
! 5567:
! 5568: case D_BMSAFEMAP:
! 5569: case D_ALLOCDIRECT:
! 5570: case D_ALLOCINDIR:
! 5571: case D_MKDIR:
! 5572: /* never a dependency on these blocks */
! 5573: continue;
! 5574:
! 5575: default:
! 5576: if (!islocked)
! 5577: FREE_LOCK(&lk);
! 5578: panic("softdep_check_for_rollback: Unexpected type %s",
! 5579: TYPENAME(wk->wk_type));
! 5580: /* NOTREACHED */
! 5581: }
! 5582: }
! 5583: out:
! 5584: if (!islocked)
! 5585: FREE_LOCK(&lk);
! 5586: return retval;
! 5587: }
! 5588:
! 5589: /*
! 5590: * Acquire exclusive access to a buffer.
! 5591: * Must be called with splbio blocked.
! 5592: * Return 1 if buffer was acquired.
! 5593: */
! 5594: STATIC int
! 5595: getdirtybuf(bp, waitfor)
! 5596: struct buf *bp;
! 5597: int waitfor;
! 5598: {
! 5599: int s;
! 5600:
! 5601: if (bp == NULL)
! 5602: return (0);
! 5603:
! 5604: splassert(IPL_BIO);
! 5605:
! 5606: for (;;) {
! 5607: if ((bp->b_flags & B_BUSY) == 0)
! 5608: break;
! 5609: if (waitfor != MNT_WAIT)
! 5610: return (0);
! 5611: bp->b_flags |= B_WANTED;
! 5612: s = FREE_LOCK_INTERLOCKED(&lk);
! 5613: tsleep((caddr_t)bp, PRIBIO + 1, "sdsdty", 0);
! 5614: ACQUIRE_LOCK_INTERLOCKED(&lk, s);
! 5615: }
! 5616: if ((bp->b_flags & B_DELWRI) == 0)
! 5617: return (0);
! 5618: bremfree(bp);
! 5619: bp->b_flags |= B_BUSY;
! 5620: return (1);
! 5621: }
! 5622:
! 5623: /*
! 5624: * Wait for pending output on a vnode to complete.
! 5625: * Must be called with vnode locked.
! 5626: */
! 5627: STATIC void
! 5628: drain_output(vp, islocked)
! 5629: struct vnode *vp;
! 5630: int islocked;
! 5631: {
! 5632: int s;
! 5633:
! 5634: if (!islocked)
! 5635: ACQUIRE_LOCK(&lk);
! 5636:
! 5637: splassert(IPL_BIO);
! 5638:
! 5639: while (vp->v_numoutput) {
! 5640: vp->v_bioflag |= VBIOWAIT;
! 5641: s = FREE_LOCK_INTERLOCKED(&lk);
! 5642: tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "drain_output", 0);
! 5643: ACQUIRE_LOCK_INTERLOCKED(&lk, s);
! 5644: }
! 5645: if (!islocked)
! 5646: FREE_LOCK(&lk);
! 5647: }
! 5648:
! 5649: /*
! 5650: * Called whenever a buffer that is being invalidated or reallocated
! 5651: * contains dependencies. This should only happen if an I/O error has
! 5652: * occurred. The routine is called with the buffer locked.
! 5653: */
! 5654: void
! 5655: softdep_deallocate_dependencies(bp)
! 5656: struct buf *bp;
! 5657: {
! 5658:
! 5659: if ((bp->b_flags & B_ERROR) == 0)
! 5660: panic("softdep_deallocate_dependencies: dangling deps");
! 5661: softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error);
! 5662: panic("softdep_deallocate_dependencies: unrecovered I/O error");
! 5663: }
! 5664:
! 5665: /*
! 5666: * Function to handle asynchronous write errors in the filesystem.
! 5667: */
! 5668: void
! 5669: softdep_error(func, error)
! 5670: char *func;
! 5671: int error;
! 5672: {
! 5673:
! 5674: /* XXX should do something better! */
! 5675: printf("%s: got error %d while accessing filesystem\n", func, error);
! 5676: }
! 5677:
! 5678: #ifdef DDB
! 5679: #include <machine/db_machdep.h>
! 5680: #include <ddb/db_interface.h>
! 5681: #include <ddb/db_output.h>
! 5682:
! 5683: void
! 5684: softdep_print(struct buf *bp, int full, int (*pr)(const char *, ...))
! 5685: {
! 5686: struct worklist *wk;
! 5687:
! 5688: (*pr)(" deps:\n");
! 5689: LIST_FOREACH(wk, &bp->b_dep, wk_list)
! 5690: worklist_print(wk, full, pr);
! 5691: }
! 5692:
/*
 * DDB helper: dump one worklist entry.  Prints a header line with the
 * dependency type, address, and state bits, then type-specific fields.
 * full controls the indentation depth of continuation lines (up to 16
 * levels, two spaces each).  Types with no case below (e.g. freedeps)
 * get only the header line.
 */
void
worklist_print(struct worklist *wk, int full, int (*pr)(const char *, ...))
{
	struct pagedep *pagedep;
	struct inodedep *inodedep;
	struct newblk *newblk;
	struct bmsafemap *bmsafemap;
	struct allocdirect *adp;
	struct indirdep *indirdep;
	struct allocindir *aip;
	struct freefrag *freefrag;
	struct freeblks *freeblks;
	struct freefile *freefile;
	struct diradd *dap;
	struct mkdir *mkdir;
	struct dirrem *dirrem;
	struct newdirblk *newdirblk;
	char prefix[33];	/* up to 2*16 spaces plus NUL */
	int i;

	/* Build a 2*MIN(16, full)-space indentation prefix. */
	for (prefix[i = 2 * MIN(16, full)] = '\0'; i--; prefix[i] = ' ')
		;

	(*pr)("%s%s(%p) state %b\n%s", prefix, TYPENAME(wk->wk_type), wk,
	    wk->wk_state, DEP_BITS, prefix);
	switch (wk->wk_type) {
	case D_PAGEDEP:
		pagedep = WK_PAGEDEP(wk);
		(*pr)("mount %p ino %u lbn %lld\n", pagedep->pd_mnt,
		    pagedep->pd_ino, pagedep->pd_lbn);
		break;
	case D_INODEDEP:
		inodedep = WK_INODEDEP(wk);
		(*pr)("fs %p ino %u nlinkdelta %u dino %p\n"
		    "%s bp %p savsz %lld\n", inodedep->id_fs,
		    inodedep->id_ino, inodedep->id_nlinkdelta,
		    inodedep->id_un.idu_savedino1,
		    prefix, inodedep->id_buf, inodedep->id_savedsize);
		break;
	case D_NEWBLK:
		newblk = WK_NEWBLK(wk);
		(*pr)("fs %p newblk %d state %d bmsafemap %p\n",
		    newblk->nb_fs, newblk->nb_newblkno, newblk->nb_state,
		    newblk->nb_bmsafemap);
		break;
	case D_BMSAFEMAP:
		bmsafemap = WK_BMSAFEMAP(wk);
		(*pr)("buf %p\n", bmsafemap->sm_buf);
		break;
	case D_ALLOCDIRECT:
		adp = WK_ALLOCDIRECT(wk);
		(*pr)("lbn %lld newlbk %d oldblk %d newsize %lu olsize %lu\n"
		    "%s bp %p inodedep %p freefrag %p\n", adp->ad_lbn,
		    adp->ad_newblkno, adp->ad_oldblkno, adp->ad_newsize,
		    adp->ad_oldsize,
		    prefix, adp->ad_buf, adp->ad_inodedep, adp->ad_freefrag);
		break;
	case D_INDIRDEP:
		indirdep = WK_INDIRDEP(wk);
		(*pr)("savedata %p savebp %p\n", indirdep->ir_saveddata,
		    indirdep->ir_savebp);
		break;
	case D_ALLOCINDIR:
		aip = WK_ALLOCINDIR(wk);
		(*pr)("off %d newblk %d oldblk %d freefrag %p\n"
		    "%s indirdep %p buf %p\n", aip->ai_offset,
		    aip->ai_newblkno, aip->ai_oldblkno, aip->ai_freefrag,
		    prefix, aip->ai_indirdep, aip->ai_buf);
		break;
	case D_FREEFRAG:
		freefrag = WK_FREEFRAG(wk);
		(*pr)("vnode %p mp %p blkno %d fsize %ld ino %u\n",
		    freefrag->ff_devvp, freefrag->ff_mnt, freefrag->ff_blkno,
		    freefrag->ff_fragsize, freefrag->ff_inum);
		break;
	case D_FREEBLKS:
		freeblks = WK_FREEBLKS(wk);
		(*pr)("previno %u devvp %p mp %p oldsz %lld newsz %lld\n"
		    "%s chkcnt %d uid %d\n", freeblks->fb_previousinum,
		    freeblks->fb_devvp, freeblks->fb_mnt, freeblks->fb_oldsize,
		    freeblks->fb_newsize,
		    prefix, freeblks->fb_chkcnt, freeblks->fb_uid);
		break;
	case D_FREEFILE:
		freefile = WK_FREEFILE(wk);
		(*pr)("mode %x oldino %u vnode %p mp %p\n", freefile->fx_mode,
		    freefile->fx_oldinum, freefile->fx_devvp, freefile->fx_mnt);
		break;
	case D_DIRADD:
		dap = WK_DIRADD(wk);
		(*pr)("off %ld ino %u da_un %p\n", dap->da_offset,
		    dap->da_newinum, dap->da_un.dau_previous);
		break;
	case D_MKDIR:
		mkdir = WK_MKDIR(wk);
		(*pr)("diradd %p bp %p\n", mkdir->md_diradd, mkdir->md_buf);
		break;
	case D_DIRREM:
		dirrem = WK_DIRREM(wk);
		(*pr)("mp %p ino %u dm_un %p\n", dirrem->dm_mnt,
		    dirrem->dm_oldinum, dirrem->dm_un.dmu_pagedep);
		break;
	case D_NEWDIRBLK:
		newdirblk = WK_NEWDIRBLK(wk);
		(*pr)("pagedep %p\n", newdirblk->db_pagedep);
		break;
	}
}
! 5801: #endif
CVSweb