Annotation of sys/uvm/uvm_swap.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: uvm_swap.c,v 1.72 2007/06/18 21:51:15 pedro Exp $ */
! 2: /* $NetBSD: uvm_swap.c,v 1.40 2000/11/17 11:39:39 mrg Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1995, 1996, 1997 Matthew R. Green
! 6: * All rights reserved.
! 7: *
! 8: * Redistribution and use in source and binary forms, with or without
! 9: * modification, are permitted provided that the following conditions
! 10: * are met:
! 11: * 1. Redistributions of source code must retain the above copyright
! 12: * notice, this list of conditions and the following disclaimer.
! 13: * 2. Redistributions in binary form must reproduce the above copyright
! 14: * notice, this list of conditions and the following disclaimer in the
! 15: * documentation and/or other materials provided with the distribution.
! 16: * 3. The name of the author may not be used to endorse or promote products
! 17: * derived from this software without specific prior written permission.
! 18: *
! 19: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
! 20: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
! 21: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
! 22: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
! 23: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
! 24: * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
! 25: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
! 26: * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
! 27: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 28: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 29: * SUCH DAMAGE.
! 30: *
! 31: * from: NetBSD: vm_swap.c,v 1.52 1997/12/02 13:47:37 pk Exp
! 32: * from: Id: uvm_swap.c,v 1.1.2.42 1998/02/02 20:38:06 chuck Exp
! 33: */
! 34:
! 35: #include <sys/param.h>
! 36: #include <sys/systm.h>
! 37: #include <sys/buf.h>
! 38: #include <sys/conf.h>
! 39: #include <sys/proc.h>
! 40: #include <sys/namei.h>
! 41: #include <sys/disklabel.h>
! 42: #include <sys/errno.h>
! 43: #include <sys/kernel.h>
! 44: #include <sys/malloc.h>
! 45: #include <sys/vnode.h>
! 46: #include <sys/file.h>
! 47: #include <sys/extent.h>
! 48: #include <sys/mount.h>
! 49: #include <sys/pool.h>
! 50: #include <sys/syscallargs.h>
! 51: #include <sys/swap.h>
! 52:
! 53: #include <uvm/uvm.h>
! 54: #ifdef UVM_SWAP_ENCRYPT
! 55: #include <sys/syslog.h>
! 56: #endif
! 57:
! 58: #include <miscfs/specfs/specdev.h>
! 59:
! 60: /*
! 61: * uvm_swap.c: manage configuration and i/o to swap space.
! 62: */
! 63:
! 64: /*
! 65: * swap space is managed in the following way:
! 66: *
! 67: * each swap partition or file is described by a "swapdev" structure.
! 68: * each "swapdev" structure contains a "swapent" structure which contains
! 69: * information that is passed up to the user (via system calls).
! 70: *
! 71: * each swap partition is assigned a "priority" (int) which controls
! 72: * swap partition usage.
! 73: *
! 74: * the system maintains a global data structure describing all swap
! 75: * partitions/files. there is a sorted LIST of "swappri" structures
! 76: * which describe "swapdev"'s at that priority. this LIST is headed
! 77: * by the "swap_priority" global var. each "swappri" contains a
! 78: * CIRCLEQ of "swapdev" structures at that priority.
! 79: *
! 80: * locking:
! 81: * - swap_syscall_lock (sleep lock): this lock serializes the swapctl
! 82: * system call and prevents the swap priority list from changing
! 83: * while we are in the middle of a system call (e.g. SWAP_STATS).
! 84: * - uvm.swap_data_lock (simple_lock): this lock protects all swap data
! 85: * structures including the priority list, the swapdev structures,
! 86: * and the swapmap extent.
! 87: *
! 88: * each swap device has the following info:
! 89: * - swap device in use (could be disabled, preventing future use)
! 90: * - swap enabled (allows new allocations on swap)
! 91: * - map info in /dev/drum
! 92: * - vnode pointer
! 93: * for swap files only:
! 94: * - block size
! 95: * - max byte count in buffer
! 96: * - buffer
! 97: * - credentials to use when doing i/o to file
! 98: *
! 99: * userland controls and configures swap with the swapctl(2) system call.
! 100: * the sys_swapctl performs the following operations:
! 101: * [1] SWAP_NSWAP: returns the number of swap devices currently configured
! 102: * [2] SWAP_STATS: given a pointer to an array of swapent structures
! 103: * (passed in via "arg") of a size passed in via "misc" ... we load
! 104: * the current swap config into the array.
! 105: * [3] SWAP_ON: given a pathname in arg (could be device or file) and a
! 106: * priority in "misc", start swapping on it.
! 107: * [4] SWAP_OFF: as SWAP_ON, but stops swapping to a device
! 108: * [5] SWAP_CTL: changes the priority of a swap device (new priority in
! 109: * "misc")
! 110: */
! 111:
! 112: /*
! 113: * swapdev: describes a single swap partition/file
! 114: *
! 115: * note the following should be true:
! 116: * swd_inuse <= swd_nblks [number of blocks in use is <= total blocks]
! 117: * swd_nblks <= swd_mapsize [because mapsize includes miniroot+disklabel]
! 118: */
! 119: struct swapdev {
! 120: struct swapent swd_se;
! 121: #define swd_dev swd_se.se_dev /* device id */
! 122: #define swd_flags swd_se.se_flags /* flags:inuse/enable/fake */
! 123: #define swd_priority swd_se.se_priority /* our priority */
! 124: #define swd_inuse swd_se.se_inuse /* our priority */
! 125: #define swd_nblks swd_se.se_nblks /* our priority */
! 126: char *swd_path; /* saved pathname of device */
! 127: int swd_pathlen; /* length of pathname */
! 128: int swd_npages; /* #pages we can use */
! 129: int swd_npginuse; /* #pages in use */
! 130: int swd_npgbad; /* #pages bad */
! 131: int swd_drumoffset; /* page0 offset in drum */
! 132: int swd_drumsize; /* #pages in drum */
! 133: struct extent *swd_ex; /* extent for this swapdev */
! 134: char swd_exname[12]; /* name of extent above */
! 135: struct vnode *swd_vp; /* backing vnode */
! 136: CIRCLEQ_ENTRY(swapdev) swd_next; /* priority circleq */
! 137:
! 138: int swd_bsize; /* blocksize (bytes) */
! 139: int swd_maxactive; /* max active i/o reqs */
! 140: struct buf swd_tab; /* buffer list */
! 141: struct ucred *swd_cred; /* cred for file access */
! 142: #ifdef UVM_SWAP_ENCRYPT
! 143: #define SWD_KEY_SHIFT 7 /* One key per 0.5 MByte */
! 144: #define SWD_KEY(x,y) &((x)->swd_keys[((y) - (x)->swd_drumoffset) >> SWD_KEY_SHIFT])
! 145:
! 146: #define SWD_DCRYPT_SHIFT 5
! 147: #define SWD_DCRYPT_BITS 32
! 148: #define SWD_DCRYPT_MASK (SWD_DCRYPT_BITS - 1)
! 149: #define SWD_DCRYPT_OFF(x) ((x) >> SWD_DCRYPT_SHIFT)
! 150: #define SWD_DCRYPT_BIT(x) ((x) & SWD_DCRYPT_MASK)
! 151: #define SWD_DCRYPT_SIZE(x) (SWD_DCRYPT_OFF((x) + SWD_DCRYPT_MASK) * sizeof(u_int32_t))
! 152: u_int32_t *swd_decrypt; /* bitmap for decryption */
! 153: struct swap_key *swd_keys; /* keys for different parts */
! 154: int swd_nkeys; /* active keys */
! 155: #endif
! 156: };
! 157:
! 158: /*
! 159: * swap device priority entry; the list is kept sorted on `spi_priority'.
! 160: */
! 161: struct swappri {
! 162: int spi_priority; /* priority */
! 163: CIRCLEQ_HEAD(spi_swapdev, swapdev) spi_swapdev;
! 164: /* circleq of swapdevs at this priority */
! 165: LIST_ENTRY(swappri) spi_swappri; /* global list of pri's */
! 166: };
! 167:
/*
 * struct vndxfer and struct vndbuf keep track of data transfers on swap
 * devices that are associated with regular files.
 * NOTE: this code is more or less a copy of vnd.c; the structure names
 * are the same as there to ease porting.
 */
struct vndxfer {
	struct buf	*vx_bp;		/* parent buffer of the transfer */
	struct swapdev	*vx_sdp;	/* swapdev the transfer belongs to */
	int		vx_error;
	int		vx_pending;	/* # of aux buffers still pending */
	int		vx_flags;	/* VX_* bits below */
#define VX_BUSY		1
#define VX_DEAD		2
};
! 183:
! 184: struct vndbuf {
! 185: struct buf vb_buf;
! 186: struct vndxfer *vb_xfer;
! 187: };
! 188:
! 189:
! 190: /*
! 191: * We keep a of pool vndbuf's and vndxfer structures.
! 192: */
! 193: struct pool vndxfer_pool;
! 194: struct pool vndbuf_pool;
! 195:
! 196: #define getvndxfer(vnx) do { \
! 197: int s = splbio(); \
! 198: vnx = pool_get(&vndxfer_pool, PR_WAITOK); \
! 199: splx(s); \
! 200: } while (0)
! 201:
! 202: #define putvndxfer(vnx) { \
! 203: pool_put(&vndxfer_pool, (void *)(vnx)); \
! 204: }
! 205:
! 206: #define getvndbuf(vbp) do { \
! 207: int s = splbio(); \
! 208: vbp = pool_get(&vndbuf_pool, PR_WAITOK); \
! 209: splx(s); \
! 210: } while (0)
! 211:
! 212: #define putvndbuf(vbp) { \
! 213: pool_put(&vndbuf_pool, (void *)(vbp)); \
! 214: }
! 215:
! 216: /* /dev/drum */
! 217: bdev_decl(sw);
! 218: cdev_decl(sw);
! 219:
! 220: /*
! 221: * local variables
! 222: */
! 223: static struct extent *swapmap; /* controls the mapping of /dev/drum */
! 224:
! 225: /* list of all active swap devices [by priority] */
! 226: LIST_HEAD(swap_priority, swappri);
! 227: static struct swap_priority swap_priority;
! 228:
! 229: /* locks */
! 230: struct rwlock swap_syscall_lock = RWLOCK_INITIALIZER("swplk");
! 231:
! 232: /*
! 233: * prototypes
! 234: */
! 235: static void swapdrum_add(struct swapdev *, int);
! 236: static struct swapdev *swapdrum_getsdp(int);
! 237:
! 238: static struct swapdev *swaplist_find(struct vnode *, int);
! 239: static void swaplist_insert(struct swapdev *,
! 240: struct swappri *, int);
! 241: static void swaplist_trim(void);
! 242:
! 243: static int swap_on(struct proc *, struct swapdev *);
! 244: static int swap_off(struct proc *, struct swapdev *);
! 245:
! 246: static void sw_reg_strategy(struct swapdev *, struct buf *, int);
! 247: static void sw_reg_iodone(struct buf *);
! 248: static void sw_reg_start(struct swapdev *);
! 249:
! 250: static int uvm_swap_io(struct vm_page **, int, int, int);
! 251:
! 252: static void swapmount(void);
! 253:
! 254: #ifdef UVM_SWAP_ENCRYPT
! 255: /* for swap encrypt */
! 256: boolean_t uvm_swap_allocpages(struct vm_page **, int);
! 257: void uvm_swap_markdecrypt(struct swapdev *, int, int, int);
! 258: boolean_t uvm_swap_needdecrypt(struct swapdev *, int);
! 259: void uvm_swap_initcrypt(struct swapdev *, int);
! 260: #endif
! 261:
! 262: /*
! 263: * uvm_swap_init: init the swap system data structures and locks
! 264: *
! 265: * => called at boot time from init_main.c after the filesystems
! 266: * are brought up (which happens after uvm_init())
! 267: */
! 268: void
! 269: uvm_swap_init()
! 270: {
! 271: UVMHIST_FUNC("uvm_swap_init");
! 272:
! 273: UVMHIST_CALLED(pdhist);
! 274: /*
! 275: * first, init the swap list, its counter, and its lock.
! 276: * then get a handle on the vnode for /dev/drum by using
! 277: * the its dev_t number ("swapdev", from MD conf.c).
! 278: */
! 279:
! 280: LIST_INIT(&swap_priority);
! 281: uvmexp.nswapdev = 0;
! 282: simple_lock_init(&uvm.swap_data_lock);
! 283:
! 284: if (!swapdev_vp && bdevvp(swapdev, &swapdev_vp))
! 285: panic("uvm_swap_init: can't get vnode for swap device");
! 286:
! 287: /*
! 288: * create swap block resource map to map /dev/drum. the range
! 289: * from 1 to INT_MAX allows 2 gigablocks of swap space. note
! 290: * that block 0 is reserved (used to indicate an allocation
! 291: * failure, or no allocation).
! 292: */
! 293: swapmap = extent_create("swapmap", 1, INT_MAX,
! 294: M_VMSWAP, 0, 0, EX_NOWAIT);
! 295: if (swapmap == 0)
! 296: panic("uvm_swap_init: extent_create failed");
! 297:
! 298: /*
! 299: * allocate pools for structures used for swapping to files.
! 300: */
! 301:
! 302:
! 303: pool_init(&vndxfer_pool, sizeof(struct vndxfer), 0, 0, 0, "swp vnx",
! 304: NULL);
! 305:
! 306: pool_init(&vndbuf_pool, sizeof(struct vndbuf), 0, 0, 0, "swp vnd",
! 307: NULL);
! 308:
! 309: /*
! 310: * Setup the initial swap partition
! 311: */
! 312: swapmount();
! 313:
! 314: /*
! 315: * done!
! 316: */
! 317: UVMHIST_LOG(pdhist, "<- done", 0, 0, 0, 0);
! 318: }
! 319:
! 320: #ifdef UVM_SWAP_ENCRYPT
! 321: void
! 322: uvm_swap_initcrypt_all(void)
! 323: {
! 324: struct swapdev *sdp;
! 325: struct swappri *spp;
! 326:
! 327: simple_lock(&uvm.swap_data_lock);
! 328:
! 329: LIST_FOREACH(spp, &swap_priority, spi_swappri) {
! 330: CIRCLEQ_FOREACH(sdp, &spp->spi_swapdev, swd_next)
! 331: if (sdp->swd_decrypt == NULL)
! 332: uvm_swap_initcrypt(sdp, sdp->swd_npages);
! 333: }
! 334: simple_unlock(&uvm.swap_data_lock);
! 335: }
! 336:
! 337: void
! 338: uvm_swap_initcrypt(struct swapdev *sdp, int npages)
! 339: {
! 340: /*
! 341: * keep information if a page needs to be decrypted when we get it
! 342: * from the swap device.
! 343: * We cannot chance a malloc later, if we are doing ASYNC puts,
! 344: * we may not call malloc with M_WAITOK. This consumes only
! 345: * 8KB memory for a 256MB swap partition.
! 346: */
! 347: sdp->swd_decrypt = malloc(SWD_DCRYPT_SIZE(npages), M_VMSWAP, M_WAITOK);
! 348: memset(sdp->swd_decrypt, 0, SWD_DCRYPT_SIZE(npages));
! 349: sdp->swd_keys = malloc((npages >> SWD_KEY_SHIFT) * sizeof(struct swap_key),
! 350: M_VMSWAP, M_WAITOK);
! 351: memset(sdp->swd_keys, 0, (npages >> SWD_KEY_SHIFT) * sizeof(struct swap_key));
! 352: sdp->swd_nkeys = 0;
! 353: }
! 354:
! 355: boolean_t
! 356: uvm_swap_allocpages(struct vm_page **pps, int npages)
! 357: {
! 358: int i, s;
! 359: int minus, reserve;
! 360: boolean_t fail;
! 361:
! 362: /* Estimate if we will succeed */
! 363: s = uvm_lock_fpageq();
! 364:
! 365: minus = uvmexp.free - npages;
! 366: reserve = uvmexp.reserve_kernel;
! 367: fail = uvmexp.free - npages < uvmexp.reserve_kernel;
! 368:
! 369: uvm_unlock_fpageq(s);
! 370:
! 371: if (fail)
! 372: return FALSE;
! 373:
! 374: /* Get new pages */
! 375: for (i = 0; i < npages; i++) {
! 376: pps[i] = uvm_pagealloc(NULL, 0, NULL, 0);
! 377: if (pps[i] == NULL)
! 378: break;
! 379: }
! 380:
! 381: /* On failure free and return */
! 382: if (i < npages) {
! 383: uvm_swap_freepages(pps, i);
! 384: return FALSE;
! 385: }
! 386:
! 387: return TRUE;
! 388: }
! 389:
/*
 * uvm_swap_freepages: free the first `npages' pages of pps[] with
 * uvm_pagefree(), holding the page queue lock around the whole batch.
 */
void
uvm_swap_freepages(struct vm_page **pps, int npages)
{
	int idx = 0;

	uvm_lock_pageq();
	while (idx < npages) {
		uvm_pagefree(pps[idx]);
		idx++;
	}
	uvm_unlock_pageq();
}
! 400:
! 401: /*
! 402: * Mark pages on the swap device for later decryption
! 403: */
! 404:
! 405: void
! 406: uvm_swap_markdecrypt(struct swapdev *sdp, int startslot, int npages,
! 407: int decrypt)
! 408: {
! 409: int pagestart, i;
! 410: int off, bit;
! 411:
! 412: if (!sdp)
! 413: return;
! 414:
! 415: pagestart = startslot - sdp->swd_drumoffset;
! 416: for (i = 0; i < npages; i++, pagestart++) {
! 417: off = SWD_DCRYPT_OFF(pagestart);
! 418: bit = SWD_DCRYPT_BIT(pagestart);
! 419: if (decrypt)
! 420: /* pages read need decryption */
! 421: sdp->swd_decrypt[off] |= 1 << bit;
! 422: else
! 423: /* pages read do not need decryption */
! 424: sdp->swd_decrypt[off] &= ~(1 << bit);
! 425: }
! 426: }
! 427:
! 428: /*
! 429: * Check if the page that we got from disk needs to be decrypted
! 430: */
! 431:
! 432: boolean_t
! 433: uvm_swap_needdecrypt(struct swapdev *sdp, int off)
! 434: {
! 435: if (!sdp)
! 436: return FALSE;
! 437:
! 438: off -= sdp->swd_drumoffset;
! 439: return sdp->swd_decrypt[SWD_DCRYPT_OFF(off)] & (1 << SWD_DCRYPT_BIT(off)) ?
! 440: TRUE : FALSE;
! 441: }
! 442: #endif /* UVM_SWAP_ENCRYPT */
! 443: /*
! 444: * swaplist functions: functions that operate on the list of swap
! 445: * devices on the system.
! 446: */
! 447:
! 448: /*
! 449: * swaplist_insert: insert swap device "sdp" into the global list
! 450: *
! 451: * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
! 452: * => caller must provide a newly malloc'd swappri structure (we will
! 453: * FREE it if we don't need it... this it to prevent malloc blocking
! 454: * here while adding swap)
! 455: */
! 456: static void
! 457: swaplist_insert(sdp, newspp, priority)
! 458: struct swapdev *sdp;
! 459: struct swappri *newspp;
! 460: int priority;
! 461: {
! 462: struct swappri *spp, *pspp;
! 463: UVMHIST_FUNC("swaplist_insert"); UVMHIST_CALLED(pdhist);
! 464:
! 465: /*
! 466: * find entry at or after which to insert the new device.
! 467: */
! 468: for (pspp = NULL, spp = LIST_FIRST(&swap_priority); spp != NULL;
! 469: spp = LIST_NEXT(spp, spi_swappri)) {
! 470: if (priority <= spp->spi_priority)
! 471: break;
! 472: pspp = spp;
! 473: }
! 474:
! 475: /*
! 476: * new priority?
! 477: */
! 478: if (spp == NULL || spp->spi_priority != priority) {
! 479: spp = newspp; /* use newspp! */
! 480: UVMHIST_LOG(pdhist, "created new swappri = %ld",
! 481: priority, 0, 0, 0);
! 482:
! 483: spp->spi_priority = priority;
! 484: CIRCLEQ_INIT(&spp->spi_swapdev);
! 485:
! 486: if (pspp)
! 487: LIST_INSERT_AFTER(pspp, spp, spi_swappri);
! 488: else
! 489: LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri);
! 490: } else {
! 491: /* we don't need a new priority structure, free it */
! 492: FREE(newspp, M_VMSWAP);
! 493: }
! 494:
! 495: /*
! 496: * priority found (or created). now insert on the priority's
! 497: * circleq list and bump the total number of swapdevs.
! 498: */
! 499: sdp->swd_priority = priority;
! 500: CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
! 501: uvmexp.nswapdev++;
! 502: }
! 503:
! 504: /*
! 505: * swaplist_find: find and optionally remove a swap device from the
! 506: * global list.
! 507: *
! 508: * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
! 509: * => we return the swapdev we found (and removed)
! 510: */
! 511: static struct swapdev *
! 512: swaplist_find(vp, remove)
! 513: struct vnode *vp;
! 514: boolean_t remove;
! 515: {
! 516: struct swapdev *sdp;
! 517: struct swappri *spp;
! 518:
! 519: /*
! 520: * search the lists for the requested vp
! 521: */
! 522: for (spp = LIST_FIRST(&swap_priority); spp != NULL;
! 523: spp = LIST_NEXT(spp, spi_swappri)) {
! 524: for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
! 525: sdp != (void *)&spp->spi_swapdev;
! 526: sdp = CIRCLEQ_NEXT(sdp, swd_next))
! 527: if (sdp->swd_vp == vp) {
! 528: if (remove) {
! 529: CIRCLEQ_REMOVE(&spp->spi_swapdev,
! 530: sdp, swd_next);
! 531: uvmexp.nswapdev--;
! 532: }
! 533: return(sdp);
! 534: }
! 535: }
! 536: return (NULL);
! 537: }
! 538:
! 539:
! 540: /*
! 541: * swaplist_trim: scan priority list for empty priority entries and kill
! 542: * them.
! 543: *
! 544: * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
! 545: */
! 546: static void
! 547: swaplist_trim()
! 548: {
! 549: struct swappri *spp, *nextspp;
! 550:
! 551: for (spp = LIST_FIRST(&swap_priority); spp != NULL; spp = nextspp) {
! 552: nextspp = LIST_NEXT(spp, spi_swappri);
! 553: if (CIRCLEQ_FIRST(&spp->spi_swapdev) !=
! 554: (void *)&spp->spi_swapdev)
! 555: continue;
! 556: LIST_REMOVE(spp, spi_swappri);
! 557: free(spp, M_VMSWAP);
! 558: }
! 559: }
! 560:
! 561: /*
! 562: * swapdrum_add: add a "swapdev"'s blocks into /dev/drum's area.
! 563: *
! 564: * => caller must hold swap_syscall_lock
! 565: * => uvm.swap_data_lock should be unlocked (we may sleep)
! 566: */
! 567: static void
! 568: swapdrum_add(sdp, npages)
! 569: struct swapdev *sdp;
! 570: int npages;
! 571: {
! 572: u_long result;
! 573:
! 574: if (extent_alloc(swapmap, npages, EX_NOALIGN, 0, EX_NOBOUNDARY,
! 575: EX_WAITOK, &result))
! 576: panic("swapdrum_add");
! 577:
! 578: sdp->swd_drumoffset = result;
! 579: sdp->swd_drumsize = npages;
! 580: }
! 581:
! 582: /*
! 583: * swapdrum_getsdp: given a page offset in /dev/drum, convert it back
! 584: * to the "swapdev" that maps that section of the drum.
! 585: *
! 586: * => each swapdev takes one big contig chunk of the drum
! 587: * => caller must hold uvm.swap_data_lock
! 588: */
! 589: static struct swapdev *
! 590: swapdrum_getsdp(pgno)
! 591: int pgno;
! 592: {
! 593: struct swapdev *sdp;
! 594: struct swappri *spp;
! 595:
! 596: for (spp = LIST_FIRST(&swap_priority); spp != NULL;
! 597: spp = LIST_NEXT(spp, spi_swappri))
! 598: for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
! 599: sdp != (void *)&spp->spi_swapdev;
! 600: sdp = CIRCLEQ_NEXT(sdp, swd_next))
! 601: if (pgno >= sdp->swd_drumoffset &&
! 602: pgno < (sdp->swd_drumoffset + sdp->swd_drumsize)) {
! 603: return sdp;
! 604: }
! 605: return NULL;
! 606: }
! 607:
! 608:
! 609: /*
! 610: * sys_swapctl: main entry point for swapctl(2) system call
! 611: * [with two helper functions: swap_on and swap_off]
! 612: */
! 613: int
! 614: sys_swapctl(p, v, retval)
! 615: struct proc *p;
! 616: void *v;
! 617: register_t *retval;
! 618: {
! 619: struct sys_swapctl_args /* {
! 620: syscallarg(int) cmd;
! 621: syscallarg(void *) arg;
! 622: syscallarg(int) misc;
! 623: } */ *uap = (struct sys_swapctl_args *)v;
! 624: struct vnode *vp;
! 625: struct nameidata nd;
! 626: struct swappri *spp;
! 627: struct swapdev *sdp;
! 628: struct swapent *sep;
! 629: char userpath[MAXPATHLEN];
! 630: size_t len;
! 631: int count, error, misc;
! 632: int priority;
! 633: UVMHIST_FUNC("sys_swapctl"); UVMHIST_CALLED(pdhist);
! 634:
! 635: misc = SCARG(uap, misc);
! 636:
! 637: /*
! 638: * ensure serialized syscall access by grabbing the swap_syscall_lock
! 639: */
! 640: rw_enter_write(&swap_syscall_lock);
! 641:
! 642: /*
! 643: * we handle the non-priv NSWAP and STATS request first.
! 644: *
! 645: * SWAP_NSWAP: return number of config'd swap devices
! 646: * [can also be obtained with uvmexp sysctl]
! 647: */
! 648: if (SCARG(uap, cmd) == SWAP_NSWAP) {
! 649: UVMHIST_LOG(pdhist, "<- done SWAP_NSWAP=%ld", uvmexp.nswapdev,
! 650: 0, 0, 0);
! 651: *retval = uvmexp.nswapdev;
! 652: error = 0;
! 653: goto out;
! 654: }
! 655:
! 656: /*
! 657: * SWAP_STATS: get stats on current # of configured swap devs
! 658: *
! 659: * note that the swap_priority list can't change as long
! 660: * as we are holding the swap_syscall_lock. we don't want
! 661: * to grab the uvm.swap_data_lock because we may fault&sleep during
! 662: * copyout() and we don't want to be holding that lock then!
! 663: */
! 664: if (SCARG(uap, cmd) == SWAP_STATS
! 665: #if defined(COMPAT_13)
! 666: || SCARG(uap, cmd) == SWAP_OSTATS
! 667: #endif
! 668: ) {
! 669: sep = (struct swapent *)SCARG(uap, arg);
! 670: count = 0;
! 671:
! 672: for (spp = LIST_FIRST(&swap_priority); spp != NULL;
! 673: spp = LIST_NEXT(spp, spi_swappri)) {
! 674: for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
! 675: sdp != (void *)&spp->spi_swapdev && misc-- > 0;
! 676: sdp = CIRCLEQ_NEXT(sdp, swd_next)) {
! 677: sdp->swd_inuse =
! 678: btodb((u_int64_t)sdp->swd_npginuse <<
! 679: PAGE_SHIFT);
! 680: error = copyout(&sdp->swd_se, sep,
! 681: sizeof(struct swapent));
! 682:
! 683: /* now copy out the path if necessary */
! 684: #if defined(COMPAT_13)
! 685: if (error == 0 && SCARG(uap, cmd) == SWAP_STATS)
! 686: #else
! 687: if (error == 0)
! 688: #endif
! 689: error = copyout(sdp->swd_path,
! 690: &sep->se_path, sdp->swd_pathlen);
! 691:
! 692: if (error)
! 693: goto out;
! 694: count++;
! 695: #if defined(COMPAT_13)
! 696: if (SCARG(uap, cmd) == SWAP_OSTATS)
! 697: ((struct oswapent *)sep)++;
! 698: else
! 699: #endif
! 700: sep++;
! 701: }
! 702: }
! 703:
! 704: UVMHIST_LOG(pdhist, "<- done SWAP_STATS", 0, 0, 0, 0);
! 705:
! 706: *retval = count;
! 707: error = 0;
! 708: goto out;
! 709: }
! 710:
! 711: /*
! 712: * all other requests require superuser privs. verify.
! 713: */
! 714: if ((error = suser(p, 0)))
! 715: goto out;
! 716:
! 717: /*
! 718: * at this point we expect a path name in arg. we will
! 719: * use namei() to gain a vnode reference (vref), and lock
! 720: * the vnode (VOP_LOCK).
! 721: *
! 722: * XXX: a NULL arg means use the root vnode pointer (e.g. for
! 723: * miniroot)
! 724: */
! 725: if (SCARG(uap, arg) == NULL) {
! 726: vp = rootvp; /* miniroot */
! 727: if (vget(vp, LK_EXCLUSIVE, p)) {
! 728: error = EBUSY;
! 729: goto out;
! 730: }
! 731: if (SCARG(uap, cmd) == SWAP_ON &&
! 732: copystr("miniroot", userpath, sizeof userpath, &len))
! 733: panic("swapctl: miniroot copy failed");
! 734: } else {
! 735: int space;
! 736: char *where;
! 737:
! 738: if (SCARG(uap, cmd) == SWAP_ON) {
! 739: if ((error = copyinstr(SCARG(uap, arg), userpath,
! 740: sizeof userpath, &len)))
! 741: goto out;
! 742: space = UIO_SYSSPACE;
! 743: where = userpath;
! 744: } else {
! 745: space = UIO_USERSPACE;
! 746: where = (char *)SCARG(uap, arg);
! 747: }
! 748: NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, space, where, p);
! 749: if ((error = namei(&nd)))
! 750: goto out;
! 751: vp = nd.ni_vp;
! 752: }
! 753: /* note: "vp" is referenced and locked */
! 754:
! 755: error = 0; /* assume no error */
! 756: switch(SCARG(uap, cmd)) {
! 757:
! 758: case SWAP_DUMPDEV:
! 759: if (vp->v_type != VBLK) {
! 760: error = ENOTBLK;
! 761: break;
! 762: }
! 763: dumpdev = vp->v_rdev;
! 764: break;
! 765:
! 766: case SWAP_CTL:
! 767: /*
! 768: * get new priority, remove old entry (if any) and then
! 769: * reinsert it in the correct place. finally, prune out
! 770: * any empty priority structures.
! 771: */
! 772: priority = SCARG(uap, misc);
! 773: spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
! 774: simple_lock(&uvm.swap_data_lock);
! 775: if ((sdp = swaplist_find(vp, 1)) == NULL) {
! 776: error = ENOENT;
! 777: } else {
! 778: swaplist_insert(sdp, spp, priority);
! 779: swaplist_trim();
! 780: }
! 781: simple_unlock(&uvm.swap_data_lock);
! 782: if (error)
! 783: free(spp, M_VMSWAP);
! 784: break;
! 785:
! 786: case SWAP_ON:
! 787:
! 788: /*
! 789: * check for duplicates. if none found, then insert a
! 790: * dummy entry on the list to prevent someone else from
! 791: * trying to enable this device while we are working on
! 792: * it.
! 793: */
! 794:
! 795: priority = SCARG(uap, misc);
! 796: simple_lock(&uvm.swap_data_lock);
! 797: if ((sdp = swaplist_find(vp, 0)) != NULL) {
! 798: error = EBUSY;
! 799: simple_unlock(&uvm.swap_data_lock);
! 800: break;
! 801: }
! 802: sdp = malloc(sizeof *sdp, M_VMSWAP, M_WAITOK);
! 803: spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
! 804: memset(sdp, 0, sizeof(*sdp));
! 805: sdp->swd_flags = SWF_FAKE; /* placeholder only */
! 806: sdp->swd_vp = vp;
! 807: sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV;
! 808:
! 809: /*
! 810: * XXX Is NFS elaboration necessary?
! 811: */
! 812: if (vp->v_type == VREG) {
! 813: sdp->swd_cred = crdup(p->p_ucred);
! 814: }
! 815:
! 816: swaplist_insert(sdp, spp, priority);
! 817: simple_unlock(&uvm.swap_data_lock);
! 818:
! 819: sdp->swd_pathlen = len;
! 820: sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK);
! 821: if (copystr(userpath, sdp->swd_path, sdp->swd_pathlen, 0) != 0)
! 822: panic("swapctl: copystr");
! 823:
! 824: /*
! 825: * we've now got a FAKE placeholder in the swap list.
! 826: * now attempt to enable swap on it. if we fail, undo
! 827: * what we've done and kill the fake entry we just inserted.
! 828: * if swap_on is a success, it will clear the SWF_FAKE flag
! 829: */
! 830:
! 831: if ((error = swap_on(p, sdp)) != 0) {
! 832: simple_lock(&uvm.swap_data_lock);
! 833: (void) swaplist_find(vp, 1); /* kill fake entry */
! 834: swaplist_trim();
! 835: simple_unlock(&uvm.swap_data_lock);
! 836: if (vp->v_type == VREG) {
! 837: crfree(sdp->swd_cred);
! 838: }
! 839: free(sdp->swd_path, M_VMSWAP);
! 840: free(sdp, M_VMSWAP);
! 841: break;
! 842: }
! 843: break;
! 844:
! 845: case SWAP_OFF:
! 846: simple_lock(&uvm.swap_data_lock);
! 847: if ((sdp = swaplist_find(vp, 0)) == NULL) {
! 848: simple_unlock(&uvm.swap_data_lock);
! 849: error = ENXIO;
! 850: break;
! 851: }
! 852:
! 853: /*
! 854: * If a device isn't in use or enabled, we
! 855: * can't stop swapping from it (again).
! 856: */
! 857: if ((sdp->swd_flags & (SWF_INUSE|SWF_ENABLE)) == 0) {
! 858: simple_unlock(&uvm.swap_data_lock);
! 859: error = EBUSY;
! 860: break;
! 861: }
! 862:
! 863: /*
! 864: * do the real work.
! 865: */
! 866: error = swap_off(p, sdp);
! 867: break;
! 868:
! 869: default:
! 870: error = EINVAL;
! 871: }
! 872:
! 873: /*
! 874: * done! release the ref gained by namei() and unlock.
! 875: */
! 876: vput(vp);
! 877:
! 878: out:
! 879: rw_exit_write(&swap_syscall_lock);
! 880:
! 881: UVMHIST_LOG(pdhist, "<- done! error=%ld", error, 0, 0, 0);
! 882: return (error);
! 883: }
! 884:
! 885: /*
! 886: * swap_on: attempt to enable a swapdev for swapping. note that the
! 887: * swapdev is already on the global list, but disabled (marked
! 888: * SWF_FAKE).
! 889: *
! 890: * => we avoid the start of the disk (to protect disk labels)
! 891: * => we also avoid the miniroot, if we are swapping to root.
! 892: * => caller should leave uvm.swap_data_lock unlocked, we may lock it
! 893: * if needed.
! 894: */
! 895: static int
! 896: swap_on(p, sdp)
! 897: struct proc *p;
! 898: struct swapdev *sdp;
! 899: {
! 900: static int count = 0; /* static */
! 901: struct vnode *vp;
! 902: int error, npages, nblocks, size;
! 903: long addr;
! 904: struct vattr va;
! 905: #if defined(NFSCLIENT)
! 906: extern int (**nfsv2_vnodeop_p)(void *);
! 907: #endif /* defined(NFSCLIENT) */
! 908: dev_t dev;
! 909: UVMHIST_FUNC("swap_on"); UVMHIST_CALLED(pdhist);
! 910:
! 911: /*
! 912: * we want to enable swapping on sdp. the swd_vp contains
! 913: * the vnode we want (locked and ref'd), and the swd_dev
! 914: * contains the dev_t of the file, if it a block device.
! 915: */
! 916:
! 917: vp = sdp->swd_vp;
! 918: dev = sdp->swd_dev;
! 919:
! 920: /*
! 921: * open the swap file (mostly useful for block device files to
! 922: * let device driver know what is up).
! 923: *
! 924: * we skip the open/close for root on swap because the root
! 925: * has already been opened when root was mounted (mountroot).
! 926: */
! 927: if (vp != rootvp) {
! 928: if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p)))
! 929: return (error);
! 930: }
! 931:
! 932: /* XXX this only works for block devices */
! 933: UVMHIST_LOG(pdhist, " dev=%ld, major(dev)=%ld", dev, major(dev), 0,0);
! 934:
! 935: /*
! 936: * we now need to determine the size of the swap area. for
! 937: * block specials we can call the d_psize function.
! 938: * for normal files, we must stat [get attrs].
! 939: *
! 940: * we put the result in nblks.
! 941: * for normal files, we also want the filesystem block size
! 942: * (which we get with statfs).
! 943: */
! 944: switch (vp->v_type) {
! 945: case VBLK:
! 946: if (bdevsw[major(dev)].d_psize == 0 ||
! 947: (nblocks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
! 948: error = ENXIO;
! 949: goto bad;
! 950: }
! 951: break;
! 952:
! 953: case VREG:
! 954: if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)))
! 955: goto bad;
! 956: nblocks = (int)btodb(va.va_size);
! 957: if ((error =
! 958: VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0)
! 959: goto bad;
! 960:
! 961: sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize;
! 962: /*
! 963: * limit the max # of outstanding I/O requests we issue
! 964: * at any one time. take it easy on NFS servers.
! 965: */
! 966: #if defined(NFSCLIENT)
! 967: if (vp->v_op == nfsv2_vnodeop_p)
! 968: sdp->swd_maxactive = 2; /* XXX */
! 969: else
! 970: #endif /* defined(NFSCLIENT) */
! 971: sdp->swd_maxactive = 8; /* XXX */
! 972: break;
! 973:
! 974: default:
! 975: error = ENXIO;
! 976: goto bad;
! 977: }
! 978:
! 979: /*
! 980: * save nblocks in a safe place and convert to pages.
! 981: */
! 982:
! 983: sdp->swd_nblks = nblocks;
! 984: npages = dbtob((u_int64_t)nblocks) >> PAGE_SHIFT;
! 985:
! 986: /*
! 987: * for block special files, we want to make sure that leave
! 988: * the disklabel and bootblocks alone, so we arrange to skip
! 989: * over them (arbitrarily choosing to skip PAGE_SIZE bytes).
! 990: * note that because of this the "size" can be less than the
! 991: * actual number of blocks on the device.
! 992: */
! 993: if (vp->v_type == VBLK) {
! 994: /* we use pages 1 to (size - 1) [inclusive] */
! 995: size = npages - 1;
! 996: addr = 1;
! 997: } else {
! 998: /* we use pages 0 to (size - 1) [inclusive] */
! 999: size = npages;
! 1000: addr = 0;
! 1001: }
! 1002:
! 1003: /*
! 1004: * make sure we have enough blocks for a reasonable sized swap
! 1005: * area. we want at least one page.
! 1006: */
! 1007:
! 1008: if (size < 1) {
! 1009: UVMHIST_LOG(pdhist, " size <= 1!!", 0, 0, 0, 0);
! 1010: error = EINVAL;
! 1011: goto bad;
! 1012: }
! 1013:
! 1014: UVMHIST_LOG(pdhist, " dev=%lx: size=%ld addr=0x%lx\n",
! 1015: dev, size, addr, 0);
! 1016:
! 1017: /*
! 1018: * now we need to allocate an extent to manage this swap device
! 1019: */
! 1020: snprintf(sdp->swd_exname, sizeof(sdp->swd_exname), "swap0x%04x",
! 1021: count++);
! 1022:
! 1023: /* note that extent_create's 3rd arg is inclusive, thus "- 1" */
! 1024: sdp->swd_ex = extent_create(sdp->swd_exname, 0, npages - 1, M_VMSWAP,
! 1025: 0, 0, EX_WAITOK);
! 1026: /* allocate the `saved' region from the extent so it won't be used */
! 1027: if (addr) {
! 1028: if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK))
! 1029: panic("disklabel region");
! 1030: }
! 1031:
! 1032: /*
! 1033: * if the vnode we are swapping to is the root vnode
! 1034: * (i.e. we are swapping to the miniroot) then we want
! 1035: * to make sure we don't overwrite it. do a statfs to
! 1036: * find its size and skip over it.
! 1037: */
! 1038: if (vp == rootvp) {
! 1039: struct mount *mp;
! 1040: struct statfs *sp;
! 1041: int rootblocks, rootpages;
! 1042:
! 1043: mp = rootvnode->v_mount;
! 1044: sp = &mp->mnt_stat;
! 1045: rootblocks = sp->f_blocks * btodb(sp->f_bsize);
! 1046: rootpages = round_page(dbtob(rootblocks)) >> PAGE_SHIFT;
! 1047: if (rootpages > size)
! 1048: panic("swap_on: miniroot larger than swap?");
! 1049:
! 1050: if (extent_alloc_region(sdp->swd_ex, addr,
! 1051: rootpages, EX_WAITOK))
! 1052: panic("swap_on: unable to preserve miniroot");
! 1053:
! 1054: size -= rootpages;
! 1055: printf("Preserved %d pages of miniroot ", rootpages);
! 1056: printf("leaving %d pages of swap\n", size);
! 1057: }
! 1058:
! 1059: /*
! 1060: * add a ref to vp to reflect usage as a swap device.
! 1061: */
! 1062: vref(vp);
! 1063:
! 1064: #ifdef UVM_SWAP_ENCRYPT
! 1065: if (uvm_doswapencrypt)
! 1066: uvm_swap_initcrypt(sdp, npages);
! 1067: #endif
! 1068: /*
! 1069: * now add the new swapdev to the drum and enable.
! 1070: */
! 1071: simple_lock(&uvm.swap_data_lock);
! 1072: swapdrum_add(sdp, npages);
! 1073: sdp->swd_npages = size;
! 1074: sdp->swd_flags &= ~SWF_FAKE; /* going live */
! 1075: sdp->swd_flags |= (SWF_INUSE|SWF_ENABLE);
! 1076: uvmexp.swpages += size;
! 1077: simple_unlock(&uvm.swap_data_lock);
! 1078: return (0);
! 1079:
! 1080: bad:
! 1081: /*
! 1082: * failure: close device if necessary and return error.
! 1083: */
! 1084: if (vp != rootvp)
! 1085: (void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
! 1086: return (error);
! 1087: }
! 1088:
! 1089: /*
! 1090: * swap_off: stop swapping on swapdev
! 1091: *
! 1092: * => swap data should be locked, we will unlock.
! 1093: */
! 1094: static int
! 1095: swap_off(p, sdp)
! 1096: struct proc *p;
! 1097: struct swapdev *sdp;
! 1098: {
! 1099: int error;
! 1100: UVMHIST_FUNC("swap_off"); UVMHIST_CALLED(pdhist);
! 1101: UVMHIST_LOG(pdhist, " dev=%lx", sdp->swd_dev,0,0,0);
! 1102:
! 1103: /* disable the swap area being removed */
! 1104: sdp->swd_flags &= ~SWF_ENABLE;
! 1105: simple_unlock(&uvm.swap_data_lock);
! 1106:
! 1107: /*
! 1108: * the idea is to find all the pages that are paged out to this
! 1109: * device, and page them all in. in uvm, swap-backed pageable
! 1110: * memory can take two forms: aobjs and anons. call the
! 1111: * swapoff hook for each subsystem to bring in pages.
! 1112: */
! 1113:
! 1114: if (uao_swap_off(sdp->swd_drumoffset,
! 1115: sdp->swd_drumoffset + sdp->swd_drumsize) ||
! 1116: amap_swap_off(sdp->swd_drumoffset,
! 1117: sdp->swd_drumoffset + sdp->swd_drumsize)) {
! 1118:
! 1119: error = ENOMEM;
! 1120: } else if (sdp->swd_npginuse > sdp->swd_npgbad) {
! 1121: error = EBUSY;
! 1122: }
! 1123:
! 1124: if (error) {
! 1125: simple_lock(&uvm.swap_data_lock);
! 1126: sdp->swd_flags |= SWF_ENABLE;
! 1127: simple_unlock(&uvm.swap_data_lock);
! 1128: return (error);
! 1129: }
! 1130:
! 1131: /*
! 1132: * done with the vnode and saved creds.
! 1133: * drop our ref on the vnode before calling VOP_CLOSE()
! 1134: * so that spec_close() can tell if this is the last close.
! 1135: */
! 1136: if (sdp->swd_vp->v_type == VREG) {
! 1137: crfree(sdp->swd_cred);
! 1138: }
! 1139: vrele(sdp->swd_vp);
! 1140: if (sdp->swd_vp != rootvp) {
! 1141: (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p);
! 1142: }
! 1143:
! 1144: simple_lock(&uvm.swap_data_lock);
! 1145: uvmexp.swpages -= sdp->swd_npages;
! 1146:
! 1147: if (swaplist_find(sdp->swd_vp, 1) == NULL)
! 1148: panic("swap_off: swapdev not in list");
! 1149: swaplist_trim();
! 1150:
! 1151: /*
! 1152: * free all resources!
! 1153: */
! 1154: extent_free(swapmap, sdp->swd_drumoffset, sdp->swd_drumsize,
! 1155: EX_WAITOK);
! 1156: extent_destroy(sdp->swd_ex);
! 1157: free(sdp, M_VMSWAP);
! 1158: simple_unlock(&uvm.swap_data_lock);
! 1159: return (0);
! 1160: }
! 1161:
! 1162: /*
! 1163: * /dev/drum interface and i/o functions
! 1164: */
! 1165:
! 1166: /*
! 1167: * swread: the read function for the drum (just a call to physio)
! 1168: */
! 1169: /*ARGSUSED*/
! 1170: int
! 1171: swread(dev, uio, ioflag)
! 1172: dev_t dev;
! 1173: struct uio *uio;
! 1174: int ioflag;
! 1175: {
! 1176: UVMHIST_FUNC("swread"); UVMHIST_CALLED(pdhist);
! 1177:
! 1178: UVMHIST_LOG(pdhist, " dev=%lx offset=%lx",
! 1179: dev, (u_long)uio->uio_offset, 0, 0);
! 1180: return (physio(swstrategy, NULL, dev, B_READ, minphys, uio));
! 1181: }
! 1182:
! 1183: /*
! 1184: * swwrite: the write function for the drum (just a call to physio)
! 1185: */
! 1186: /*ARGSUSED*/
! 1187: int
! 1188: swwrite(dev, uio, ioflag)
! 1189: dev_t dev;
! 1190: struct uio *uio;
! 1191: int ioflag;
! 1192: {
! 1193: UVMHIST_FUNC("swwrite"); UVMHIST_CALLED(pdhist);
! 1194:
! 1195: UVMHIST_LOG(pdhist, " dev=%lx offset=%lx",
! 1196: dev, (u_long)uio->uio_offset, 0, 0);
! 1197: return (physio(swstrategy, NULL, dev, B_WRITE, minphys, uio));
! 1198: }
! 1199:
! 1200: /*
! 1201: * swstrategy: perform I/O on the drum
! 1202: *
! 1203: * => we must map the i/o request from the drum to the correct swapdev.
! 1204: */
! 1205: void
! 1206: swstrategy(bp)
! 1207: struct buf *bp;
! 1208: {
! 1209: struct swapdev *sdp;
! 1210: int s, pageno, bn;
! 1211: UVMHIST_FUNC("swstrategy"); UVMHIST_CALLED(pdhist);
! 1212:
! 1213: /*
! 1214: * convert block number to swapdev. note that swapdev can't
! 1215: * be yanked out from under us because we are holding resources
! 1216: * in it (i.e. the blocks we are doing I/O on).
! 1217: */
! 1218: pageno = dbtob((int64_t)bp->b_blkno) >> PAGE_SHIFT;
! 1219: simple_lock(&uvm.swap_data_lock);
! 1220: sdp = swapdrum_getsdp(pageno);
! 1221: simple_unlock(&uvm.swap_data_lock);
! 1222: if (sdp == NULL) {
! 1223: bp->b_error = EINVAL;
! 1224: bp->b_flags |= B_ERROR;
! 1225: s = splbio();
! 1226: biodone(bp);
! 1227: splx(s);
! 1228: UVMHIST_LOG(pdhist, " failed to get swap device", 0, 0, 0, 0);
! 1229: return;
! 1230: }
! 1231:
! 1232: /*
! 1233: * convert drum page number to block number on this swapdev.
! 1234: */
! 1235:
! 1236: pageno -= sdp->swd_drumoffset; /* page # on swapdev */
! 1237: bn = btodb((u_int64_t)pageno << PAGE_SHIFT); /* convert to diskblock */
! 1238:
! 1239: UVMHIST_LOG(pdhist, " %s: mapoff=%lx bn=0x%lx bcount=%ld",
! 1240: ((bp->b_flags & B_READ) == 0) ? "write" : "read",
! 1241: sdp->swd_drumoffset, bn, bp->b_bcount);
! 1242:
! 1243: /*
! 1244: * for block devices we finish up here.
! 1245: * for regular files we have to do more work which we delegate
! 1246: * to sw_reg_strategy().
! 1247: */
! 1248:
! 1249: switch (sdp->swd_vp->v_type) {
! 1250: default:
! 1251: panic("swstrategy: vnode type 0x%x", sdp->swd_vp->v_type);
! 1252:
! 1253: case VBLK:
! 1254:
! 1255: /*
! 1256: * must convert "bp" from an I/O on /dev/drum to an I/O
! 1257: * on the swapdev (sdp).
! 1258: */
! 1259: s = splbio();
! 1260: buf_replacevnode(bp, sdp->swd_vp);
! 1261:
! 1262: bp->b_blkno = bn;
! 1263: splx(s);
! 1264: VOP_STRATEGY(bp);
! 1265: return;
! 1266:
! 1267: case VREG:
! 1268: /*
! 1269: * delegate to sw_reg_strategy function.
! 1270: */
! 1271: sw_reg_strategy(sdp, bp, bn);
! 1272: return;
! 1273: }
! 1274: /* NOTREACHED */
! 1275: }
! 1276:
! 1277: /*
! 1278: * sw_reg_strategy: handle swap i/o to regular files
! 1279: */
! 1280: static void
! 1281: sw_reg_strategy(sdp, bp, bn)
! 1282: struct swapdev *sdp;
! 1283: struct buf *bp;
! 1284: int bn;
! 1285: {
! 1286: struct vnode *vp;
! 1287: struct vndxfer *vnx;
! 1288: daddr64_t nbn;
! 1289: caddr_t addr;
! 1290: off_t byteoff;
! 1291: int s, off, nra, error, sz, resid;
! 1292: UVMHIST_FUNC("sw_reg_strategy"); UVMHIST_CALLED(pdhist);
! 1293:
! 1294: /*
! 1295: * allocate a vndxfer head for this transfer and point it to
! 1296: * our buffer.
! 1297: */
! 1298: getvndxfer(vnx);
! 1299: vnx->vx_flags = VX_BUSY;
! 1300: vnx->vx_error = 0;
! 1301: vnx->vx_pending = 0;
! 1302: vnx->vx_bp = bp;
! 1303: vnx->vx_sdp = sdp;
! 1304:
! 1305: /*
! 1306: * setup for main loop where we read filesystem blocks into
! 1307: * our buffer.
! 1308: */
! 1309: error = 0;
! 1310: bp->b_resid = bp->b_bcount; /* nothing transferred yet! */
! 1311: addr = bp->b_data; /* current position in buffer */
! 1312: byteoff = dbtob((u_int64_t)bn);
! 1313:
! 1314: for (resid = bp->b_resid; resid; resid -= sz) {
! 1315: struct vndbuf *nbp;
! 1316:
! 1317: /*
! 1318: * translate byteoffset into block number. return values:
! 1319: * vp = vnode of underlying device
! 1320: * nbn = new block number (on underlying vnode dev)
! 1321: * nra = num blocks we can read-ahead (excludes requested
! 1322: * block)
! 1323: */
! 1324: nra = 0;
! 1325: error = VOP_BMAP(sdp->swd_vp, byteoff / sdp->swd_bsize,
! 1326: &vp, &nbn, &nra);
! 1327:
! 1328: if (error == 0 && nbn == (daddr64_t)-1) {
! 1329: /*
! 1330: * this used to just set error, but that doesn't
! 1331: * do the right thing. Instead, it causes random
! 1332: * memory errors. The panic() should remain until
! 1333: * this condition doesn't destabilize the system.
! 1334: */
! 1335: #if 1
! 1336: panic("sw_reg_strategy: swap to sparse file");
! 1337: #else
! 1338: error = EIO; /* failure */
! 1339: #endif
! 1340: }
! 1341:
! 1342: /*
! 1343: * punt if there was an error or a hole in the file.
! 1344: * we must wait for any i/o ops we have already started
! 1345: * to finish before returning.
! 1346: *
! 1347: * XXX we could deal with holes here but it would be
! 1348: * a hassle (in the write case).
! 1349: */
! 1350: if (error) {
! 1351: s = splbio();
! 1352: vnx->vx_error = error; /* pass error up */
! 1353: goto out;
! 1354: }
! 1355:
! 1356: /*
! 1357: * compute the size ("sz") of this transfer (in bytes).
! 1358: */
! 1359: off = byteoff % sdp->swd_bsize;
! 1360: sz = (1 + nra) * sdp->swd_bsize - off;
! 1361: if (sz > resid)
! 1362: sz = resid;
! 1363:
! 1364: UVMHIST_LOG(pdhist, "sw_reg_strategy: "
! 1365: "vp %p/%p offset 0x%lx/0x%llx",
! 1366: sdp->swd_vp, vp, (u_long)byteoff, nbn);
! 1367:
! 1368: /*
! 1369: * now get a buf structure. note that the vb_buf is
! 1370: * at the front of the nbp structure so that you can
! 1371: * cast pointers between the two structure easily.
! 1372: */
! 1373: getvndbuf(nbp);
! 1374: nbp->vb_buf.b_flags = bp->b_flags | B_CALL;
! 1375: nbp->vb_buf.b_bcount = sz;
! 1376: nbp->vb_buf.b_bufsize = sz;
! 1377: nbp->vb_buf.b_error = 0;
! 1378: nbp->vb_buf.b_data = addr;
! 1379: nbp->vb_buf.b_blkno = nbn + btodb(off);
! 1380: nbp->vb_buf.b_proc = bp->b_proc;
! 1381: nbp->vb_buf.b_iodone = sw_reg_iodone;
! 1382: nbp->vb_buf.b_vp = NULLVP;
! 1383: nbp->vb_buf.b_vnbufs.le_next = NOLIST;
! 1384: LIST_INIT(&nbp->vb_buf.b_dep);
! 1385:
! 1386: /*
! 1387: * set b_dirtyoff/end and b_validoff/end. this is
! 1388: * required by the NFS client code (otherwise it will
! 1389: * just discard our I/O request).
! 1390: */
! 1391: if (bp->b_dirtyend == 0) {
! 1392: nbp->vb_buf.b_dirtyoff = 0;
! 1393: nbp->vb_buf.b_dirtyend = sz;
! 1394: } else {
! 1395: nbp->vb_buf.b_dirtyoff =
! 1396: max(0, bp->b_dirtyoff - (bp->b_bcount-resid));
! 1397: nbp->vb_buf.b_dirtyend =
! 1398: min(sz,
! 1399: max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
! 1400: }
! 1401: if (bp->b_validend == 0) {
! 1402: nbp->vb_buf.b_validoff = 0;
! 1403: nbp->vb_buf.b_validend = sz;
! 1404: } else {
! 1405: nbp->vb_buf.b_validoff =
! 1406: max(0, bp->b_validoff - (bp->b_bcount-resid));
! 1407: nbp->vb_buf.b_validend =
! 1408: min(sz,
! 1409: max(0, bp->b_validend - (bp->b_bcount-resid)));
! 1410: }
! 1411:
! 1412: nbp->vb_xfer = vnx; /* patch it back in to vnx */
! 1413:
! 1414: /*
! 1415: * Just sort by block number
! 1416: */
! 1417: nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno;
! 1418: s = splbio();
! 1419: if (vnx->vx_error != 0) {
! 1420: putvndbuf(nbp);
! 1421: goto out;
! 1422: }
! 1423: vnx->vx_pending++;
! 1424:
! 1425: /* assoc new buffer with underlying vnode */
! 1426: bgetvp(vp, &nbp->vb_buf);
! 1427:
! 1428: /* sort it in and start I/O if we are not over our limit */
! 1429: disksort(&sdp->swd_tab, &nbp->vb_buf);
! 1430: sw_reg_start(sdp);
! 1431: splx(s);
! 1432:
! 1433: /*
! 1434: * advance to the next I/O
! 1435: */
! 1436: byteoff += sz;
! 1437: addr += sz;
! 1438: }
! 1439:
! 1440: s = splbio();
! 1441:
! 1442: out: /* Arrive here at splbio */
! 1443: vnx->vx_flags &= ~VX_BUSY;
! 1444: if (vnx->vx_pending == 0) {
! 1445: if (vnx->vx_error != 0) {
! 1446: bp->b_error = vnx->vx_error;
! 1447: bp->b_flags |= B_ERROR;
! 1448: }
! 1449: putvndxfer(vnx);
! 1450: biodone(bp);
! 1451: }
! 1452: splx(s);
! 1453: }
! 1454:
! 1455: /*
! 1456: * sw_reg_start: start an I/O request on the requested swapdev
! 1457: *
! 1458: * => reqs are sorted by disksort (above)
! 1459: */
! 1460: static void
! 1461: sw_reg_start(sdp)
! 1462: struct swapdev *sdp;
! 1463: {
! 1464: struct buf *bp;
! 1465: UVMHIST_FUNC("sw_reg_start"); UVMHIST_CALLED(pdhist);
! 1466:
! 1467: /* recursion control */
! 1468: if ((sdp->swd_flags & SWF_BUSY) != 0)
! 1469: return;
! 1470:
! 1471: sdp->swd_flags |= SWF_BUSY;
! 1472:
! 1473: while (sdp->swd_tab.b_active < sdp->swd_maxactive) {
! 1474: bp = sdp->swd_tab.b_actf;
! 1475: if (bp == NULL)
! 1476: break;
! 1477: sdp->swd_tab.b_actf = bp->b_actf;
! 1478: sdp->swd_tab.b_active++;
! 1479:
! 1480: UVMHIST_LOG(pdhist,
! 1481: "sw_reg_start: bp %p vp %p blkno 0x%lx cnt 0x%lx",
! 1482: bp, bp->b_vp, bp->b_blkno, bp->b_bcount);
! 1483: if ((bp->b_flags & B_READ) == 0)
! 1484: bp->b_vp->v_numoutput++;
! 1485:
! 1486: VOP_STRATEGY(bp);
! 1487: }
! 1488: sdp->swd_flags &= ~SWF_BUSY;
! 1489: }
! 1490:
! 1491: /*
! 1492: * sw_reg_iodone: one of our i/o's has completed and needs post-i/o cleanup
! 1493: *
! 1494: * => note that we can recover the vndbuf struct by casting the buf ptr
! 1495: */
! 1496: static void
! 1497: sw_reg_iodone(bp)
! 1498: struct buf *bp;
! 1499: {
! 1500: struct vndbuf *vbp = (struct vndbuf *) bp;
! 1501: struct vndxfer *vnx = vbp->vb_xfer;
! 1502: struct buf *pbp = vnx->vx_bp; /* parent buffer */
! 1503: struct swapdev *sdp = vnx->vx_sdp;
! 1504: int resid;
! 1505: UVMHIST_FUNC("sw_reg_iodone"); UVMHIST_CALLED(pdhist);
! 1506:
! 1507: UVMHIST_LOG(pdhist, " vbp=%p vp=%p blkno=0x%lx addr=%p",
! 1508: vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno, vbp->vb_buf.b_data);
! 1509: UVMHIST_LOG(pdhist, " cnt=%lx resid=%lx",
! 1510: vbp->vb_buf.b_bcount, vbp->vb_buf.b_resid, 0, 0);
! 1511:
! 1512: splassert(IPL_BIO);
! 1513:
! 1514: resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
! 1515: pbp->b_resid -= resid;
! 1516: vnx->vx_pending--;
! 1517:
! 1518: if (vbp->vb_buf.b_error) {
! 1519: UVMHIST_LOG(pdhist, " got error=%ld !",
! 1520: vbp->vb_buf.b_error, 0, 0, 0);
! 1521:
! 1522: /* pass error upward */
! 1523: vnx->vx_error = vbp->vb_buf.b_error;
! 1524: }
! 1525:
! 1526: /*
! 1527: * disassociate this buffer from the vnode (if any).
! 1528: */
! 1529: if (vbp->vb_buf.b_vp != NULL) {
! 1530: brelvp(&vbp->vb_buf);
! 1531: }
! 1532:
! 1533: /*
! 1534: * kill vbp structure
! 1535: */
! 1536: putvndbuf(vbp);
! 1537:
! 1538: /*
! 1539: * wrap up this transaction if it has run to completion or, in
! 1540: * case of an error, when all auxiliary buffers have returned.
! 1541: */
! 1542: if (vnx->vx_error != 0) {
! 1543: /* pass error upward */
! 1544: pbp->b_flags |= B_ERROR;
! 1545: pbp->b_error = vnx->vx_error;
! 1546: if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {
! 1547: putvndxfer(vnx);
! 1548: biodone(pbp);
! 1549: }
! 1550: } else if (pbp->b_resid == 0) {
! 1551: KASSERT(vnx->vx_pending == 0);
! 1552: if ((vnx->vx_flags & VX_BUSY) == 0) {
! 1553: UVMHIST_LOG(pdhist, " iodone error=%ld !",
! 1554: pbp, vnx->vx_error, 0, 0);
! 1555: putvndxfer(vnx);
! 1556: biodone(pbp);
! 1557: }
! 1558: }
! 1559:
! 1560: /*
! 1561: * done! start next swapdev I/O if one is pending
! 1562: */
! 1563: sdp->swd_tab.b_active--;
! 1564: sw_reg_start(sdp);
! 1565: }
! 1566:
! 1567:
! 1568: /*
! 1569: * uvm_swap_alloc: allocate space on swap
! 1570: *
! 1571: * => allocation is done "round robin" down the priority list, as we
! 1572: * allocate in a priority we "rotate" the circle queue.
! 1573: * => space can be freed with uvm_swap_free
! 1574: * => we return the page slot number in /dev/drum (0 == invalid slot)
! 1575: * => we lock uvm.swap_data_lock
! 1576: * => XXXMRG: "LESSOK" INTERFACE NEEDED TO EXTENT SYSTEM
! 1577: */
! 1578: int
! 1579: uvm_swap_alloc(nslots, lessok)
! 1580: int *nslots; /* IN/OUT */
! 1581: boolean_t lessok;
! 1582: {
! 1583: struct swapdev *sdp;
! 1584: struct swappri *spp;
! 1585: u_long result;
! 1586: UVMHIST_FUNC("uvm_swap_alloc"); UVMHIST_CALLED(pdhist);
! 1587:
! 1588: /*
! 1589: * no swap devices configured yet? definite failure.
! 1590: */
! 1591: if (uvmexp.nswapdev < 1)
! 1592: return 0;
! 1593:
! 1594: /*
! 1595: * lock data lock, convert slots into blocks, and enter loop
! 1596: */
! 1597: simple_lock(&uvm.swap_data_lock);
! 1598:
! 1599: ReTry: /* XXXMRG */
! 1600: for (spp = LIST_FIRST(&swap_priority); spp != NULL;
! 1601: spp = LIST_NEXT(spp, spi_swappri)) {
! 1602: for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
! 1603: sdp != (void *)&spp->spi_swapdev;
! 1604: sdp = CIRCLEQ_NEXT(sdp,swd_next)) {
! 1605: /* if it's not enabled, then we can't swap from it */
! 1606: if ((sdp->swd_flags & SWF_ENABLE) == 0)
! 1607: continue;
! 1608: if (sdp->swd_npginuse + *nslots > sdp->swd_npages)
! 1609: continue;
! 1610: if (extent_alloc(sdp->swd_ex, *nslots, EX_NOALIGN, 0,
! 1611: EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT,
! 1612: &result) != 0) {
! 1613: continue;
! 1614: }
! 1615:
! 1616: /*
! 1617: * successful allocation! now rotate the circleq.
! 1618: */
! 1619: CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, swd_next);
! 1620: CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
! 1621: sdp->swd_npginuse += *nslots;
! 1622: uvmexp.swpginuse += *nslots;
! 1623: simple_unlock(&uvm.swap_data_lock);
! 1624: /* done! return drum slot number */
! 1625: UVMHIST_LOG(pdhist,
! 1626: "success! returning %ld slots starting at %ld",
! 1627: *nslots, result + sdp->swd_drumoffset, 0, 0);
! 1628: return(result + sdp->swd_drumoffset);
! 1629: }
! 1630: }
! 1631:
! 1632: /* XXXMRG: BEGIN HACK */
! 1633: if (*nslots > 1 && lessok) {
! 1634: *nslots = 1;
! 1635: goto ReTry; /* XXXMRG: ugh! extent should support this for us */
! 1636: }
! 1637: /* XXXMRG: END HACK */
! 1638:
! 1639: simple_unlock(&uvm.swap_data_lock);
! 1640: return 0; /* failed */
! 1641: }
! 1642:
! 1643: /*
! 1644: * uvm_swap_markbad: keep track of swap ranges where we've had i/o errors
! 1645: *
! 1646: * => we lock uvm.swap_data_lock
! 1647: */
! 1648: void
! 1649: uvm_swap_markbad(startslot, nslots)
! 1650: int startslot;
! 1651: int nslots;
! 1652: {
! 1653: struct swapdev *sdp;
! 1654: UVMHIST_FUNC("uvm_swap_markbad"); UVMHIST_CALLED(pdhist);
! 1655:
! 1656: simple_lock(&uvm.swap_data_lock);
! 1657: sdp = swapdrum_getsdp(startslot);
! 1658: if (sdp != NULL) {
! 1659: /*
! 1660: * we just keep track of how many pages have been marked bad
! 1661: * in this device, to make everything add up in swap_off().
! 1662: * we assume here that the range of slots will all be within
! 1663: * one swap device.
! 1664: */
! 1665: sdp->swd_npgbad += nslots;
! 1666: UVMHIST_LOG(pdhist, "now %ld bad", sdp->swd_npgbad, 0,0,0);
! 1667: }
! 1668: simple_unlock(&uvm.swap_data_lock);
! 1669: }
! 1670:
! 1671: /*
! 1672: * uvm_swap_free: free swap slots
! 1673: *
! 1674: * => this can be all or part of an allocation made by uvm_swap_alloc
! 1675: * => we lock uvm.swap_data_lock
! 1676: */
! 1677: void
! 1678: uvm_swap_free(startslot, nslots)
! 1679: int startslot;
! 1680: int nslots;
! 1681: {
! 1682: struct swapdev *sdp;
! 1683: UVMHIST_FUNC("uvm_swap_free"); UVMHIST_CALLED(pdhist);
! 1684:
! 1685: UVMHIST_LOG(pdhist, "freeing %ld slots starting at %ld", nslots,
! 1686: startslot, 0, 0);
! 1687:
! 1688: /*
! 1689: * ignore attempts to free the "bad" slot.
! 1690: */
! 1691:
! 1692: if (startslot == SWSLOT_BAD) {
! 1693: return;
! 1694: }
! 1695:
! 1696: /*
! 1697: * convert drum slot offset back to sdp, free the blocks
! 1698: * in the extent, and return. must hold pri lock to do
! 1699: * lookup and access the extent.
! 1700: */
! 1701:
! 1702: simple_lock(&uvm.swap_data_lock);
! 1703: sdp = swapdrum_getsdp(startslot);
! 1704: KASSERT(uvmexp.nswapdev >= 1);
! 1705: KASSERT(sdp != NULL);
! 1706: KASSERT(sdp->swd_npginuse >= nslots);
! 1707: if (extent_free(sdp->swd_ex, startslot - sdp->swd_drumoffset, nslots,
! 1708: EX_MALLOCOK|EX_NOWAIT) != 0) {
! 1709: printf("warning: resource shortage: %d pages of swap lost\n",
! 1710: nslots);
! 1711: }
! 1712:
! 1713: sdp->swd_npginuse -= nslots;
! 1714: uvmexp.swpginuse -= nslots;
! 1715: #ifdef UVM_SWAP_ENCRYPT
! 1716: {
! 1717: int i;
! 1718: if (swap_encrypt_initialized) {
! 1719: /* Dereference keys */
! 1720: for (i = 0; i < nslots; i++)
! 1721: if (uvm_swap_needdecrypt(sdp, startslot + i))
! 1722: SWAP_KEY_PUT(sdp, SWD_KEY(sdp, startslot + i));
! 1723:
! 1724: /* Mark range as not decrypt */
! 1725: uvm_swap_markdecrypt(sdp, startslot, nslots, 0);
! 1726: }
! 1727: }
! 1728: #endif /* UVM_SWAP_ENCRYPT */
! 1729: simple_unlock(&uvm.swap_data_lock);
! 1730: }
! 1731:
! 1732: /*
! 1733: * uvm_swap_put: put any number of pages into a contig place on swap
! 1734: *
! 1735: * => can be sync or async
! 1736: * => XXXMRG: consider making it an inline or macro
! 1737: */
! 1738: int
! 1739: uvm_swap_put(swslot, ppsp, npages, flags)
! 1740: int swslot;
! 1741: struct vm_page **ppsp;
! 1742: int npages;
! 1743: int flags;
! 1744: {
! 1745: int result;
! 1746:
! 1747: result = uvm_swap_io(ppsp, swslot, npages, B_WRITE |
! 1748: ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
! 1749:
! 1750: return (result);
! 1751: }
! 1752:
! 1753: /*
! 1754: * uvm_swap_get: get a single page from swap
! 1755: *
! 1756: * => usually a sync op (from fault)
! 1757: * => XXXMRG: consider making it an inline or macro
! 1758: */
! 1759: int
! 1760: uvm_swap_get(page, swslot, flags)
! 1761: struct vm_page *page;
! 1762: int swslot, flags;
! 1763: {
! 1764: int result;
! 1765:
! 1766: uvmexp.nswget++;
! 1767: KASSERT(flags & PGO_SYNCIO);
! 1768: if (swslot == SWSLOT_BAD) {
! 1769: return VM_PAGER_ERROR;
! 1770: }
! 1771:
! 1772: /*
! 1773: * this page is (about to be) no longer only in swap.
! 1774: */
! 1775: simple_lock(&uvm.swap_data_lock);
! 1776: uvmexp.swpgonly--;
! 1777: simple_unlock(&uvm.swap_data_lock);
! 1778:
! 1779: result = uvm_swap_io(&page, swslot, 1, B_READ |
! 1780: ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
! 1781:
! 1782: if (result != VM_PAGER_OK && result != VM_PAGER_PEND) {
! 1783: /*
! 1784: * oops, the read failed so it really is still only in swap.
! 1785: */
! 1786: simple_lock(&uvm.swap_data_lock);
! 1787: uvmexp.swpgonly++;
! 1788: simple_unlock(&uvm.swap_data_lock);
! 1789: }
! 1790:
! 1791: return (result);
! 1792: }
! 1793:
! 1794: /*
! 1795: * uvm_swap_io: do an i/o operation to swap
! 1796: */
! 1797:
! 1798: static int
! 1799: uvm_swap_io(pps, startslot, npages, flags)
! 1800: struct vm_page **pps;
! 1801: int startslot, npages, flags;
! 1802: {
! 1803: daddr64_t startblk;
! 1804: struct buf *bp;
! 1805: vaddr_t kva;
! 1806: int result, s, mapinflags, pflag;
! 1807: boolean_t write, async;
! 1808: #ifdef UVM_SWAP_ENCRYPT
! 1809: vaddr_t dstkva;
! 1810: struct vm_page *tpps[MAXBSIZE >> PAGE_SHIFT];
! 1811: struct swapdev *sdp;
! 1812: int encrypt = 0;
! 1813: #endif
! 1814: UVMHIST_FUNC("uvm_swap_io"); UVMHIST_CALLED(pdhist);
! 1815:
! 1816: UVMHIST_LOG(pdhist, "<- called, startslot=%ld, npages=%ld, flags=%ld",
! 1817: startslot, npages, flags, 0);
! 1818:
! 1819: write = (flags & B_READ) == 0;
! 1820: async = (flags & B_ASYNC) != 0;
! 1821:
! 1822: /*
! 1823: * convert starting drum slot to block number
! 1824: */
! 1825: startblk = btodb((u_int64_t)startslot << PAGE_SHIFT);
! 1826:
! 1827: /*
! 1828: * first, map the pages into the kernel (XXX: currently required
! 1829: * by buffer system).
! 1830: */
! 1831: mapinflags = !write ? UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE;
! 1832: if (!async)
! 1833: mapinflags |= UVMPAGER_MAPIN_WAITOK;
! 1834: kva = uvm_pagermapin(pps, npages, mapinflags);
! 1835: if (kva == 0)
! 1836: return (VM_PAGER_AGAIN);
! 1837:
! 1838: #ifdef UVM_SWAP_ENCRYPT
! 1839: if (write) {
! 1840: /*
! 1841: * Check if we need to do swap encryption on old pages.
! 1842: * Later we need a different scheme, that swap encrypts
! 1843: * all pages of a process that had at least one page swap
! 1844: * encrypted. Then we might not need to copy all pages
! 1845: * in the cluster, and avoid the memory overheard in
! 1846: * swapping.
! 1847: */
! 1848: if (uvm_doswapencrypt)
! 1849: encrypt = 1;
! 1850: }
! 1851:
! 1852: if (swap_encrypt_initialized || encrypt) {
! 1853: /*
! 1854: * we need to know the swap device that we are swapping to/from
! 1855: * to see if the pages need to be marked for decryption or
! 1856: * actually need to be decrypted.
! 1857: * XXX - does this information stay the same over the whole
! 1858: * execution of this function?
! 1859: */
! 1860: simple_lock(&uvm.swap_data_lock);
! 1861: sdp = swapdrum_getsdp(startslot);
! 1862: simple_unlock(&uvm.swap_data_lock);
! 1863: }
! 1864:
! 1865: /*
! 1866: * encrypt to swap
! 1867: */
! 1868: if (write && encrypt) {
! 1869: int i, opages;
! 1870: caddr_t src, dst;
! 1871: struct swap_key *key;
! 1872: u_int64_t block;
! 1873: int swmapflags;
! 1874:
! 1875: /* We always need write access. */
! 1876: swmapflags = UVMPAGER_MAPIN_READ;
! 1877: if (!async)
! 1878: swmapflags |= UVMPAGER_MAPIN_WAITOK;
! 1879:
! 1880: if (!uvm_swap_allocpages(tpps, npages)) {
! 1881: uvm_pagermapout(kva, npages);
! 1882: return (VM_PAGER_AGAIN);
! 1883: }
! 1884:
! 1885: dstkva = uvm_pagermapin(tpps, npages, swmapflags);
! 1886: if (dstkva == 0) {
! 1887: uvm_pagermapout(kva, npages);
! 1888: uvm_swap_freepages(tpps, npages);
! 1889: return (VM_PAGER_AGAIN);
! 1890: }
! 1891:
! 1892: src = (caddr_t) kva;
! 1893: dst = (caddr_t) dstkva;
! 1894: block = startblk;
! 1895: for (i = 0; i < npages; i++) {
! 1896: key = SWD_KEY(sdp, startslot + i);
! 1897: SWAP_KEY_GET(sdp, key); /* add reference */
! 1898:
! 1899: /* mark for async writes */
! 1900: atomic_setbits_int(&tpps[i]->pg_flags, PQ_ENCRYPT);
! 1901: swap_encrypt(key, src, dst, block, 1 << PAGE_SHIFT);
! 1902: src += 1 << PAGE_SHIFT;
! 1903: dst += 1 << PAGE_SHIFT;
! 1904: block += btodb(1 << PAGE_SHIFT);
! 1905: }
! 1906:
! 1907: uvm_pagermapout(kva, npages);
! 1908:
! 1909: /* dispose of pages we dont use anymore */
! 1910: opages = npages;
! 1911: uvm_pager_dropcluster(NULL, NULL, pps, &opages,
! 1912: PGO_PDFREECLUST);
! 1913:
! 1914: kva = dstkva;
! 1915: }
! 1916: #endif /* UVM_SWAP_ENCRYPT */
! 1917:
! 1918: /*
! 1919: * now allocate a buf for the i/o.
! 1920: * [make sure we don't put the pagedaemon to sleep...]
! 1921: */
! 1922: s = splbio();
! 1923: pflag = (async || curproc == uvm.pagedaemon_proc) ? 0 : PR_WAITOK;
! 1924: bp = pool_get(&bufpool, pflag);
! 1925: splx(s);
! 1926:
! 1927: /*
! 1928: * if we failed to get a swapbuf, return "try again"
! 1929: */
! 1930: if (bp == NULL) {
! 1931: #ifdef UVM_SWAP_ENCRYPT
! 1932: if (write && encrypt) {
! 1933: int i;
! 1934:
! 1935: /* swap encrypt needs cleanup */
! 1936: for (i = 0; i < npages; i++)
! 1937: SWAP_KEY_PUT(sdp, SWD_KEY(sdp, startslot + i));
! 1938:
! 1939: uvm_pagermapout(kva, npages);
! 1940: uvm_swap_freepages(tpps, npages);
! 1941: }
! 1942: #endif
! 1943: return (VM_PAGER_AGAIN);
! 1944: }
! 1945:
! 1946: #ifdef UVM_SWAP_ENCRYPT
! 1947: /*
! 1948: * prevent ASYNC reads.
! 1949: * uvm_swap_io is only called from uvm_swap_get, uvm_swap_get
! 1950: * assumes that all gets are SYNCIO. Just make sure here.
! 1951: * XXXARTUBC - might not be true anymore.
! 1952: */
! 1953: if (!write) {
! 1954: flags &= ~B_ASYNC;
! 1955: async = 0;
! 1956: }
! 1957: #endif
! 1958: /*
! 1959: * fill in the bp. we currently route our i/o through
! 1960: * /dev/drum's vnode [swapdev_vp].
! 1961: */
! 1962: bp->b_flags = B_BUSY | B_NOCACHE | (flags & (B_READ|B_ASYNC));
! 1963: bp->b_proc = &proc0; /* XXX */
! 1964: bp->b_vnbufs.le_next = NOLIST;
! 1965: bp->b_data = (caddr_t)kva;
! 1966: bp->b_blkno = startblk;
! 1967: LIST_INIT(&bp->b_dep);
! 1968: s = splbio();
! 1969: bp->b_vp = NULL;
! 1970: buf_replacevnode(bp, swapdev_vp);
! 1971: splx(s);
! 1972: bp->b_bufsize = bp->b_bcount = npages << PAGE_SHIFT;
! 1973:
! 1974: /*
! 1975: * for pageouts we must set "dirtyoff" [NFS client code needs it].
! 1976: * and we bump v_numoutput (counter of number of active outputs).
! 1977: */
! 1978: if (write) {
! 1979: bp->b_dirtyoff = 0;
! 1980: bp->b_dirtyend = npages << PAGE_SHIFT;
! 1981: #ifdef UVM_SWAP_ENCRYPT
! 1982: /* mark the pages in the drum for decryption */
! 1983: if (swap_encrypt_initialized)
! 1984: uvm_swap_markdecrypt(sdp, startslot, npages, encrypt);
! 1985: #endif
! 1986: s = splbio();
! 1987: swapdev_vp->v_numoutput++;
! 1988: splx(s);
! 1989: }
! 1990:
! 1991: /*
! 1992: * for async ops we must set up the iodone handler.
! 1993: */
! 1994: if (async) {
! 1995: bp->b_flags |= B_CALL | (curproc == uvm.pagedaemon_proc ?
! 1996: B_PDAEMON : 0);
! 1997: bp->b_iodone = uvm_aio_biodone;
! 1998: UVMHIST_LOG(pdhist, "doing async!", 0, 0, 0, 0);
! 1999: }
! 2000: UVMHIST_LOG(pdhist,
! 2001: "about to start io: data = %p blkno = 0x%lx, bcount = %ld",
! 2002: bp->b_data, bp->b_blkno, bp->b_bcount, 0);
! 2003:
! 2004: /*
! 2005: * now we start the I/O, and if async, return.
! 2006: */
! 2007: VOP_STRATEGY(bp);
! 2008: if (async)
! 2009: return (VM_PAGER_PEND);
! 2010:
! 2011: /*
! 2012: * must be sync i/o. wait for it to finish
! 2013: */
! 2014: (void) biowait(bp);
! 2015: result = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
! 2016:
! 2017: #ifdef UVM_SWAP_ENCRYPT
! 2018: /*
! 2019: * decrypt swap
! 2020: */
! 2021: if (swap_encrypt_initialized &&
! 2022: (bp->b_flags & B_READ) && !(bp->b_flags & B_ERROR)) {
! 2023: int i;
! 2024: caddr_t data = bp->b_data;
! 2025: u_int64_t block = startblk;
! 2026: struct swap_key *key = NULL;
! 2027:
! 2028: for (i = 0; i < npages; i++) {
! 2029: /* Check if we need to decrypt */
! 2030: if (uvm_swap_needdecrypt(sdp, startslot + i)) {
! 2031: key = SWD_KEY(sdp, startslot + i);
! 2032: swap_decrypt(key, data, data, block,
! 2033: 1 << PAGE_SHIFT);
! 2034: }
! 2035: data += 1 << PAGE_SHIFT;
! 2036: block += btodb(1 << PAGE_SHIFT);
! 2037: }
! 2038: }
! 2039: #endif
! 2040: /*
! 2041: * kill the pager mapping
! 2042: */
! 2043: uvm_pagermapout(kva, npages);
! 2044:
! 2045: #ifdef UVM_SWAP_ENCRYPT
! 2046: /*
! 2047: * Not anymore needed, free after encryption
! 2048: */
! 2049: if ((bp->b_flags & B_READ) == 0 && encrypt)
! 2050: uvm_swap_freepages(tpps, npages);
! 2051: #endif
! 2052: /*
! 2053: * now dispose of the buf
! 2054: */
! 2055: s = splbio();
! 2056: if (bp->b_vp)
! 2057: brelvp(bp);
! 2058:
! 2059: if (write && bp->b_vp)
! 2060: vwakeup(bp->b_vp);
! 2061: pool_put(&bufpool, bp);
! 2062: splx(s);
! 2063:
! 2064: /*
! 2065: * finally return.
! 2066: */
! 2067: UVMHIST_LOG(pdhist, "<- done (sync) result=%ld", result, 0, 0, 0);
! 2068: return (result);
! 2069: }
! 2070:
! 2071: static void
! 2072: swapmount()
! 2073: {
! 2074: struct swapdev *sdp;
! 2075: struct swappri *spp;
! 2076: struct vnode *vp;
! 2077: dev_t swap_dev = swdevt[0].sw_dev;
! 2078:
! 2079: /*
! 2080: * No locking here since we happen to know that we will just be called
! 2081: * once before any other process has forked.
! 2082: */
! 2083:
! 2084: if (swap_dev == NODEV) {
! 2085: printf("swapmount: no device\n");
! 2086: return;
! 2087: }
! 2088:
! 2089: if (bdevvp(swap_dev, &vp)) {
! 2090: printf("swapmount: no device 2\n");
! 2091: return;
! 2092: }
! 2093:
! 2094: sdp = malloc(sizeof(*sdp), M_VMSWAP, M_WAITOK);
! 2095: spp = malloc(sizeof(*spp), M_VMSWAP, M_WAITOK);
! 2096: memset(sdp, 0, sizeof(*sdp));
! 2097:
! 2098: sdp->swd_flags = SWF_FAKE;
! 2099: sdp->swd_dev = swap_dev;
! 2100: sdp->swd_vp = vp;
! 2101: swaplist_insert(sdp, spp, 0);
! 2102: sdp->swd_pathlen = strlen("swap_device") + 1;
! 2103: sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK);
! 2104: if (copystr("swap_device", sdp->swd_path, sdp->swd_pathlen, 0))
! 2105: panic("swapmount: copystr");
! 2106:
! 2107: if (swap_on(curproc, sdp)) {
! 2108: swaplist_find(vp, 1);
! 2109: swaplist_trim();
! 2110: vput(sdp->swd_vp);
! 2111: free(sdp->swd_path, M_VMSWAP);
! 2112: free(sdp, M_VMSWAP);
! 2113: return;
! 2114: }
! 2115:
! 2116: VOP_UNLOCK(vp, 0, curproc);
! 2117: }
CVSweb