Annotation of sys/dev/vnd.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: vnd.c,v 1.79 2007/06/20 18:15:46 deraadt Exp $ */
2: /* $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $ */
3:
4: /*
5: * Copyright (c) 1988 University of Utah.
6: * Copyright (c) 1990, 1993
7: * The Regents of the University of California. All rights reserved.
8: *
9: * This code is derived from software contributed to Berkeley by
10: * the Systems Programming Group of the University of Utah Computer
11: * Science Department.
12: *
13: * Redistribution and use in source and binary forms, with or without
14: * modification, are permitted provided that the following conditions
15: * are met:
16: * 1. Redistributions of source code must retain the above copyright
17: * notice, this list of conditions and the following disclaimer.
18: * 2. Redistributions in binary form must reproduce the above copyright
19: * notice, this list of conditions and the following disclaimer in the
20: * documentation and/or other materials provided with the distribution.
21: * 3. Neither the name of the University nor the names of its contributors
22: * may be used to endorse or promote products derived from this software
23: * without specific prior written permission.
24: *
25: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35: * SUCH DAMAGE.
36: *
37: * from: Utah $Hdr: vn.c 1.13 94/04/02$
38: *
39: * @(#)vn.c 8.6 (Berkeley) 4/1/94
40: */
41:
42: /*
43: * Vnode disk driver.
44: *
45: * Block/character interface to a vnode. Allows one to treat a file
46: * as a disk (e.g. build a filesystem in it, mount it, etc.).
47: *
48: * NOTE 1: This uses either the VOP_BMAP/VOP_STRATEGY interface to the
49: * vnode or simple VOP_READ/VOP_WRITE. The former is suitable for swapping
50: * as it doesn't distort the local buffer cache. The latter is good for
51: * building disk images as it keeps the cache consistent after the block
52: * device is closed.
53: *
54: * NOTE 2: There is a security issue involved with this driver.
55: * Once mounted all access to the contents of the "mapped" file via
56: * the special file is controlled by the permissions on the special
57: * file, the protection of the mapped file is ignored (effectively,
58: * by using root credentials in all transactions).
59: *
60: * NOTE 3: Doesn't interact with leases, should it?
61: */
62:
63: #include <sys/param.h>
64: #include <sys/systm.h>
65: #include <sys/namei.h>
66: #include <sys/proc.h>
67: #include <sys/errno.h>
68: #include <sys/buf.h>
69: #include <sys/malloc.h>
70: #include <sys/pool.h>
71: #include <sys/ioctl.h>
72: #include <sys/disklabel.h>
73: #include <sys/device.h>
74: #include <sys/disk.h>
75: #include <sys/stat.h>
76: #include <sys/mount.h>
77: #include <sys/vnode.h>
78: #include <sys/file.h>
79: #include <sys/rwlock.h>
80: #include <sys/uio.h>
81: #include <sys/conf.h>
82:
83: #include <crypto/blf.h>
84:
85: #include <miscfs/specfs/specdev.h>
86:
87: #include <dev/vndioctl.h>
88:
/*
 * Debug tracing.  When VNDDEBUG is defined, DNPRINTF(mask, fmt, ...)
 * prints if any bit of "mask" is set in vnddebug; otherwise it expands
 * to nothing at all.
 */
#ifndef VNDDEBUG
#define	DNPRINTF(f, p...)	/* nothing */
#else
#define	VDB_FOLLOW	0x01	/* trace entry into the driver entry points */
#define	VDB_INIT	0x02	/* trace configuration (SET/CLR) */
#define	VDB_IO		0x04	/* trace the I/O path */
int	vnddebug = 0x00;	/* mask of enabled VDB_* categories */
int	dovndcluster = 1;	/* 0: vndstrategy ignores VOP_BMAP read-ahead */
#define	DNPRINTF(f, p...) \
	do { if ((f) & vnddebug) printf(p); } while (0)
#endif /* !VNDDEBUG */
99:
/*
 * Minor number helpers.
 *
 * Bit 0x800 of the minor selects the "simple" flavour of the device (I/O
 * through VOP_READ/VOP_WRITE, see vndstrategy()); the remaining bits
 * (mask 0x7ff) are an ordinary disk minor.  vndunit() therefore has to
 * rebuild a dev_t with that bit masked off before handing it to
 * DISKUNIT().
 */
#define	vndsimple(x)	(minor(x) & 0x800)
#define	vndunit(x)	DISKUNIT(makedev(major(x), minor(x) & 0x7ff))

/*
 * Raw-partition dev_t of the unit "dev" belongs to: same as MAKEDISKDEV,
 * but preserving the vndsimple() property of "dev".
 */
#define	VNDLABELDEV(dev)						\
	makedev(major(dev),						\
	    DISKMINOR(vndunit(dev), RAW_PART) |				\
	    (vndsimple(dev) ? 0x800 : 0))
111:
112: struct vndbuf {
113: struct buf vb_buf;
114: struct buf *vb_obp;
115: };
116:
117: /*
118: * struct vndbuf allocator
119: */
120: struct pool vndbufpl;
121:
122: #define getvndbuf() pool_get(&vndbufpl, PR_WAITOK)
123: #define putvndbuf(vbp) pool_put(&vndbufpl, vbp);
124:
125: struct vnd_softc {
126: struct device sc_dev;
127: struct disk sc_dk;
128:
129: char sc_file[VNDNLEN]; /* file we're covering */
130: int sc_flags; /* flags */
131: size_t sc_size; /* size of vnd in blocks */
132: struct vnode *sc_vp; /* vnode */
133: struct ucred *sc_cred; /* credentials */
134: struct buf sc_tab; /* transfer queue */
135: blf_ctx *sc_keyctx; /* key context */
136: struct rwlock sc_rwlock;
137: };
138:
/*
 * sc_flags
 */
#define	VNF_ALIVE	0x0001	/* not referenced by the code visible here */
#define	VNF_INITED	0x0002	/* unit is configured (VNDIOCSET succeeded) */
#define	VNF_LABELLING	0x0100	/* DIOCSDINFO/DIOCWDINFO in progress */
#define	VNF_WLABEL	0x0200	/* toggled by DIOCWLABEL */
#define	VNF_HAVELABEL	0x0400	/* vndgetdisklabel() has been run */
#define	VNF_SIMPLE	0x1000	/* first opener used the "simple" device */
#define	VNF_READONLY	0x2000	/* backing file could only be opened FREAD */

/* vn_open()/vn_close() mode matching how the backing file was opened. */
#define	VNDRW(v) \
	(((v)->sc_flags & VNF_READONLY) ? FREAD : (FREAD | FWRITE))
149:
150: struct vnd_softc *vnd_softc;
151: int numvnd = 0;
152:
153: struct dkdriver vnddkdriver = { vndstrategy };
154:
155: /* called by main() at boot time */
156: void vndattach(int);
157:
158: void vndclear(struct vnd_softc *);
159: void vndstart(struct vnd_softc *);
160: int vndsetcred(struct vnd_softc *, struct ucred *);
161: void vndiodone(struct buf *);
162: void vndshutdown(void);
163: void vndgetdisklabel(dev_t, struct vnd_softc *);
164: void vndencrypt(struct vnd_softc *, caddr_t, size_t, daddr64_t, int);
165:
/*
 * Per-unit lock.  vndlock() takes sc_rwlock for writing with RW_INTR and
 * yields rw_enter()'s return value, which every caller treats as an error
 * when non-zero.  vndunlock() releases it.
 *
 * The argument is parenthesized so that any pointer expression, not just
 * a plain identifier, can be passed.
 */
#define	vndlock(sc)	rw_enter(&(sc)->sc_rwlock, RW_WRITE | RW_INTR)
#define	vndunlock(sc)	rw_exit_write(&(sc)->sc_rwlock)
168:
169: void
170: vndencrypt(struct vnd_softc *vnd, caddr_t addr, size_t size, daddr64_t off,
171: int encrypt)
172: {
173: int i, bsize;
174: u_char iv[8];
175:
176: bsize = dbtob(1);
177: for (i = 0; i < size/bsize; i++) {
178: bzero(iv, sizeof(iv));
179: bcopy((u_char *)&off, iv, sizeof(off));
180: blf_ecb_encrypt(vnd->sc_keyctx, iv, sizeof(iv));
181: if (encrypt)
182: blf_cbc_encrypt(vnd->sc_keyctx, iv, addr, bsize);
183: else
184: blf_cbc_decrypt(vnd->sc_keyctx, iv, addr, bsize);
185:
186: addr += bsize;
187: off++;
188: }
189: }
190:
191: void
192: vndattach(int num)
193: {
194: char *mem;
195: u_long size;
196: int i;
197:
198: if (num <= 0)
199: return;
200: size = num * sizeof(struct vnd_softc);
201: mem = malloc(size, M_DEVBUF, M_NOWAIT);
202: if (mem == NULL) {
203: printf("WARNING: no memory for vnode disks\n");
204: return;
205: }
206: bzero(mem, size);
207: vnd_softc = (struct vnd_softc *)mem;
208: for (i = 0; i < num; i++) {
209: rw_init(&vnd_softc[i].sc_rwlock, "vndlock");
210: }
211: numvnd = num;
212:
213: pool_init(&vndbufpl, sizeof(struct vndbuf), 0, 0, 0, "vndbufpl", NULL);
214: pool_setlowat(&vndbufpl, 16);
215: pool_sethiwat(&vndbufpl, 1024);
216: }
217:
218: int
219: vndopen(dev_t dev, int flags, int mode, struct proc *p)
220: {
221: int unit = vndunit(dev);
222: struct vnd_softc *sc;
223: int error = 0, part, pmask;
224:
225: DNPRINTF(VDB_FOLLOW, "vndopen(%x, %x, %x, %p)\n", dev, flags, mode, p);
226:
227: if (unit >= numvnd)
228: return (ENXIO);
229: sc = &vnd_softc[unit];
230:
231: if ((error = vndlock(sc)) != 0)
232: return (error);
233:
234: if ((flags & FWRITE) && (sc->sc_flags & VNF_READONLY)) {
235: error = EROFS;
236: goto bad;
237: }
238:
239: if ((sc->sc_flags & VNF_INITED) &&
240: (sc->sc_flags & VNF_HAVELABEL) == 0) {
241: sc->sc_flags |= VNF_HAVELABEL;
242: vndgetdisklabel(dev, sc);
243: }
244:
245: part = DISKPART(dev);
246: pmask = 1 << part;
247:
248: /*
249: * If any partition is open, all succeeding openings must be of the
250: * same type or read-only.
251: */
252: if (sc->sc_dk.dk_openmask) {
253: if (((sc->sc_flags & VNF_SIMPLE) != 0) !=
254: (vndsimple(dev) != 0) && (flags & FWRITE)) {
255: error = EBUSY;
256: goto bad;
257: }
258: } else if (vndsimple(dev))
259: sc->sc_flags |= VNF_SIMPLE;
260: else
261: sc->sc_flags &= ~VNF_SIMPLE;
262:
263: /* Check that the partition exists. */
264: if (part != RAW_PART &&
265: ((sc->sc_flags & VNF_HAVELABEL) == 0 ||
266: part >= sc->sc_dk.dk_label->d_npartitions ||
267: sc->sc_dk.dk_label->d_partitions[part].p_fstype == FS_UNUSED)) {
268: error = ENXIO;
269: goto bad;
270: }
271:
272: /* Prevent our unit from being unconfigured while open. */
273: switch (mode) {
274: case S_IFCHR:
275: sc->sc_dk.dk_copenmask |= pmask;
276: break;
277:
278: case S_IFBLK:
279: sc->sc_dk.dk_bopenmask |= pmask;
280: break;
281: }
282: sc->sc_dk.dk_openmask =
283: sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
284:
285: error = 0;
286: bad:
287: vndunlock(sc);
288: return (error);
289: }
290:
291: /*
292: * Load the label information on the named device
293: */
294: void
295: vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
296: {
297: struct disklabel *lp = sc->sc_dk.dk_label;
298: char *errstring = NULL;
299:
300: bzero(lp, sizeof(struct disklabel));
301:
302: lp->d_secsize = 512;
303: lp->d_ntracks = 1;
304: lp->d_nsectors = 100;
305: lp->d_ncylinders = sc->sc_size / 100;
306: lp->d_secpercyl = 100; /* lp->d_ntracks * lp->d_nsectors */
307:
308: strncpy(lp->d_typename, "vnd device", sizeof(lp->d_typename));
309: lp->d_type = DTYPE_VND;
310: strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
311: DL_SETDSIZE(lp, sc->sc_size);
312: lp->d_rpm = 3600;
313: lp->d_interleave = 1;
314: lp->d_flags = 0;
315: lp->d_version = 1;
316:
317: lp->d_magic = DISKMAGIC;
318: lp->d_magic2 = DISKMAGIC;
319: lp->d_checksum = dkcksum(lp);
320:
321: /* Call the generic disklabel extraction routine */
322: errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, 0);
323: if (errstring) {
324: DNPRINTF(VDB_IO, "%s: %s\n", sc->sc_dev.dv_xname,
325: errstring);
326: return;
327: }
328: }
329:
330: int
331: vndclose(dev_t dev, int flags, int mode, struct proc *p)
332: {
333: int unit = vndunit(dev);
334: struct vnd_softc *sc;
335: int error = 0, part;
336:
337: DNPRINTF(VDB_FOLLOW, "vndclose(%x, %x, %x, %p)\n", dev, flags, mode, p);
338:
339: if (unit >= numvnd)
340: return (ENXIO);
341: sc = &vnd_softc[unit];
342:
343: if ((error = vndlock(sc)) != 0)
344: return (error);
345:
346: part = DISKPART(dev);
347:
348: /* ...that much closer to allowing unconfiguration... */
349: switch (mode) {
350: case S_IFCHR:
351: sc->sc_dk.dk_copenmask &= ~(1 << part);
352: break;
353:
354: case S_IFBLK:
355: sc->sc_dk.dk_bopenmask &= ~(1 << part);
356: break;
357: }
358: sc->sc_dk.dk_openmask =
359: sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
360:
361: vndunlock(sc);
362: return (0);
363: }
364:
365: /*
366: * Two methods are used, the traditional buffercache bypassing and the
367: * newer, cache-coherent on unmount, one.
368: *
369: * Former method:
370: * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
371: * Note that this driver can only be used for swapping over NFS on the hp
372: * since nfs_strategy on the vax cannot handle u-areas and page tables.
373: *
374: * Latter method:
375: * Repack the buffer into an uio structure and use VOP_READ/VOP_WRITE to
376: * access the underlying file.
377: */
378: void
379: vndstrategy(struct buf *bp)
380: {
381: int unit = vndunit(bp->b_dev);
382: struct vnd_softc *vnd = &vnd_softc[unit];
383: struct vndbuf *nbp;
384: int bsize;
385: off_t bn;
386: caddr_t addr;
387: size_t resid;
388: int sz, flags, error, s;
389: struct iovec aiov;
390: struct uio auio;
391: struct proc *p = curproc;
392:
393: DNPRINTF(VDB_FOLLOW, "vndstrategy(%p): unit %d\n", bp, unit);
394:
395: if ((vnd->sc_flags & VNF_INITED) == 0) {
396: bp->b_error = ENXIO;
397: bp->b_flags |= B_ERROR;
398: s = splbio();
399: biodone(bp);
400: splx(s);
401: return;
402: }
403:
404: bn = bp->b_blkno;
405: bp->b_resid = bp->b_bcount;
406:
407: if (bn < 0) {
408: bp->b_error = EINVAL;
409: bp->b_flags |= B_ERROR;
410: s = splbio();
411: biodone(bp);
412: splx(s);
413: return;
414: }
415:
416: /* If we have a label, do a boundary check. */
417: if (vnd->sc_flags & VNF_HAVELABEL) {
418: if (bounds_check_with_label(bp, vnd->sc_dk.dk_label, 1) <= 0) {
419: s = splbio();
420: biodone(bp);
421: splx(s);
422: return;
423: }
424:
425: /*
426: * bounds_check_with_label() changes bp->b_resid, reset it
427: */
428: bp->b_resid = bp->b_bcount;
429: }
430:
431: sz = howmany(bp->b_bcount, DEV_BSIZE);
432:
433: /* No bypassing of buffer cache? */
434: if (vndsimple(bp->b_dev)) {
435: /* Loop until all queued requests are handled. */
436: for (;;) {
437: int part = DISKPART(bp->b_dev);
438: daddr64_t off = DL_GETPOFFSET(&vnd->sc_dk.dk_label->d_partitions[part]);
439:
440: aiov.iov_base = bp->b_data;
441: auio.uio_resid = aiov.iov_len = bp->b_bcount;
442: auio.uio_iov = &aiov;
443: auio.uio_iovcnt = 1;
444: auio.uio_offset = dbtob((off_t)(bp->b_blkno + off));
445: auio.uio_segflg = UIO_SYSSPACE;
446: auio.uio_procp = p;
447:
448: vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY, p);
449: if (bp->b_flags & B_READ) {
450: auio.uio_rw = UIO_READ;
451: bp->b_error = VOP_READ(vnd->sc_vp, &auio, 0,
452: vnd->sc_cred);
453: if (vnd->sc_keyctx)
454: vndencrypt(vnd, bp->b_data,
455: bp->b_bcount, bp->b_blkno, 0);
456: } else {
457: if (vnd->sc_keyctx)
458: vndencrypt(vnd, bp->b_data,
459: bp->b_bcount, bp->b_blkno, 1);
460: auio.uio_rw = UIO_WRITE;
461: /*
462: * Upper layer has already checked I/O for
463: * limits, so there is no need to do it again.
464: */
465: bp->b_error = VOP_WRITE(vnd->sc_vp, &auio,
466: IO_NOLIMIT, vnd->sc_cred);
467: /* Data in buffer cache needs to be in clear */
468: if (vnd->sc_keyctx)
469: vndencrypt(vnd, bp->b_data,
470: bp->b_bcount, bp->b_blkno, 0);
471: }
472: VOP_UNLOCK(vnd->sc_vp, 0, p);
473: if (bp->b_error)
474: bp->b_flags |= B_ERROR;
475: bp->b_resid = auio.uio_resid;
476: s = splbio();
477: biodone(bp);
478: splx(s);
479:
480: /* If nothing more is queued, we are done. */
481: if (!vnd->sc_tab.b_active)
482: return;
483:
484: /*
485: * Dequeue now since lower level strategy
486: * routine might queue using same links.
487: */
488: s = splbio();
489: bp = vnd->sc_tab.b_actf;
490: vnd->sc_tab.b_actf = bp->b_actf;
491: vnd->sc_tab.b_active--;
492: splx(s);
493: }
494: }
495:
496: /* The old-style buffercache bypassing method. */
497: bn += DL_GETPOFFSET(&vnd->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)]);
498: bn = dbtob(bn);
499: bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
500: addr = bp->b_data;
501: flags = bp->b_flags | B_CALL;
502: for (resid = bp->b_resid; resid; resid -= sz) {
503: struct vnode *vp;
504: daddr64_t nbn;
505: int off, s, nra;
506:
507: nra = 0;
508: vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p);
509: error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
510: VOP_UNLOCK(vnd->sc_vp, 0, p);
511: if (error == 0 && (long)nbn == -1)
512: error = EIO;
513: #ifdef VNDDEBUG
514: if (!dovndcluster)
515: nra = 0;
516: #endif
517:
518: if ((off = bn % bsize) != 0)
519: sz = bsize - off;
520: else
521: sz = (1 + nra) * bsize;
522: if (resid < sz)
523: sz = resid;
524:
525: DNPRINTF(VDB_IO, "vndstrategy: vp %p/%p bn %x/%x sz %x\n",
526: vnd->sc_vp, vp, bn, nbn, sz);
527:
528: s = splbio();
529: nbp = getvndbuf();
530: splx(s);
531: nbp->vb_buf.b_flags = flags;
532: nbp->vb_buf.b_bcount = sz;
533: nbp->vb_buf.b_bufsize = bp->b_bufsize;
534: nbp->vb_buf.b_error = 0;
535: if (vp->v_type == VBLK || vp->v_type == VCHR)
536: nbp->vb_buf.b_dev = vp->v_rdev;
537: else
538: nbp->vb_buf.b_dev = NODEV;
539: nbp->vb_buf.b_data = addr;
540: nbp->vb_buf.b_blkno = nbn + btodb(off);
541: nbp->vb_buf.b_proc = bp->b_proc;
542: nbp->vb_buf.b_iodone = vndiodone;
543: nbp->vb_buf.b_vp = vp;
544: nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff;
545: nbp->vb_buf.b_dirtyend = bp->b_dirtyend;
546: nbp->vb_buf.b_validoff = bp->b_validoff;
547: nbp->vb_buf.b_validend = bp->b_validend;
548: LIST_INIT(&nbp->vb_buf.b_dep);
549:
550: /* save a reference to the old buffer */
551: nbp->vb_obp = bp;
552:
553: /*
554: * If there was an error or a hole in the file...punt.
555: * Note that we deal with this after the nbp allocation.
556: * This ensures that we properly clean up any operations
557: * that we have already fired off.
558: *
559: * XXX we could deal with holes here but it would be
560: * a hassle (in the write case).
561: * We must still however charge for the write even if there
562: * was an error.
563: */
564: if (error) {
565: nbp->vb_buf.b_error = error;
566: nbp->vb_buf.b_flags |= B_ERROR;
567: bp->b_resid -= (resid - sz);
568: s = splbio();
569: /* charge for the write */
570: if ((nbp->vb_buf.b_flags & B_READ) == 0)
571: nbp->vb_buf.b_vp->v_numoutput++;
572: biodone(&nbp->vb_buf);
573: splx(s);
574: return;
575: }
576: /*
577: * Just sort by block number
578: */
579: nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno;
580: s = splbio();
581: disksort(&vnd->sc_tab, &nbp->vb_buf);
582: vnd->sc_tab.b_active++;
583: vndstart(vnd);
584: splx(s);
585: bn += sz;
586: addr += sz;
587: }
588: }
589:
590: /*
591: * Feed requests sequentially.
592: * We do it this way to keep from flooding NFS servers if we are connected
593: * to an NFS file. This places the burden on the client rather than the
594: * server.
595: */
596: void
597: vndstart(struct vnd_softc *vnd)
598: {
599: struct buf *bp;
600:
601: /*
602: * Dequeue now since lower level strategy routine might
603: * queue using same links
604: */
605: bp = vnd->sc_tab.b_actf;
606: vnd->sc_tab.b_actf = bp->b_actf;
607:
608: DNPRINTF(VDB_IO,
609: "vndstart(%d): bp %p vp %p blkno %x addr %p cnt %lx\n",
610: vnd-vnd_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
611: bp->b_bcount);
612:
613: /* Instrumentation. */
614: disk_busy(&vnd->sc_dk);
615:
616: if ((bp->b_flags & B_READ) == 0)
617: bp->b_vp->v_numoutput++;
618: VOP_STRATEGY(bp);
619: }
620:
621: void
622: vndiodone(struct buf *bp)
623: {
624: struct vndbuf *vbp = (struct vndbuf *) bp;
625: struct buf *pbp = vbp->vb_obp;
626: struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
627:
628: splassert(IPL_BIO);
629:
630: DNPRINTF(VDB_IO,
631: "vndiodone(%d): vbp %p vp %p blkno %x addr %p cnt %lx\n",
632: vnd-vnd_softc, vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno,
633: vbp->vb_buf.b_data, vbp->vb_buf.b_bcount);
634:
635: if (vbp->vb_buf.b_error) {
636: DNPRINTF(VDB_IO, "vndiodone: vbp %p error %d\n", vbp,
637: vbp->vb_buf.b_error);
638:
639: pbp->b_flags |= B_ERROR;
640: pbp->b_error = biowait(&vbp->vb_buf);
641: }
642: pbp->b_resid -= vbp->vb_buf.b_bcount;
643: putvndbuf(vbp);
644: if (vnd->sc_tab.b_active) {
645: disk_unbusy(&vnd->sc_dk, (pbp->b_bcount - pbp->b_resid),
646: (pbp->b_flags & B_READ));
647: if (!vnd->sc_tab.b_actf)
648: vnd->sc_tab.b_active--;
649: }
650: if (pbp->b_resid == 0) {
651: DNPRINTF(VDB_IO, "vndiodone: pbp %p iodone\n", pbp);
652: biodone(pbp);
653: }
654:
655: }
656:
657: /* ARGSUSED */
658: int
659: vndread(dev_t dev, struct uio *uio, int flags)
660: {
661: int unit = vndunit(dev);
662: struct vnd_softc *sc;
663:
664: DNPRINTF(VDB_FOLLOW, "vndread(%x, %p)\n", dev, uio);
665:
666: if (unit >= numvnd)
667: return (ENXIO);
668: sc = &vnd_softc[unit];
669:
670: if ((sc->sc_flags & VNF_INITED) == 0)
671: return (ENXIO);
672:
673: return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
674: }
675:
676: /* ARGSUSED */
677: int
678: vndwrite(dev_t dev, struct uio *uio, int flags)
679: {
680: int unit = vndunit(dev);
681: struct vnd_softc *sc;
682:
683: DNPRINTF(VDB_FOLLOW, "vndwrite(%x, %p)\n", dev, uio);
684:
685: if (unit >= numvnd)
686: return (ENXIO);
687: sc = &vnd_softc[unit];
688:
689: if ((sc->sc_flags & VNF_INITED) == 0)
690: return (ENXIO);
691:
692: return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
693: }
694:
695: /* ARGSUSED */
696: int
697: vndioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
698: {
699: int unit = vndunit(dev);
700: struct vnd_softc *vnd;
701: struct vnd_ioctl *vio;
702: struct vnd_user *vnu;
703: struct vattr vattr;
704: struct nameidata nd;
705: int error, part, pmask, s;
706:
707: DNPRINTF(VDB_FOLLOW, "vndioctl(%x, %lx, %p, %x, %p): unit %d\n",
708: dev, cmd, addr, flag, p, unit);
709:
710: error = suser(p, 0);
711: if (error)
712: return (error);
713: if (unit >= numvnd)
714: return (ENXIO);
715:
716: vnd = &vnd_softc[unit];
717: vio = (struct vnd_ioctl *)addr;
718: switch (cmd) {
719:
720: case VNDIOCSET:
721: if (vnd->sc_flags & VNF_INITED)
722: return (EBUSY);
723: if (!(vnd->sc_flags & VNF_SIMPLE) && vio->vnd_keylen)
724: return (EINVAL);
725:
726: if ((error = vndlock(vnd)) != 0)
727: return (error);
728:
729: if ((error = copyinstr(vio->vnd_file, vnd->sc_file,
730: sizeof(vnd->sc_file), NULL))) {
731: vndunlock(vnd);
732: return (error);
733: }
734:
735: bzero(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname));
736: if (snprintf(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname),
737: "vnd%d", unit) >= sizeof(vnd->sc_dev.dv_xname)) {
738: printf("VNDIOCSET: device name too long\n");
739: vndunlock(vnd);
740: return(ENXIO);
741: }
742:
743: /*
744: * Open for read and write first. This lets vn_open() weed out
745: * directories, sockets, etc. so we don't have to worry about
746: * them.
747: */
748: NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
749: vnd->sc_flags &= ~VNF_READONLY;
750: error = vn_open(&nd, FREAD|FWRITE, 0);
751: if (error == EROFS) {
752: vnd->sc_flags |= VNF_READONLY;
753: error = vn_open(&nd, FREAD, 0);
754: }
755: if (error) {
756: vndunlock(vnd);
757: return (error);
758: }
759:
760: error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
761: if (error) {
762: VOP_UNLOCK(nd.ni_vp, 0, p);
763: (void) vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p);
764: vndunlock(vnd);
765: return (error);
766: }
767: VOP_UNLOCK(nd.ni_vp, 0, p);
768: vnd->sc_vp = nd.ni_vp;
769: vnd->sc_size = btodb(vattr.va_size); /* note truncation */
770: if ((error = vndsetcred(vnd, p->p_ucred)) != 0) {
771: (void) vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p);
772: vndunlock(vnd);
773: return (error);
774: }
775:
776: if (vio->vnd_keylen > 0) {
777: char key[BLF_MAXUTILIZED];
778:
779: if (vio->vnd_keylen > sizeof(key))
780: vio->vnd_keylen = sizeof(key);
781:
782: if ((error = copyin(vio->vnd_key, key,
783: vio->vnd_keylen)) != 0) {
784: (void) vn_close(nd.ni_vp, VNDRW(vnd),
785: p->p_ucred, p);
786: vndunlock(vnd);
787: return (error);
788: }
789:
790: vnd->sc_keyctx = malloc(sizeof(*vnd->sc_keyctx), M_DEVBUF,
791: M_WAITOK);
792: blf_key(vnd->sc_keyctx, key, vio->vnd_keylen);
793: bzero(key, vio->vnd_keylen);
794: } else
795: vnd->sc_keyctx = NULL;
796:
797: vio->vnd_size = dbtob((off_t)vnd->sc_size);
798: vnd->sc_flags |= VNF_INITED;
799:
800: DNPRINTF(VDB_INIT, "vndioctl: SET vp %p size %llx\n",
801: vnd->sc_vp, (unsigned long long)vnd->sc_size);
802:
803: /* Attach the disk. */
804: vnd->sc_dk.dk_driver = &vnddkdriver;
805: vnd->sc_dk.dk_name = vnd->sc_dev.dv_xname;
806: disk_attach(&vnd->sc_dk);
807:
808: vndunlock(vnd);
809:
810: break;
811:
812: case VNDIOCCLR:
813: if ((vnd->sc_flags & VNF_INITED) == 0)
814: return (ENXIO);
815:
816: if ((error = vndlock(vnd)) != 0)
817: return (error);
818:
819: /*
820: * Don't unconfigure if any other partitions are open
821: * or if both the character and block flavors of this
822: * partition are open.
823: */
824: part = DISKPART(dev);
825: pmask = (1 << part);
826: if ((vnd->sc_dk.dk_openmask & ~pmask) ||
827: ((vnd->sc_dk.dk_bopenmask & pmask) &&
828: (vnd->sc_dk.dk_copenmask & pmask))) {
829: vndunlock(vnd);
830: return (EBUSY);
831: }
832:
833: vndclear(vnd);
834: DNPRINTF(VDB_INIT, "vndioctl: CLRed\n");
835:
836: /* Free crypto key */
837: if (vnd->sc_keyctx) {
838: bzero(vnd->sc_keyctx, sizeof(*vnd->sc_keyctx));
839: free(vnd->sc_keyctx, M_DEVBUF);
840: }
841:
842: /* Detatch the disk. */
843: disk_detach(&vnd->sc_dk);
844:
845: /* This must be atomic. */
846: s = splhigh();
847: vndunlock(vnd);
848: bzero(vnd, sizeof(struct vnd_softc));
849: splx(s);
850: break;
851:
852: case VNDIOCGET:
853: vnu = (struct vnd_user *)addr;
854:
855: if (vnu->vnu_unit == -1)
856: vnu->vnu_unit = unit;
857: if (vnu->vnu_unit >= numvnd)
858: return (ENXIO);
859: if (vnu->vnu_unit < 0)
860: return (EINVAL);
861:
862: vnd = &vnd_softc[vnu->vnu_unit];
863:
864: if (vnd->sc_flags & VNF_INITED) {
865: error = VOP_GETATTR(vnd->sc_vp, &vattr, p->p_ucred, p);
866: if (error)
867: return (error);
868:
869: strlcpy(vnu->vnu_file, vnd->sc_file,
870: sizeof(vnu->vnu_file));
871: vnu->vnu_dev = vattr.va_fsid;
872: vnu->vnu_ino = vattr.va_fileid;
873: } else {
874: vnu->vnu_dev = 0;
875: vnu->vnu_ino = 0;
876: }
877:
878: break;
879:
880: case DIOCGDINFO:
881: if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
882: return (ENOTTY);
883: *(struct disklabel *)addr = *(vnd->sc_dk.dk_label);
884: return (0);
885:
886: case DIOCGPART:
887: if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
888: return (ENOTTY);
889: ((struct partinfo *)addr)->disklab = vnd->sc_dk.dk_label;
890: ((struct partinfo *)addr)->part =
891: &vnd->sc_dk.dk_label->d_partitions[DISKPART(dev)];
892: return (0);
893:
894: case DIOCWDINFO:
895: case DIOCSDINFO:
896: if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
897: return (ENOTTY);
898: if ((flag & FWRITE) == 0)
899: return (EBADF);
900:
901: if ((error = vndlock(vnd)) != 0)
902: return (error);
903: vnd->sc_flags |= VNF_LABELLING;
904:
905: error = setdisklabel(vnd->sc_dk.dk_label,
906: (struct disklabel *)addr, /*vnd->sc_dk.dk_openmask : */0);
907: if (error == 0) {
908: if (cmd == DIOCWDINFO)
909: error = writedisklabel(VNDLABELDEV(dev),
910: vndstrategy, vnd->sc_dk.dk_label);
911: }
912:
913: vnd->sc_flags &= ~VNF_LABELLING;
914: vndunlock(vnd);
915: return (error);
916:
917: case DIOCWLABEL:
918: if ((flag & FWRITE) == 0)
919: return (EBADF);
920: if (*(int *)addr)
921: vnd->sc_flags |= VNF_WLABEL;
922: else
923: vnd->sc_flags &= ~VNF_WLABEL;
924: return (0);
925:
926: default:
927: return (ENOTTY);
928: }
929:
930: return (0);
931: }
932:
933: /*
934: * Duplicate the current processes' credentials. Since we are called only
935: * as the result of a SET ioctl and only root can do that, any future access
936: * to this "disk" is essentially as root. Note that credentials may change
937: * if some other uid can write directly to the mapped file (NFS).
938: */
939: int
940: vndsetcred(struct vnd_softc *vnd, struct ucred *cred)
941: {
942: struct uio auio;
943: struct iovec aiov;
944: char *tmpbuf;
945: int error;
946: struct proc *p = curproc;
947:
948: vnd->sc_cred = crdup(cred);
949: tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
950:
951: /* XXX: Horrible kludge to establish credentials for NFS */
952: aiov.iov_base = tmpbuf;
953: aiov.iov_len = MIN(DEV_BSIZE, dbtob((off_t)vnd->sc_size));
954: auio.uio_iov = &aiov;
955: auio.uio_iovcnt = 1;
956: auio.uio_offset = 0;
957: auio.uio_rw = UIO_READ;
958: auio.uio_segflg = UIO_SYSSPACE;
959: auio.uio_resid = aiov.iov_len;
960: vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p);
961: error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
962: VOP_UNLOCK(vnd->sc_vp, 0, p);
963:
964: free(tmpbuf, M_TEMP);
965: return (error);
966: }
967:
968: void
969: vndshutdown(void)
970: {
971: struct vnd_softc *vnd;
972:
973: for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
974: if (vnd->sc_flags & VNF_INITED)
975: vndclear(vnd);
976: }
977:
978: void
979: vndclear(struct vnd_softc *vnd)
980: {
981: struct vnode *vp = vnd->sc_vp;
982: struct proc *p = curproc; /* XXX */
983:
984: DNPRINTF(VDB_FOLLOW, "vndclear(%p): vp %p\n", vnd, vp);
985:
986: vnd->sc_flags &= ~VNF_INITED;
987: if (vp == NULL)
988: panic("vndioctl: null vp");
989: (void) vn_close(vp, VNDRW(vnd), vnd->sc_cred, p);
990: crfree(vnd->sc_cred);
991: vnd->sc_vp = NULL;
992: vnd->sc_cred = NULL;
993: vnd->sc_size = 0;
994: }
995:
996: daddr64_t
997: vndsize(dev_t dev)
998: {
999: int unit = vndunit(dev);
1000: struct vnd_softc *vnd = &vnd_softc[unit];
1001:
1002: if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0)
1003: return (-1);
1004: return (vnd->sc_size);
1005: }
1006:
1007: int
1008: vnddump(dev_t dev, daddr64_t blkno, caddr_t va, size_t size)
1009: {
1010:
1011: /* Not implemented. */
1012: return (ENXIO);
1013: }
CVSweb