Annotation of sys/nfs/nfs_kq.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: nfs_kq.c,v 1.9 2007/06/01 23:47:57 deraadt Exp $ */
2: /* $NetBSD: nfs_kq.c,v 1.7 2003/10/30 01:43:10 simonb Exp $ */
3:
4: /*-
5: * Copyright (c) 2002 The NetBSD Foundation, Inc.
6: * All rights reserved.
7: *
8: * This code is derived from software contributed to The NetBSD Foundation
9: * by Jaromir Dolecek.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by the NetBSD
22: * Foundation, Inc. and its contributors.
23: * 4. Neither the name of The NetBSD Foundation nor the names of its
24: * contributors may be used to endorse or promote products derived
25: * from this software without specific prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37: * POSSIBILITY OF SUCH DAMAGE.
38: */
39:
40: #include <sys/cdefs.h>
41:
42: #include <sys/param.h>
43: #include <sys/systm.h>
44: #include <sys/kernel.h>
45: #include <sys/proc.h>
46: #include <sys/mount.h>
47: #include <sys/malloc.h>
48: #include <sys/vnode.h>
49: #include <sys/unistd.h>
50: #include <sys/file.h>
51: #include <sys/kthread.h>
52: #include <sys/rwlock.h>
53:
54: #include <uvm/uvm_extern.h>
55: #include <uvm/uvm.h>
56:
57: #include <nfs/rpcv2.h>
58: #include <nfs/nfsproto.h>
59: #include <nfs/nfs.h>
60: #include <nfs/nfsnode.h>
61: #include <nfs/nfs_var.h>
62:
/*
 * Forward declarations: the poller thread entry point and the kqueue
 * filter callbacks referenced by the filterops tables in this file.
 */
void	nfs_kqpoll(void *arg);

void	filt_nfsdetach(struct knote *kn);
int	filt_nfsread(struct knote *kn, long hint);
int	filt_nfsvnode(struct knote *kn, long hint);
68:
69: struct kevq {
70: SLIST_ENTRY(kevq) kev_link;
71: struct vnode *vp;
72: u_int usecount;
73: u_int flags;
74: #define KEVQ_BUSY 0x01 /* currently being processed */
75: #define KEVQ_WANT 0x02 /* want to change this entry */
76: struct timespec omtime; /* old modification time */
77: struct timespec octime; /* old change time */
78: nlink_t onlink; /* old number of references to file */
79: };
80: SLIST_HEAD(kevqlist, kevq);
81:
82: struct rwlock nfskevq_lock = RWLOCK_INITIALIZER("nfskqlk");
83: struct proc *pnfskq;
84: struct kevqlist kevlist = SLIST_HEAD_INITIALIZER(kevlist);
85:
86: /*
87: * This quite simplistic routine periodically checks for server changes
88: * of any of the watched files every NFS_MINATTRTIMO/2 seconds.
89: * Only changes in size, modification time, change time and nlinks
90: * are being checked, everything else is ignored.
91: * The routine only calls VOP_GETATTR() when it's likely it would get
92: * some new data, i.e. when the vnode expires from attrcache. This
93: * should give same result as periodically running stat(2) from userland,
94: * while keeping CPU/network usage low, and still provide proper kevent
95: * semantics.
96: * The poller thread is created when first vnode is added to watch list,
97: * and exits when the watch list is empty. The overhead of thread creation
98: * isn't really important, neither speed of attach and detach of knote.
99: */
100: /* ARGSUSED */
101: void
102: nfs_kqpoll(void *arg)
103: {
104: struct kevq *ke;
105: struct vattr attr;
106: struct proc *p = pnfskq;
107: u_quad_t osize;
108: int error;
109:
110: for(;;) {
111: rw_enter_write(&nfskevq_lock);
112: SLIST_FOREACH(ke, &kevlist, kev_link) {
113: struct nfsnode *np = VTONFS(ke->vp);
114:
115: #ifdef DEBUG
116: printf("nfs_kqpoll on: ");
117: VOP_PRINT(ke->vp);
118: #endif
119: /* skip if still in attrcache */
120: if (nfs_getattrcache(ke->vp, &attr) != ENOENT)
121: continue;
122:
123: /*
124: * Mark entry busy, release lock and check
125: * for changes.
126: */
127: ke->flags |= KEVQ_BUSY;
128: rw_exit_write(&nfskevq_lock);
129:
130: /* save v_size, nfs_getattr() updates it */
131: osize = np->n_size;
132:
133: error = VOP_GETATTR(ke->vp, &attr, p->p_ucred, p);
134: if (error == ESTALE) {
135: np->n_attrstamp = 0;
136: VN_KNOTE(ke->vp, NOTE_DELETE);
137: goto next;
138: }
139:
140: /* following is a bit fragile, but about best
141: * we can get */
142: if (attr.va_size != osize) {
143: int extended = (attr.va_size > osize);
144: VN_KNOTE(ke->vp, NOTE_WRITE
145: | (extended ? NOTE_EXTEND : 0));
146: ke->omtime = attr.va_mtime;
147: } else if (attr.va_mtime.tv_sec != ke->omtime.tv_sec
148: || attr.va_mtime.tv_nsec != ke->omtime.tv_nsec) {
149: VN_KNOTE(ke->vp, NOTE_WRITE);
150: ke->omtime = attr.va_mtime;
151: }
152:
153: if (attr.va_ctime.tv_sec != ke->octime.tv_sec
154: || attr.va_ctime.tv_nsec != ke->octime.tv_nsec) {
155: VN_KNOTE(ke->vp, NOTE_ATTRIB);
156: ke->octime = attr.va_ctime;
157: }
158:
159: if (attr.va_nlink != ke->onlink) {
160: VN_KNOTE(ke->vp, NOTE_LINK);
161: ke->onlink = attr.va_nlink;
162: }
163:
164: next:
165: rw_enter_write(&nfskevq_lock);
166: ke->flags &= ~KEVQ_BUSY;
167: if (ke->flags & KEVQ_WANT) {
168: ke->flags &= ~KEVQ_WANT;
169: wakeup(ke);
170: }
171: }
172:
173: if (SLIST_EMPTY(&kevlist)) {
174: /* Nothing more to watch, exit */
175: pnfskq = NULL;
176: rw_exit_write(&nfskevq_lock);
177: kthread_exit(0);
178: }
179: rw_exit_write(&nfskevq_lock);
180:
181: /* wait a while before checking for changes again */
182: tsleep(pnfskq, PSOCK, "nfskqpw", NFS_MINATTRTIMO * hz / 2);
183:
184: }
185: }
186:
187: void
188: filt_nfsdetach(struct knote *kn)
189: {
190: struct vnode *vp = (struct vnode *)kn->kn_hook;
191: struct kevq *ke;
192:
193: SLIST_REMOVE(&vp->v_selectinfo.si_note, kn, knote, kn_selnext);
194:
195: /* Remove the vnode from watch list */
196: rw_enter_write(&nfskevq_lock);
197: SLIST_FOREACH(ke, &kevlist, kev_link) {
198: if (ke->vp == vp) {
199: while (ke->flags & KEVQ_BUSY) {
200: ke->flags |= KEVQ_WANT;
201: rw_exit_write(&nfskevq_lock);
202: (void) tsleep(ke, PSOCK, "nfskqdet", 0);
203: rw_enter_write(&nfskevq_lock);
204: }
205:
206: if (ke->usecount > 1) {
207: /* keep, other kevents need this */
208: ke->usecount--;
209: } else {
210: /* last user, g/c */
211: SLIST_REMOVE(&kevlist, ke, kevq, kev_link);
212: FREE(ke, M_KEVENT);
213: }
214: break;
215: }
216: }
217: rw_exit_write(&nfskevq_lock);
218: }
219:
220: int
221: filt_nfsread(struct knote *kn, long hint)
222: {
223: struct vnode *vp = (struct vnode *)kn->kn_hook;
224: struct nfsnode *np = VTONFS(vp);
225:
226: /*
227: * filesystem is gone, so set the EOF flag and schedule
228: * the knote for deletion.
229: */
230: if (hint == NOTE_REVOKE) {
231: kn->kn_flags |= (EV_EOF | EV_ONESHOT);
232: return (1);
233: }
234:
235: kn->kn_data = np->n_size - kn->kn_fp->f_offset;
236: #ifdef DEBUG
237: printf("nfsread event. %d\n", kn->kn_data);
238: #endif
239: return (kn->kn_data != 0);
240: }
241:
242: int
243: filt_nfsvnode(struct knote *kn, long hint)
244: {
245: if (kn->kn_sfflags & hint)
246: kn->kn_fflags |= hint;
247: if (hint == NOTE_REVOKE) {
248: kn->kn_flags |= EV_EOF;
249: return (1);
250: }
251: return (kn->kn_fflags != 0);
252: }
253:
254: static const struct filterops nfsread_filtops =
255: { 1, NULL, filt_nfsdetach, filt_nfsread };
256: static const struct filterops nfsvnode_filtops =
257: { 1, NULL, filt_nfsdetach, filt_nfsvnode };
258:
259: int
260: nfs_kqfilter(void *v)
261: {
262: struct vop_kqfilter_args *ap = v;
263: struct vnode *vp;
264: struct knote *kn;
265: struct kevq *ke;
266: int error = 0;
267: struct vattr attr;
268: struct proc *p = curproc; /* XXX */
269:
270: vp = ap->a_vp;
271: kn = ap->a_kn;
272:
273: #ifdef DEBUG
274: printf("nfs_kqfilter(%d) on: ", kn->kn_filter);
275: VOP_PRINT(vp);
276: #endif
277:
278: switch (kn->kn_filter) {
279: case EVFILT_READ:
280: kn->kn_fop = &nfsread_filtops;
281: break;
282: case EVFILT_VNODE:
283: kn->kn_fop = &nfsvnode_filtops;
284: break;
285: default:
286: return (1);
287: }
288:
289: kn->kn_hook = vp;
290:
291: /*
292: * Put the vnode to watched list.
293: */
294:
295: /*
296: * Fetch current attributes. It's only needed when the vnode
297: * is not watched yet, but we need to do this without lock
298: * held. This is likely cheap due to attrcache, so do it now.
299: */
300: memset(&attr, 0, sizeof(attr));
301: (void) VOP_GETATTR(vp, &attr, p->p_ucred, p);
302:
303: rw_enter_write(&nfskevq_lock);
304:
305: /* ensure the poller is running */
306: if (!pnfskq) {
307: error = kthread_create(nfs_kqpoll, NULL, &pnfskq,
308: "nfskqpoll");
309: if (error)
310: goto out;
311: }
312:
313: SLIST_FOREACH(ke, &kevlist, kev_link)
314: if (ke->vp == vp)
315: break;
316:
317: if (ke) {
318: /* already watched, so just bump usecount */
319: ke->usecount++;
320: } else {
321: /* need a new one */
322: MALLOC(ke, struct kevq *,
323: sizeof(struct kevq), M_KEVENT, M_WAITOK);
324: ke->vp = vp;
325: ke->usecount = 1;
326: ke->flags = 0;
327: ke->omtime = attr.va_mtime;
328: ke->octime = attr.va_ctime;
329: ke->onlink = attr.va_nlink;
330: SLIST_INSERT_HEAD(&kevlist, ke, kev_link);
331: }
332:
333: /* kick the poller */
334: wakeup(pnfskq);
335:
336: SLIST_INSERT_HEAD(&vp->v_selectinfo.si_note, kn, kn_selnext);
337:
338: out:
339: rw_exit_write(&nfskevq_lock);
340: return (error);
341: }
CVSweb