Annotation of sys/kern/uipc_usrreq.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: uipc_usrreq.c,v 1.34 2007/01/16 17:52:18 thib Exp $ */
2: /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */
3:
4: /*
5: * Copyright (c) 1982, 1986, 1989, 1991, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. Neither the name of the University nor the names of its contributors
17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: *
32: * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
33: */
34:
35: #include <sys/param.h>
36: #include <sys/systm.h>
37: #include <sys/proc.h>
38: #include <sys/filedesc.h>
39: #include <sys/domain.h>
40: #include <sys/protosw.h>
41: #include <sys/socket.h>
42: #include <sys/socketvar.h>
43: #include <sys/unpcb.h>
44: #include <sys/un.h>
45: #include <sys/namei.h>
46: #include <sys/vnode.h>
47: #include <sys/file.h>
48: #include <sys/stat.h>
49: #include <sys/mbuf.h>
50:
51: /*
52: * Unix communications domain.
53: *
54: * TODO:
55: * SEQPACKET, RDM
56: * rethink name space problems
57: * need a proper out-of-band
58: */
59: struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
60: ino_t unp_ino; /* prototype for fake inode numbers */
61:
62: /*ARGSUSED*/
63: int
64: uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
65: struct mbuf *control)
66: {
67: struct unpcb *unp = sotounpcb(so);
68: struct socket *so2;
69: int error = 0;
70: struct proc *p = curproc; /* XXX */
71:
72: if (req == PRU_CONTROL)
73: return (EOPNOTSUPP);
74: if (req != PRU_SEND && control && control->m_len) {
75: error = EOPNOTSUPP;
76: goto release;
77: }
78: if (unp == NULL && req != PRU_ATTACH) {
79: error = EINVAL;
80: goto release;
81: }
82: switch (req) {
83:
84: case PRU_ATTACH:
85: if (unp) {
86: error = EISCONN;
87: break;
88: }
89: error = unp_attach(so);
90: break;
91:
92: case PRU_DETACH:
93: unp_detach(unp);
94: break;
95:
96: case PRU_BIND:
97: error = unp_bind(unp, nam, p);
98: break;
99:
100: case PRU_LISTEN:
101: if (unp->unp_vnode == NULL)
102: error = EINVAL;
103: break;
104:
105: case PRU_CONNECT:
106: error = unp_connect(so, nam, p);
107: break;
108:
109: case PRU_CONNECT2:
110: error = unp_connect2(so, (struct socket *)nam);
111: break;
112:
113: case PRU_DISCONNECT:
114: unp_disconnect(unp);
115: break;
116:
117: case PRU_ACCEPT:
118: /*
119: * Pass back name of connected socket,
120: * if it was bound and we are still connected
121: * (our peer may have closed already!).
122: */
123: if (unp->unp_conn && unp->unp_conn->unp_addr) {
124: nam->m_len = unp->unp_conn->unp_addr->m_len;
125: bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
126: mtod(nam, caddr_t), (unsigned)nam->m_len);
127: } else {
128: nam->m_len = sizeof(sun_noname);
129: *(mtod(nam, struct sockaddr *)) = sun_noname;
130: }
131: break;
132:
133: case PRU_SHUTDOWN:
134: socantsendmore(so);
135: unp_shutdown(unp);
136: break;
137:
138: case PRU_RCVD:
139: switch (so->so_type) {
140:
141: case SOCK_DGRAM:
142: panic("uipc 1");
143: /*NOTREACHED*/
144:
145: case SOCK_STREAM:
146: #define rcv (&so->so_rcv)
147: #define snd (&so2->so_snd)
148: if (unp->unp_conn == NULL)
149: break;
150: so2 = unp->unp_conn->unp_socket;
151: /*
152: * Adjust backpressure on sender
153: * and wakeup any waiting to write.
154: */
155: snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
156: unp->unp_mbcnt = rcv->sb_mbcnt;
157: snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
158: unp->unp_cc = rcv->sb_cc;
159: sowwakeup(so2);
160: #undef snd
161: #undef rcv
162: break;
163:
164: default:
165: panic("uipc 2");
166: }
167: break;
168:
169: case PRU_SEND:
170: if (control && (error = unp_internalize(control, p)))
171: break;
172: switch (so->so_type) {
173:
174: case SOCK_DGRAM: {
175: struct sockaddr *from;
176:
177: if (nam) {
178: if (unp->unp_conn) {
179: error = EISCONN;
180: break;
181: }
182: error = unp_connect(so, nam, p);
183: if (error)
184: break;
185: } else {
186: if (unp->unp_conn == NULL) {
187: error = ENOTCONN;
188: break;
189: }
190: }
191: so2 = unp->unp_conn->unp_socket;
192: if (unp->unp_addr)
193: from = mtod(unp->unp_addr, struct sockaddr *);
194: else
195: from = &sun_noname;
196: if (sbappendaddr(&so2->so_rcv, from, m, control)) {
197: sorwakeup(so2);
198: m = NULL;
199: control = NULL;
200: } else
201: error = ENOBUFS;
202: if (nam)
203: unp_disconnect(unp);
204: break;
205: }
206:
207: case SOCK_STREAM:
208: #define rcv (&so2->so_rcv)
209: #define snd (&so->so_snd)
210: if (so->so_state & SS_CANTSENDMORE) {
211: error = EPIPE;
212: break;
213: }
214: if (unp->unp_conn == NULL) {
215: error = ENOTCONN;
216: break;
217: }
218: so2 = unp->unp_conn->unp_socket;
219: /*
220: * Send to paired receive port, and then reduce
221: * send buffer hiwater marks to maintain backpressure.
222: * Wake up readers.
223: */
224: if (control) {
225: if (sbappendcontrol(rcv, m, control))
226: control = NULL;
227: } else
228: sbappend(rcv, m);
229: snd->sb_mbmax -=
230: rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
231: unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
232: snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
233: unp->unp_conn->unp_cc = rcv->sb_cc;
234: sorwakeup(so2);
235: m = NULL;
236: #undef snd
237: #undef rcv
238: break;
239:
240: default:
241: panic("uipc 4");
242: }
243: /* we need to undo unp_internalize in case of errors */
244: if (control && error)
245: unp_dispose(control);
246: break;
247:
248: case PRU_ABORT:
249: unp_drop(unp, ECONNABORTED);
250: break;
251:
252: case PRU_SENSE:
253: ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
254: if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) {
255: so2 = unp->unp_conn->unp_socket;
256: ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
257: }
258: ((struct stat *) m)->st_dev = NODEV;
259: if (unp->unp_ino == 0)
260: unp->unp_ino = unp_ino++;
261: ((struct stat *) m)->st_atimespec =
262: ((struct stat *) m)->st_mtimespec =
263: ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
264: ((struct stat *) m)->st_ino = unp->unp_ino;
265: return (0);
266:
267: case PRU_RCVOOB:
268: return (EOPNOTSUPP);
269:
270: case PRU_SENDOOB:
271: error = EOPNOTSUPP;
272: break;
273:
274: case PRU_SOCKADDR:
275: if (unp->unp_addr) {
276: nam->m_len = unp->unp_addr->m_len;
277: bcopy(mtod(unp->unp_addr, caddr_t),
278: mtod(nam, caddr_t), (unsigned)nam->m_len);
279: } else
280: nam->m_len = 0;
281: break;
282:
283: case PRU_PEERADDR:
284: if (unp->unp_conn && unp->unp_conn->unp_addr) {
285: nam->m_len = unp->unp_conn->unp_addr->m_len;
286: bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
287: mtod(nam, caddr_t), (unsigned)nam->m_len);
288: } else
289: nam->m_len = 0;
290: break;
291:
292: case PRU_PEEREID:
293: if (unp->unp_flags & UNP_FEIDS) {
294: nam->m_len = sizeof(struct unpcbid);
295: bcopy((caddr_t)(&(unp->unp_connid)),
296: mtod(nam, caddr_t), (unsigned)nam->m_len);
297: } else
298: nam->m_len = 0;
299: break;
300:
301: case PRU_SLOWTIMO:
302: break;
303:
304: default:
305: panic("piusrreq");
306: }
307: release:
308: if (control)
309: m_freem(control);
310: if (m)
311: m_freem(m);
312: return (error);
313: }
314:
315: /*
316: * Both send and receive buffers are allocated PIPSIZ bytes of buffering
317: * for stream sockets, although the total for sender and receiver is
318: * actually only PIPSIZ.
319: * Datagram sockets really use the sendspace as the maximum datagram size,
320: * and don't really want to reserve the sendspace. Their recvspace should
321: * be large enough for at least one max-size datagram plus address.
322: */
323: #define PIPSIZ 4096
324: u_long unpst_sendspace = PIPSIZ;
325: u_long unpst_recvspace = PIPSIZ;
326: u_long unpdg_sendspace = 2*1024; /* really max datagram size */
327: u_long unpdg_recvspace = 4*1024;
328:
329: int unp_rights; /* file descriptors in flight */
330:
331: int
332: unp_attach(struct socket *so)
333: {
334: struct unpcb *unp;
335: int error;
336:
337: if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
338: switch (so->so_type) {
339:
340: case SOCK_STREAM:
341: error = soreserve(so, unpst_sendspace, unpst_recvspace);
342: break;
343:
344: case SOCK_DGRAM:
345: error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
346: break;
347:
348: default:
349: panic("unp_attach");
350: }
351: if (error)
352: return (error);
353: }
354: unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
355: if (unp == NULL)
356: return (ENOBUFS);
357: bzero((caddr_t)unp, sizeof(*unp));
358: unp->unp_socket = so;
359: so->so_pcb = unp;
360: nanotime(&unp->unp_ctime);
361: return (0);
362: }
363:
364: void
365: unp_detach(struct unpcb *unp)
366: {
367:
368: if (unp->unp_vnode) {
369: unp->unp_vnode->v_socket = NULL;
370: vrele(unp->unp_vnode);
371: unp->unp_vnode = NULL;
372: }
373: if (unp->unp_conn)
374: unp_disconnect(unp);
375: while (unp->unp_refs)
376: unp_drop(unp->unp_refs, ECONNRESET);
377: soisdisconnected(unp->unp_socket);
378: unp->unp_socket->so_pcb = NULL;
379: m_freem(unp->unp_addr);
380: if (unp_rights) {
381: /*
382: * Normally the receive buffer is flushed later,
383: * in sofree, but if our receive buffer holds references
384: * to descriptors that are now garbage, we will dispose
385: * of those descriptor references after the garbage collector
386: * gets them (resulting in a "panic: closef: count < 0").
387: */
388: sorflush(unp->unp_socket);
389: free(unp, M_PCB);
390: unp_gc();
391: } else
392: free(unp, M_PCB);
393: }
394:
395: int
396: unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
397: {
398: struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
399: struct vnode *vp;
400: struct vattr vattr;
401: int error, namelen;
402: struct nameidata nd;
403: char buf[MLEN];
404:
405: if (unp->unp_vnode != NULL)
406: return (EINVAL);
407: namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
408: if (namelen <= 0 || namelen >= MLEN)
409: return EINVAL;
410: strncpy(buf, soun->sun_path, namelen);
411: buf[namelen] = 0; /* null-terminate the string */
412: NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, buf, p);
413: /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
414: if ((error = namei(&nd)) != 0)
415: return (error);
416: vp = nd.ni_vp;
417: if (vp != NULL) {
418: VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
419: if (nd.ni_dvp == vp)
420: vrele(nd.ni_dvp);
421: else
422: vput(nd.ni_dvp);
423: vrele(vp);
424: return (EADDRINUSE);
425: }
426: VATTR_NULL(&vattr);
427: vattr.va_type = VSOCK;
428: vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
429: error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
430: if (error)
431: return (error);
432: vp = nd.ni_vp;
433: vp->v_socket = unp->unp_socket;
434: unp->unp_vnode = vp;
435: unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
436: unp->unp_connid.unp_euid = p->p_ucred->cr_uid;
437: unp->unp_connid.unp_egid = p->p_ucred->cr_gid;
438: unp->unp_flags |= UNP_FEIDSBIND;
439: VOP_UNLOCK(vp, 0, p);
440: return (0);
441: }
442:
/*
 * Connect `so' to the socket bound at the path carried in `nam'.
 *
 * The path must name a VSOCK vnode that the caller may write to and that
 * still has a socket of the same type attached.  For protocols with
 * PR_CONNREQUIRED a fresh socket is spawned from the listener with
 * sonewconn() and the connection is made to that one; credentials are
 * exchanged between the two ends along the way.
 *
 * Returns 0 or an errno value (EMSGSIZE, ENOTSOCK, ECONNREFUSED,
 * EPROTOTYPE, or whatever namei()/VOP_ACCESS()/unp_connect2() report).
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *sun = mtod(nam, struct sockaddr_un *);
	struct socket *peer, *child;
	struct unpcb *unp, *lunp, *cunp;
	struct nameidata nd;
	struct vnode *vp;
	caddr_t end;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, sun->sun_path, p);

	/*
	 * Make sure the path is NUL-terminated.  If the address fills the
	 * mbuf's data area exactly there is no room to append one, so the
	 * last byte must already be a NUL.
	 */
	end = mtod(nam, caddr_t) + nam->m_len;
	if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) {	/* XXX */
		if (end[-1] != 0)
			return (EMSGSIZE);
	} else
		*end = 0;

	error = namei(&nd);
	if (error != 0)
		return (error);
	vp = nd.ni_vp;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error != 0)
		goto bad;

	peer = vp->v_socket;
	if (peer == NULL) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != peer->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((peer->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		child = sonewconn(peer, 0);
		if (child == 0) {
			error = ECONNREFUSED;
			goto bad;
		}

		unp = sotounpcb(so);
		lunp = sotounpcb(peer);
		cunp = sotounpcb(child);

		/* The accepted socket inherits the listener's name ... */
		if (lunp->unp_addr)
			cunp->unp_addr =
			    m_copy(lunp->unp_addr, 0, (int)M_COPYALL);
		/* ... and learns the connecting process' credentials. */
		cunp->unp_connid.unp_euid = p->p_ucred->cr_uid;
		cunp->unp_connid.unp_egid = p->p_ucred->cr_gid;
		cunp->unp_flags |= UNP_FEIDS;

		/* The connector learns the credentials recorded at bind. */
		if (lunp->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid.unp_euid = lunp->unp_connid.unp_euid;
			unp->unp_connid.unp_egid = lunp->unp_connid.unp_egid;
			unp->unp_flags |= UNP_FEIDS;
		}

		/* From here on we talk to the new socket. */
		peer = child;
	}
	error = unp_connect2(so, peer);
bad:
	vput(vp);
	return (error);
}
504:
505: int
506: unp_connect2(struct socket *so, struct socket *so2)
507: {
508: struct unpcb *unp = sotounpcb(so);
509: struct unpcb *unp2;
510:
511: if (so2->so_type != so->so_type)
512: return (EPROTOTYPE);
513: unp2 = sotounpcb(so2);
514: unp->unp_conn = unp2;
515: switch (so->so_type) {
516:
517: case SOCK_DGRAM:
518: unp->unp_nextref = unp2->unp_refs;
519: unp2->unp_refs = unp;
520: soisconnected(so);
521: break;
522:
523: case SOCK_STREAM:
524: unp2->unp_conn = unp;
525: soisconnected(so);
526: soisconnected(so2);
527: break;
528:
529: default:
530: panic("unp_connect2");
531: }
532: return (0);
533: }
534:
535: void
536: unp_disconnect(struct unpcb *unp)
537: {
538: struct unpcb *unp2 = unp->unp_conn;
539:
540: if (unp2 == NULL)
541: return;
542: unp->unp_conn = NULL;
543: switch (unp->unp_socket->so_type) {
544:
545: case SOCK_DGRAM:
546: if (unp2->unp_refs == unp)
547: unp2->unp_refs = unp->unp_nextref;
548: else {
549: unp2 = unp2->unp_refs;
550: for (;;) {
551: if (unp2 == NULL)
552: panic("unp_disconnect");
553: if (unp2->unp_nextref == unp)
554: break;
555: unp2 = unp2->unp_nextref;
556: }
557: unp2->unp_nextref = unp->unp_nextref;
558: }
559: unp->unp_nextref = NULL;
560: unp->unp_socket->so_state &= ~SS_ISCONNECTED;
561: break;
562:
563: case SOCK_STREAM:
564: soisdisconnected(unp->unp_socket);
565: unp2->unp_conn = NULL;
566: soisdisconnected(unp2->unp_socket);
567: break;
568: }
569: }
570:
#ifdef notdef
/* Compiled out: aborting a pcb would simply detach it. */
unp_abort(struct unpcb *unp)
{

	unp_detach(unp);
}
#endif
577:
578: void
579: unp_shutdown(struct unpcb *unp)
580: {
581: struct socket *so;
582:
583: if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
584: (so = unp->unp_conn->unp_socket))
585: socantrcvmore(so);
586: }
587:
588: void
589: unp_drop(struct unpcb *unp, int errno)
590: {
591: struct socket *so = unp->unp_socket;
592:
593: so->so_error = errno;
594: unp_disconnect(unp);
595: if (so->so_head) {
596: so->so_pcb = NULL;
597: sofree(so);
598: m_freem(unp->unp_addr);
599: free(unp, M_PCB);
600: }
601: }
602:
#ifdef notdef
/* Compiled out: placeholder drain hook, intentionally empty. */
unp_drain(void)
{
}
#endif
609:
610: int
611: unp_externalize(struct mbuf *rights)
612: {
613: struct proc *p = curproc; /* XXX */
614: struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
615: int i, *fdp;
616: struct file **rp;
617: struct file *fp;
618: int nfds, error = 0;
619:
620: nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
621: sizeof(struct file *);
622: rp = (struct file **)CMSG_DATA(cm);
623:
624: fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);
625:
626: #ifdef notyet
627: /* Make sure the recipient should be able to see the descriptors.. */
628: if (p->p_cwdi->cwdi_rdir != NULL) {
629: rp = (struct file **)CMSG_DATA(cm);
630: for (i = 0; i < nfds; i++) {
631: fp = *rp++;
632: /*
633: * If we are in a chroot'ed directory, and
634: * someone wants to pass us a directory, make
635: * sure it's inside the subtree we're allowed
636: * to access.
637: */
638: if (fp->f_type == DTYPE_VNODE) {
639: struct vnode *vp = (struct vnode *)fp->f_data;
640: if ((vp->v_type == VDIR) &&
641: !vn_isunder(vp, p->p_cwdi->cwdi_rdir, p)) {
642: error = EPERM;
643: break;
644: }
645: }
646: }
647: }
648: #endif
649:
650: restart:
651: fdplock(p->p_fd);
652: if (error != 0) {
653: rp = ((struct file **)CMSG_DATA(cm));
654: for (i = 0; i < nfds; i++) {
655: fp = *rp;
656: /*
657: * zero the pointer before calling unp_discard,
658: * since it may end up in unp_gc()..
659: */
660: *rp++ = NULL;
661: unp_discard(fp);
662: }
663: goto out;
664: }
665:
666: /*
667: * First loop -- allocate file descriptor table slots for the
668: * new descriptors.
669: */
670: rp = ((struct file **)CMSG_DATA(cm));
671: for (i = 0; i < nfds; i++) {
672: bcopy(rp, &fp, sizeof(fp));
673: rp++;
674: if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
675: /*
676: * Back out what we've done so far.
677: */
678: for (--i; i >= 0; i--)
679: fdremove(p->p_fd, fdp[i]);
680:
681: if (error == ENOSPC) {
682: fdexpand(p);
683: error = 0;
684: } else {
685: /*
686: * This is the error that has historically
687: * been returned, and some callers may
688: * expect it.
689: */
690: error = EMSGSIZE;
691: }
692: fdpunlock(p->p_fd);
693: goto restart;
694: }
695:
696: /*
697: * Make the slot reference the descriptor so that
698: * fdalloc() works properly.. We finalize it all
699: * in the loop below.
700: */
701: p->p_fd->fd_ofiles[fdp[i]] = fp;
702: }
703:
704: /*
705: * Now that adding them has succeeded, update all of the
706: * descriptor passing state.
707: */
708: rp = (struct file **)CMSG_DATA(cm);
709: for (i = 0; i < nfds; i++) {
710: fp = *rp++;
711: fp->f_msgcount--;
712: unp_rights--;
713: }
714:
715: /*
716: * Copy temporary array to message and adjust length, in case of
717: * transition from large struct file pointers to ints.
718: */
719: memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
720: cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
721: rights->m_len = CMSG_SPACE(nfds * sizeof(int));
722: out:
723: fdpunlock(p->p_fd);
724: free(fdp, M_TEMP);
725: return (error);
726: }
727:
728: int
729: unp_internalize(struct mbuf *control, struct proc *p)
730: {
731: struct filedesc *fdp = p->p_fd;
732: struct cmsghdr *cm = mtod(control, struct cmsghdr *);
733: struct file **rp, *fp;
734: int i, error;
735: int nfds, *ip, fd, neededspace;
736:
737: if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
738: cm->cmsg_len != control->m_len)
739: return (EINVAL);
740: nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);
741:
742: /* Make sure we have room for the struct file pointers */
743: morespace:
744: neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
745: control->m_len;
746: if (neededspace > M_TRAILINGSPACE(control)) {
747: /* if we already have a cluster, the message is just too big */
748: if (control->m_flags & M_EXT)
749: return (E2BIG);
750:
751: /* allocate a cluster and try again */
752: MCLGET(control, M_WAIT);
753: if ((control->m_flags & M_EXT) == 0)
754: return (ENOBUFS); /* allocation failed */
755:
756: /* copy the data to the cluster */
757: memcpy(mtod(control, char *), cm, cm->cmsg_len);
758: cm = mtod(control, struct cmsghdr *);
759: goto morespace;
760: }
761:
762: /* adjust message & mbuf to note amount of space actually used. */
763: cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
764: control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));
765:
766: ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
767: rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1;
768: for (i = 0; i < nfds; i++) {
769: bcopy(ip, &fd, sizeof fd);
770: ip--;
771: if ((fp = fd_getfile(fdp, fd)) == NULL) {
772: error = EBADF;
773: goto fail;
774: }
775: if (fp->f_count == LONG_MAX-2 ||
776: fp->f_msgcount == LONG_MAX-2) {
777: error = EDEADLK;
778: goto fail;
779: }
780: bcopy(&fp, rp, sizeof fp);
781: rp--;
782: fp->f_count++;
783: fp->f_msgcount++;
784: unp_rights++;
785: }
786: return (0);
787: fail:
788: /* Back out what we just did. */
789: for ( ; i > 0; i--) {
790: bcopy(rp, &fp, sizeof(fp));
791: rp++;
792: fp->f_count--;
793: fp->f_msgcount--;
794: unp_rights--;
795: }
796:
797: return (error);
798: }
799:
int	unp_defer;	/* files flagged FDEFER and not yet rescanned by unp_gc() */
int	unp_gcing;	/* non-zero while unp_gc() is running */

extern struct domain unixdomain;
802:
803: void
804: unp_gc(void)
805: {
806: struct file *fp, *nextfp;
807: struct socket *so;
808: struct file **extra_ref, **fpp;
809: int nunref, i;
810:
811: if (unp_gcing)
812: return;
813: unp_gcing = 1;
814: unp_defer = 0;
815: LIST_FOREACH(fp, &filehead, f_list)
816: fp->f_flag &= ~(FMARK|FDEFER);
817: do {
818: LIST_FOREACH(fp, &filehead, f_list) {
819: if (fp->f_flag & FDEFER) {
820: fp->f_flag &= ~FDEFER;
821: unp_defer--;
822: } else {
823: if (fp->f_count == 0)
824: continue;
825: if (fp->f_flag & FMARK)
826: continue;
827: if (fp->f_count == fp->f_msgcount)
828: continue;
829: }
830: fp->f_flag |= FMARK;
831:
832: if (fp->f_type != DTYPE_SOCKET ||
833: (so = (struct socket *)fp->f_data) == NULL)
834: continue;
835: if (so->so_proto->pr_domain != &unixdomain ||
836: (so->so_proto->pr_flags&PR_RIGHTS) == 0)
837: continue;
838: #ifdef notdef
839: if (so->so_rcv.sb_flags & SB_LOCK) {
840: /*
841: * This is problematical; it's not clear
842: * we need to wait for the sockbuf to be
843: * unlocked (on a uniprocessor, at least),
844: * and it's also not clear what to do
845: * if sbwait returns an error due to receipt
846: * of a signal. If sbwait does return
847: * an error, we'll go into an infinite
848: * loop. Delete all of this for now.
849: */
850: (void) sbwait(&so->so_rcv);
851: goto restart;
852: }
853: #endif
854: unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
855: }
856: } while (unp_defer);
857: /*
858: * We grab an extra reference to each of the file table entries
859: * that are not otherwise accessible and then free the rights
860: * that are stored in messages on them.
861: *
862: * The bug in the original code is a little tricky, so I'll describe
863: * what's wrong with it here.
864: *
865: * It is incorrect to simply unp_discard each entry for f_msgcount
866: * times -- consider the case of sockets A and B that contain
867: * references to each other. On a last close of some other socket,
868: * we trigger a gc since the number of outstanding rights (unp_rights)
869: * is non-zero. If during the sweep phase the gc code un_discards,
870: * we end up doing a (full) closef on the descriptor. A closef on A
871: * results in the following chain. Closef calls soo_close, which
872: * calls soclose. Soclose calls first (through the switch
873: * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
874: * returns because the previous instance had set unp_gcing, and
875: * we return all the way back to soclose, which marks the socket
876: * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
877: * to free up the rights that are queued in messages on the socket A,
878: * i.e., the reference on B. The sorflush calls via the dom_dispose
879: * switch unp_dispose, which unp_scans with unp_discard. This second
880: * instance of unp_discard just calls closef on B.
881: *
882: * Well, a similar chain occurs on B, resulting in a sorflush on B,
883: * which results in another closef on A. Unfortunately, A is already
884: * being closed, and the descriptor has already been marked with
885: * SS_NOFDREF, and soclose panics at this point.
886: *
887: * Here, we first take an extra reference to each inaccessible
888: * descriptor. Then, we call sorflush ourself, since we know
889: * it is a Unix domain socket anyhow. After we destroy all the
890: * rights carried in messages, we do a last closef to get rid
891: * of our extra reference. This is the last close, and the
892: * unp_detach etc will shut down the socket.
893: *
894: * 91/09/19, bsy@cs.cmu.edu
895: */
896: extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
897: for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
898: fp != NULL; fp = nextfp) {
899: nextfp = LIST_NEXT(fp, f_list);
900: if (fp->f_count == 0)
901: continue;
902: if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
903: *fpp++ = fp;
904: nunref++;
905: FREF(fp);
906: fp->f_count++;
907: }
908: }
909: for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
910: if ((*fpp)->f_type == DTYPE_SOCKET && (*fpp)->f_data != NULL)
911: sorflush((struct socket *)(*fpp)->f_data);
912: for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
913: (void) closef(*fpp, NULL);
914: free((caddr_t)extra_ref, M_FILE);
915: unp_gcing = 0;
916: }
917:
918: void
919: unp_dispose(struct mbuf *m)
920: {
921:
922: if (m)
923: unp_scan(m, unp_discard, 1);
924: }
925:
926: void
927: unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard)
928: {
929: struct mbuf *m;
930: struct file **rp, *fp;
931: struct cmsghdr *cm;
932: int i;
933: int qfds;
934:
935: while (m0) {
936: for (m = m0; m; m = m->m_next) {
937: if (m->m_type == MT_CONTROL &&
938: m->m_len >= sizeof(*cm)) {
939: cm = mtod(m, struct cmsghdr *);
940: if (cm->cmsg_level != SOL_SOCKET ||
941: cm->cmsg_type != SCM_RIGHTS)
942: continue;
943: qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
944: / sizeof(struct file *);
945: rp = (struct file **)CMSG_DATA(cm);
946: for (i = 0; i < qfds; i++) {
947: fp = *rp;
948: if (discard)
949: *rp = 0;
950: (*op)(fp);
951: rp++;
952: }
953: break; /* XXX, but saves time */
954: }
955: }
956: m0 = m0->m_nextpkt;
957: }
958: }
959:
960: void
961: unp_mark(struct file *fp)
962: {
963: if (fp == NULL)
964: return;
965:
966: if (fp->f_flag & FMARK)
967: return;
968:
969: if (fp->f_flag & FDEFER)
970: return;
971:
972: if (fp->f_type == DTYPE_SOCKET) {
973: unp_defer++;
974: fp->f_flag |= FDEFER;
975: } else {
976: fp->f_flag |= FMARK;
977: }
978: }
979:
980: void
981: unp_discard(struct file *fp)
982: {
983:
984: if (fp == NULL)
985: return;
986: FREF(fp);
987: fp->f_msgcount--;
988: unp_rights--;
989: (void) closef(fp, NULL);
990: }
CVSweb