Annotation of sys/net/pf_norm.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: pf_norm.c,v 1.109 2007/05/28 17:16:39 henning Exp $ */
2:
3: /*
4: * Copyright 2001 Niels Provos <provos@citi.umich.edu>
5: * All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26: */
27:
28: #include "pflog.h"
29:
30: #include <sys/param.h>
31: #include <sys/systm.h>
32: #include <sys/mbuf.h>
33: #include <sys/filio.h>
34: #include <sys/fcntl.h>
35: #include <sys/socket.h>
36: #include <sys/kernel.h>
37: #include <sys/time.h>
38: #include <sys/pool.h>
39:
40: #include <dev/rndvar.h>
41: #include <net/if.h>
42: #include <net/if_types.h>
43: #include <net/bpf.h>
44: #include <net/route.h>
45: #include <net/if_pflog.h>
46:
47: #include <netinet/in.h>
48: #include <netinet/in_var.h>
49: #include <netinet/in_systm.h>
50: #include <netinet/ip.h>
51: #include <netinet/ip_var.h>
52: #include <netinet/tcp.h>
53: #include <netinet/tcp_seq.h>
54: #include <netinet/udp.h>
55: #include <netinet/ip_icmp.h>
56:
57: #ifdef INET6
58: #include <netinet/ip6.h>
59: #endif /* INET6 */
60:
61: #include <net/pfvar.h>
62:
/*
 * One buffered IPv4 fragment: the mbuf carrying the fragment and a
 * pointer to its IP header.  Entries are kept on a pf_fragment's
 * fr_queue, sorted by fragment offset.
 */
struct pf_frent {
	LIST_ENTRY(pf_frent) fr_next;	/* link on pf_fragment fr_queue */
	struct ip *fr_ip;		/* IP header of this fragment */
	struct mbuf *fr_m;		/* mbuf chain holding the fragment */
};
68:
/*
 * One contiguous byte range already seen by the non-buffering fragment
 * cache.  Offsets are data offsets in host byte order; fr_end is
 * exclusive.  Entries live on a pf_fragment's fr_cache list.
 */
struct pf_frcache {
	LIST_ENTRY(pf_frcache) fr_next;	/* link on pf_fragment fr_cache */
	uint16_t fr_off;		/* start of cached range */
	uint16_t fr_end;		/* end of cached range (exclusive) */
};
74:
75: #define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */
76: #define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */
77: #define PFFRAG_DROP 0x0004 /* Drop all fragments */
78: #define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER))
79:
/*
 * Per-packet reassembly context, keyed by (src, dst, proto, id).
 * Lives both in an RB tree (lookup) and on a TAILQ (LRU for timeout
 * and flushing).  Depending on PFFRAG_NOBUFFER it either buffers the
 * actual fragments (fru_queue) or only tracks which byte ranges have
 * been seen (fru_cache).
 */
struct pf_fragment {
	RB_ENTRY(pf_fragment) fr_entry;		/* node in pf_frag_tree/pf_cache_tree */
	TAILQ_ENTRY(pf_fragment) frag_next;	/* link on LRU queue */
	struct in_addr	fr_src;
	struct in_addr	fr_dst;
	u_int8_t	fr_p;		/* protocol of this fragment */
	u_int8_t	fr_flags;	/* status flags */
	u_int16_t	fr_id;		/* fragment id for reassemble */
	u_int16_t	fr_max;		/* fragment data max */
	u_int32_t	fr_timeout;	/* last-use timestamp (time_second) */
#define fr_queue	fr_u.fru_queue
#define fr_cache	fr_u.fru_cache
	union {
		LIST_HEAD(pf_fragq, pf_frent) fru_queue;	/* buffering */
		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;	/* non-buf */
	} fr_u;
};
97:
/*
 * LRU queues of reassembly contexts; most recently used entries are
 * kept at the head, so expiry and flushing work from the tail.
 */
TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;
TAILQ_HEAD(pf_cachequeue, pf_fragment)	pf_cachequeue;

static __inline int	 pf_frag_compare(struct pf_fragment *,
			    struct pf_fragment *);
/* One RB type serves both trees: buffered reassembly and range cache. */
RB_HEAD(pf_frag_tree, pf_fragment)	pf_frag_tree, pf_cache_tree;
RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

/* Private prototypes */
void			 pf_ip2key(struct pf_fragment *, struct ip *);
void			 pf_remove_fragment(struct pf_fragment *);
void			 pf_flush_fragments(void);
void			 pf_free_fragment(struct pf_fragment *);
struct pf_fragment	*pf_find_fragment(struct ip *, struct pf_frag_tree *);
struct mbuf		*pf_reassemble(struct mbuf **, struct pf_fragment **,
			    struct pf_frent *, int);
struct mbuf		*pf_fragcache(struct mbuf **, struct ip*,
			    struct pf_fragment **, int, int, int *);
int			 pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
			    struct tcphdr *, int);

/* Debug printf, active only at PF_DEBUG_MISC or higher. */
#define	DPFPRINTF(x) do {				\
	if (pf_status.debug >= PF_DEBUG_MISC) {		\
		printf("%s: ", __func__);		\
		printf x ;				\
	}						\
} while(0)

/* Globals */
struct pool		 pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
struct pool		 pf_state_scrub_pl;
/* Current number of buffered frents / cached range entries. */
int			 pf_nfrents, pf_ncache;
131:
/*
 * Initialize the normalization subsystem: create the memory pools for
 * fragments, reassembly contexts, cache entries and state-scrub
 * records, apply the configured high-water limits, and initialize the
 * two LRU queues.
 */
void
pf_normalize_init(void)
{
	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
	    NULL);
	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL);
	pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
	    "pffrcache", NULL);
	pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
	    NULL);
	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
	    "pfstscr", NULL);

	/* Soft limit on contexts; hard limits on per-fragment entries. */
	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);

	TAILQ_INIT(&pf_fragqueue);
	TAILQ_INIT(&pf_cachequeue);
}
154:
155: static __inline int
156: pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
157: {
158: int diff;
159:
160: if ((diff = a->fr_id - b->fr_id))
161: return (diff);
162: else if ((diff = a->fr_p - b->fr_p))
163: return (diff);
164: else if (a->fr_src.s_addr < b->fr_src.s_addr)
165: return (-1);
166: else if (a->fr_src.s_addr > b->fr_src.s_addr)
167: return (1);
168: else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
169: return (-1);
170: else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
171: return (1);
172: return (0);
173: }
174:
/*
 * Expire reassembly contexts older than the PFTM_FRAG timeout.  Both
 * LRU queues keep the most recently used entry at the head, so we pop
 * from the tail until we hit an entry that is still fresh.
 */
void
pf_purge_expired_fragments(void)
{
	struct pf_fragment *frag;
	/* Entries whose fr_timeout is at or before this are expired. */
	u_int32_t expire = time_second -
	    pf_default_rule.timeout[PFTM_FRAG];

	/* Buffered reassembly queue. */
	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		KASSERT(BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
	}

	/* Non-buffering cache queue. */
	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
		KASSERT(!BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
		/* pf_free_fragment() must have unlinked the entry. */
		KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
	}
}
202:
203: /*
204: * Try to flush old fragments to make space for new ones
205: */
206:
207: void
208: pf_flush_fragments(void)
209: {
210: struct pf_fragment *frag;
211: int goal;
212:
213: goal = pf_nfrents * 9 / 10;
214: DPFPRINTF(("trying to free > %d frents\n",
215: pf_nfrents - goal));
216: while (goal < pf_nfrents) {
217: frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
218: if (frag == NULL)
219: break;
220: pf_free_fragment(frag);
221: }
222:
223:
224: goal = pf_ncache * 9 / 10;
225: DPFPRINTF(("trying to free > %d cache entries\n",
226: pf_ncache - goal));
227: while (goal < pf_ncache) {
228: frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
229: if (frag == NULL)
230: break;
231: pf_free_fragment(frag);
232: }
233: }
234:
235: /* Frees the fragments and all associated entries */
236:
237: void
238: pf_free_fragment(struct pf_fragment *frag)
239: {
240: struct pf_frent *frent;
241: struct pf_frcache *frcache;
242:
243: /* Free all fragments */
244: if (BUFFER_FRAGMENTS(frag)) {
245: for (frent = LIST_FIRST(&frag->fr_queue); frent;
246: frent = LIST_FIRST(&frag->fr_queue)) {
247: LIST_REMOVE(frent, fr_next);
248:
249: m_freem(frent->fr_m);
250: pool_put(&pf_frent_pl, frent);
251: pf_nfrents--;
252: }
253: } else {
254: for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
255: frcache = LIST_FIRST(&frag->fr_cache)) {
256: LIST_REMOVE(frcache, fr_next);
257:
258: KASSERT(LIST_EMPTY(&frag->fr_cache) ||
259: LIST_FIRST(&frag->fr_cache)->fr_off >
260: frcache->fr_end);
261:
262: pool_put(&pf_cent_pl, frcache);
263: pf_ncache--;
264: }
265: }
266:
267: pf_remove_fragment(frag);
268: }
269:
270: void
271: pf_ip2key(struct pf_fragment *key, struct ip *ip)
272: {
273: key->fr_p = ip->ip_p;
274: key->fr_id = ip->ip_id;
275: key->fr_src.s_addr = ip->ip_src.s_addr;
276: key->fr_dst.s_addr = ip->ip_dst.s_addr;
277: }
278:
279: struct pf_fragment *
280: pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
281: {
282: struct pf_fragment key;
283: struct pf_fragment *frag;
284:
285: pf_ip2key(&key, ip);
286:
287: frag = RB_FIND(pf_frag_tree, tree, &key);
288: if (frag != NULL) {
289: /* XXX Are we sure we want to update the timeout? */
290: frag->fr_timeout = time_second;
291: if (BUFFER_FRAGMENTS(frag)) {
292: TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
293: TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
294: } else {
295: TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
296: TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
297: }
298: }
299:
300: return (frag);
301: }
302:
303: /* Removes a fragment from the fragment queue and frees the fragment */
304:
305: void
306: pf_remove_fragment(struct pf_fragment *frag)
307: {
308: if (BUFFER_FRAGMENTS(frag)) {
309: RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
310: TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
311: pool_put(&pf_frag_pl, frag);
312: } else {
313: RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
314: TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
315: pool_put(&pf_cache_pl, frag);
316: }
317: }
318:
/* Data offset of a queued fragment, in bytes (ip_off is in 8-byte units). */
#define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
/*
 * Insert one fragment into the buffered reassembly queue for its
 * packet, trimming any overlap with already-queued fragments.  Creates
 * the pf_fragment context on first use.  Returns the fully
 * reassembled packet once all data has arrived, or NULL while the
 * packet is still incomplete (or on error, in which case the mbuf has
 * been consumed).  *frag is cleared when the context is torn down.
 * mff is non-zero when the IP_MF flag was set on this fragment.
 */
struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf	*m = *m0, *m2;
	struct pf_frent	*frea, *next;
	struct pf_frent	*frep = NULL;
	struct ip	*ip = frent->fr_ip;
	int		 hlen = ip->ip_hl << 2;
	/* Data offset and length of this fragment, header excluded. */
	u_int16_t	 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t	 ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_int16_t	 max = ip_len + off;	/* end of this fragment's data */

	KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			/* Try once more after reclaiming old contexts. */
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = frent->fr_ip->ip_src;
		(*frag)->fr_dst = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = time_second;
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off)
			break;
		frep = frea;	/* last fragment at or before our offset */
	}

	KASSERT(frep != NULL || frea != NULL);

	/* Trim the front of this fragment if it overlaps its predecessor. */
	if (frep != NULL &&
	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	    4 > off)
	{
		u_int16_t	precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len)
			goto drop_fragment;	/* fully covered: discard */
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len);
	}

	/* Trim or drop queued successors that our data overlaps. */
	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	    frea = next)
	{
		u_int16_t	aftercut;

		aftercut = ip_len + off - FR_IP_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
		    * 4)
		{
			/* Partial overlap: shave the front of frea. */
			frea->fr_ip->ip_len =
			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

 insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < max)
		(*frag)->fr_max = max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		/* off accumulates contiguous data; a gap means not done. */
		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off))
		{
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	KASSERT(frent != NULL);
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		/* Reassembled packet would exceed the IP length field. */
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	/* Concatenate all remaining fragments onto the first mbuf chain. */
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_cat(m, m2);
	}

	ip->ip_src = (*frag)->fr_src;
	ip->ip_dst = (*frag)->fr_dst;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	/* Restore the IP header that was stripped on entry. */
	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	m->m_len += hlen;
	m->m_data -= hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			plen += m2->m_len;
		m->m_pkthdr.len = plen;
	}

	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
	return (m);

 drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	return (NULL);
}
518:
/*
 * Non-buffering fragment cache (the "crop"/"drop-ovl" scrub modes).
 * Instead of queuing fragment data, track which byte ranges have been
 * seen per packet and pass fragments through after trimming any bytes
 * that duplicate an already-passed range.  Returns the (possibly
 * trimmed) mbuf to forward, or NULL when the fragment is dropped; on
 * NULL, *nomem is set if the drop was due to allocation failure.
 * When drop is non-zero, overlapping fragments are discarded instead
 * of trimmed and the whole packet is marked PFFRAG_DROP.
 */
struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
    int drop, int *nomem)
{
	struct mbuf		*m = *m0;
	struct pf_frcache	*frp, *fra, *cur = NULL;
	int			 ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
	/*
	 * Truncation to u_int16_t discards the flag bits that the
	 * left shift pushes above bit 15, leaving the byte offset.
	 */
	u_int16_t		 off = ntohs(h->ip_off) << 3;
	u_int16_t		 max = ip_len + off;
	int			 hosed = 0;	/* overlap seen while drop set */

	KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			/* Retry once after reclaiming old contexts. */
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = h->ip_src;
		(*frag)->fr_dst = h->ip_dst;
		(*frag)->fr_p = h->ip_p;
		(*frag)->fr_id = h->ip_id;
		(*frag)->fr_timeout = time_second;

		cur->fr_off = off;
		cur->fr_end = max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;	/* last cached range starting at or before off */
	}

	KASSERT(frp != NULL || fra != NULL);

	/* Handle overlap/adjacency with the preceding cached range. */
	if (frp != NULL) {
		int	precut;

		precut = frp->fr_end - off;
		if (precut >= ip_len) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
			frp->fr_end = max;
		} else if (precut > 0) {
			/* The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
			    max));

			off += precut;
			max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = max;

			if (!drop) {
				/* XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				/* Copy the IP header, cut the dup payload,
				 * then reattach the header in front. */
				*m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				KASSERT((*m0)->m_next == NULL);
				m_adj(m, precut + (h->ip_hl << 2));
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}


				h = mtod(m, struct ip *);


				KASSERT((int)m->m_len ==
				    ntohs(h->ip_len) - precut);
				h->ip_off = htons(ntohs(h->ip_off) +
				    (precut >> 3));
				h->ip_len = htons(ntohs(h->ip_len) - precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
			    max));

			/* New disjoint range entry after frp. */
			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	/* Handle overlap/adjacency with the following cached range. */
	if (fra != NULL) {
		int	aftercut;
		int	merge = 0;	/* fra may now touch cur/frp */

		aftercut = max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
			    h->ip_id, off, max, fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
			    h->ip_id, aftercut, off, max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}
				h = mtod(m, struct ip *);
				KASSERT((int)m->m_len ==
				    ntohs(h->ip_len) - aftercut);
				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
			    h->ip_id, -aftercut, off, max, fra->fr_off,
			    fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}


		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, cur->fr_off, cur->fr_end, off,
				    max, fra->fr_off, fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				pf_ncache--;
				cur = NULL;

			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				KASSERT(cur == NULL);
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, frp->fr_off, frp->fr_end, off,
				    max, fra->fr_off, fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				pf_ncache--;
				frp = NULL;

			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}


 pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < max)
		(*frag)->fr_max = max;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
		    (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

 no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

 drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
			    h->ip_id));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}
814:
/*
 * IPv4 scrub entry point.  Matches the packet against the scrub
 * ruleset, sanity-checks the header, optionally clears IP_DF, and for
 * fragments hands the packet to either full reassembly
 * (pf_reassemble) or the non-buffering cache (pf_fragcache),
 * depending on the matched rule's flags.  Also applies min-ttl and
 * random-id modifications with incremental checksum fixups.
 * Returns PF_PASS (possibly with *m0 replaced by a reassembled
 * packet) or PF_DROP with *reason set.
 */
int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
    struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct pf_rule		*r;
	struct pf_frent		*frent;
	struct pf_fragment	*frag = NULL;
	struct ip		*h = mtod(m, struct ip *);
	int			 mff = (ntohs(h->ip_off) & IP_MF);
	int			 hlen = h->ip_hl << 2;
	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t		 max;
	int			 ip_len;
	int			 ip_off;

	/* Walk the scrub ruleset, using skip-steps to prune mismatches. */
	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != h->ip_p)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (hlen < (int)sizeof(struct ip))
		goto drop;

	if (hlen > ntohs(h->ip_len))
		goto drop;

	/* Clear IP_DF if the rule uses the no-df option */
	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		/* Incrementally patch the header checksum. */
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/* We're dealing with a fragment now.  Don't allow fragments
	 * with IP_DF to enter the cache.  If the flag was cleared by
	 * no-df above, fine.  Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;
	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	max = fragoff + ip_len;

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */

		frag = pf_find_fragment(h, &pf_frag_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    max > frag->fr_max)
			goto bad;

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		pf_nfrents++;
		frent->fr_ip = h;
		frent->fr_m = m;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
		*m0 = m = pf_reassemble(m0, &frag, frent, mff);

		if (m == NULL)
			return (PF_DROP);

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;

		/* Header may have moved after reassembly. */
		h = mtod(m, struct ip *);
	} else {
		/* non-buffering fragment cache (drops or masks overlaps) */
		int	nomem = 0;

		if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) {
			/*
			 * Already passed the fragment cache in the
			 * input direction.  If we continued, it would
			 * appear to be a dup and would be dropped.
			 */
			goto fragment_pass;
		}

		frag = pf_find_fragment(h, &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    max > frag->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				frag->fr_flags |= PFFRAG_DROP;
			goto bad;
		}

		*m0 = m = pf_fragcache(m0, h, &frag, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			if (nomem)
				goto no_mem;
			goto drop;
		}

		/* Mark so the outbound pass skips the cache (see above). */
		if (dir == PF_IN)
			m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE;

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;
		goto fragment_pass;
	}

 no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}

	/* Replace the IP id if the rule asks for random ids. */
	if (r->rule_flag & PFRULE_RANDOMID) {
		u_int16_t ip_id = h->ip_id;

		h->ip_id = ip_randomid();
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;

	return (PF_PASS);

 fragment_pass:
	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;
	return (PF_PASS);

 no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

 drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

 bad:
	DPFPRINTF(("dropping bad fragment\n"));

	/* Free associated fragments */
	if (frag != NULL)
		pf_free_fragment(frag);

	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);

	return (PF_DROP);
}
1040:
#ifdef INET6
/*
 * Scrub an IPv6 packet against the active scrub ruleset: walk the
 * extension header chain, validate hop-by-hop option and jumbogram
 * encodings, check the payload length against the mbuf, and enforce
 * a minimum hop limit.  IPv6 fragments are only bounds-checked here,
 * not reassembled (see the stub at the "fragment" label).
 *
 * Returns PF_PASS or PF_DROP with *reason set (the packet is logged
 * when the matching rule requests it).
 */
int
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
	struct mbuf *m = *m0;
	struct pf_rule *r;
	struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
	int off;
	struct ip6_ext ext;
	struct ip6_opt opt;
	struct ip6_opt_jumbo jumbo;
	struct ip6_frag frag;
	u_int32_t jumbolen = 0, plen;
	u_int16_t fragoff = 0;
	int optend;
	int ooff;
	u_int8_t proto;
	int terminal;

	/* Find the first matching scrub rule, using the per-field skip
	 * lists to jump over rules that cannot match. */
	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET6)
			r = r->skip[PF_SKIP_AF].ptr;
#if 0 /* header chain! */
		else if (r->proto && r->proto != h->ip6_nxt)
			r = r->skip[PF_SKIP_PROTO].ptr;
#endif
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip6_src, AF_INET6,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip6_dst, AF_INET6,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
		goto drop;

	/* Walk the extension header chain until a terminal (transport)
	 * header or a fragment header is found. */
	off = sizeof(struct ip6_hdr);
	proto = h->ip6_nxt;
	terminal = 0;
	do {
		switch (proto) {
		case IPPROTO_FRAGMENT:
			goto fragment;
			break;
		case IPPROTO_AH:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			/* AH counts its length in 4-byte units, the other
			 * extension headers in 8-byte units */
			if (proto == IPPROTO_AH)
				off += (ext.ip6e_len + 2) * 4;
			else
				off += (ext.ip6e_len + 1) * 8;
			proto = ext.ip6e_nxt;
			break;
		case IPPROTO_HOPOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			optend = off + (ext.ip6e_len + 1) * 8;
			ooff = off + sizeof(ext);
			/* Validate each option TLV within the header */
			do {
				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
				    sizeof(opt.ip6o_type), NULL, NULL,
				    AF_INET6))
					goto shortpkt;
				if (opt.ip6o_type == IP6OPT_PAD1) {
					ooff++;
					continue;
				}
				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
				    NULL, NULL, AF_INET6))
					goto shortpkt;
				/* Option may not run past the header end */
				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
					goto drop;
				switch (opt.ip6o_type) {
				case IP6OPT_JUMBO:
					/* A jumbogram requires ip6_plen 0 */
					if (h->ip6_plen != 0)
						goto drop;
					if (!pf_pull_hdr(m, ooff, &jumbo,
					    sizeof(jumbo), NULL, NULL,
					    AF_INET6))
						goto shortpkt;
					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
					    sizeof(jumbolen));
					jumbolen = ntohl(jumbolen);
					/* Jumbo length must exceed the normal
					 * maximum and match the mbuf length */
					if (jumbolen <= IPV6_MAXPACKET)
						goto drop;
					if (sizeof(struct ip6_hdr) + jumbolen !=
					    m->m_pkthdr.len)
						goto drop;
					break;
				default:
					break;
				}
				ooff += sizeof(opt) + opt.ip6o_len;
			} while (ooff < optend);

			off = optend;
			proto = ext.ip6e_nxt;
			break;
		default:
			terminal = 1;
			break;
		}
	} while (!terminal);

	/* jumbo payload option must be present, or plen > 0 */
	if (ntohs(h->ip6_plen) == 0)
		plen = jumbolen;
	else
		plen = ntohs(h->ip6_plen);
	if (plen == 0)
		goto drop;
	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
		goto shortpkt;

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl)
		h->ip6_hlim = r->min_ttl;

	return (PF_PASS);

fragment:
	/* A fragment must carry a real (non-jumbo) payload length */
	if (ntohs(h->ip6_plen) == 0 || jumbolen)
		goto drop;
	plen = ntohs(h->ip6_plen);

	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
		goto shortpkt;
	/* ip6f_offlg stores offset*8 in its upper 13 bits, so the masked
	 * value is already a byte offset */
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	/* The reassembled packet may not exceed IPV6_MAXPACKET */
	if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
		goto badfrag;

	/* do something about it */
	/* remember to set pd->flags |= PFDESC_IP_REAS */
	return (PF_PASS);

shortpkt:
	REASON_SET(reason, PFRES_SHORT);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

badfrag:
	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);
}
#endif /* INET6 */
1219:
1220: int
1221: pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
1222: int off, void *h, struct pf_pdesc *pd)
1223: {
1224: struct pf_rule *r, *rm = NULL;
1225: struct tcphdr *th = pd->hdr.tcp;
1226: int rewrite = 0;
1227: u_short reason;
1228: u_int8_t flags;
1229: sa_family_t af = pd->af;
1230:
1231: r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1232: while (r != NULL) {
1233: r->evaluations++;
1234: if (pfi_kif_match(r->kif, kif) == r->ifnot)
1235: r = r->skip[PF_SKIP_IFP].ptr;
1236: else if (r->direction && r->direction != dir)
1237: r = r->skip[PF_SKIP_DIR].ptr;
1238: else if (r->af && r->af != af)
1239: r = r->skip[PF_SKIP_AF].ptr;
1240: else if (r->proto && r->proto != pd->proto)
1241: r = r->skip[PF_SKIP_PROTO].ptr;
1242: else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
1243: r->src.neg, kif))
1244: r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1245: else if (r->src.port_op && !pf_match_port(r->src.port_op,
1246: r->src.port[0], r->src.port[1], th->th_sport))
1247: r = r->skip[PF_SKIP_SRC_PORT].ptr;
1248: else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
1249: r->dst.neg, NULL))
1250: r = r->skip[PF_SKIP_DST_ADDR].ptr;
1251: else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
1252: r->dst.port[0], r->dst.port[1], th->th_dport))
1253: r = r->skip[PF_SKIP_DST_PORT].ptr;
1254: else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
1255: pf_osfp_fingerprint(pd, m, off, th),
1256: r->os_fingerprint))
1257: r = TAILQ_NEXT(r, entries);
1258: else {
1259: rm = r;
1260: break;
1261: }
1262: }
1263:
1264: if (rm == NULL || rm->action == PF_NOSCRUB)
1265: return (PF_PASS);
1266: else {
1267: r->packets[dir == PF_OUT]++;
1268: r->bytes[dir == PF_OUT] += pd->tot_len;
1269: }
1270:
1271: if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
1272: pd->flags |= PFDESC_TCP_NORM;
1273:
1274: flags = th->th_flags;
1275: if (flags & TH_SYN) {
1276: /* Illegal packet */
1277: if (flags & TH_RST)
1278: goto tcp_drop;
1279:
1280: if (flags & TH_FIN)
1281: flags &= ~TH_FIN;
1282: } else {
1283: /* Illegal packet */
1284: if (!(flags & (TH_ACK|TH_RST)))
1285: goto tcp_drop;
1286: }
1287:
1288: if (!(flags & TH_ACK)) {
1289: /* These flags are only valid if ACK is set */
1290: if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
1291: goto tcp_drop;
1292: }
1293:
1294: /* Check for illegal header length */
1295: if (th->th_off < (sizeof(struct tcphdr) >> 2))
1296: goto tcp_drop;
1297:
1298: /* If flags changed, or reserved data set, then adjust */
1299: if (flags != th->th_flags || th->th_x2 != 0) {
1300: u_int16_t ov, nv;
1301:
1302: ov = *(u_int16_t *)(&th->th_ack + 1);
1303: th->th_flags = flags;
1304: th->th_x2 = 0;
1305: nv = *(u_int16_t *)(&th->th_ack + 1);
1306:
1307: th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
1308: rewrite = 1;
1309: }
1310:
1311: /* Remove urgent pointer, if TH_URG is not set */
1312: if (!(flags & TH_URG) && th->th_urp) {
1313: th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
1314: th->th_urp = 0;
1315: rewrite = 1;
1316: }
1317:
1318: /* Process options */
1319: if (r->max_mss && pf_normalize_tcpopt(r, m, th, off))
1320: rewrite = 1;
1321:
1322: /* copy back packet headers if we sanitized */
1323: if (rewrite)
1324: m_copyback(m, off, sizeof(*th), th);
1325:
1326: return (PF_PASS);
1327:
1328: tcp_drop:
1329: REASON_SET(&reason, PFRES_NORM);
1330: if (rm != NULL && r->log)
1331: PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd);
1332: return (PF_DROP);
1333: }
1334:
/*
 * Allocate and initialize per-peer scrub state when a new TCP state
 * entry is created.  Records the initial TTL/hop limit and, on SYN
 * packets carrying a timestamp option, seeds the timestamp modulation
 * value and the PAWS tracking fields.
 *
 * Returns 0 on success, 1 if the scrub pool allocation failed.
 */
int
pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
{
	u_int32_t tsval, tsecr;
	u_int8_t hdr[60];	/* maximum TCP header incl. options */
	u_int8_t *opt;

	KASSERT(src->scrub == NULL);

	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	if (src->scrub == NULL)
		return (1);
	bzero(src->scrub, sizeof(*src->scrub));

	/* Remember the initial TTL so it can be enforced later */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		struct ip *h = mtod(m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	}


	/*
	 * All normalizations below are only begun if we see the start of
	 * the connections. They must all set an enabled bit in pfss_flags
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);


	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					/* Random offset added to this peer's
					 * outgoing timestamps (modulation) */
					src->scrub->pfss_ts_mod =
					    htonl(arc4random());

					/* note PFSS_PAWS not set yet */
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* FALLTHROUGH */
			default:
				/* Skip the option; advance at least 2 bytes
				 * even on a bogus zero/one length */
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return (0);
}
1417:
1418: void
1419: pf_normalize_tcp_cleanup(struct pf_state *state)
1420: {
1421: if (state->src.scrub)
1422: pool_put(&pf_state_scrub_pl, state->src.scrub);
1423: if (state->dst.scrub)
1424: pool_put(&pf_state_scrub_pl, state->dst.scrub);
1425:
1426: /* Someday... flush the TCP segment reassembly descriptors. */
1427: }
1428:
/*
 * Stateful TCP normalization: enforce the minimum TTL/hop limit seen
 * on the connection and modulate/validate RFC1323 timestamps (PAWS)
 * to defeat blind insertion attacks and NAT/uptime fingerprinting.
 *
 * Returns 0 to let the packet continue, or PF_DROP with *reason set.
 * Sets *writeback when the TCP options were modified so the caller
 * copies the header back into the mbuf.
 */
int
pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
    u_short *reason, struct tcphdr *th, struct pf_state *state,
    struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
{
	struct timeval uptime;
	u_int32_t tsval, tsecr;	/* only valid once got_ts is set */
	u_int tsval_from_last;
	u_int8_t hdr[60];	/* maximum TCP header incl. options */
	u_int8_t *opt;
	int copyback = 0;
	int got_ts = 0;

	KASSERT(src->scrub || dst->scrub);

	/*
	 * Enforce the minimum TTL seen for this connection. Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
	 */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		if (src->scrub) {
			struct ip *h = mtod(m, struct ip *);
			if (h->ip_ttl > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip_ttl;
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		if (src->scrub) {
			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
			if (h->ip6_hlim > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip6_hlim;
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET6 */
	}

	/* Only walk the options when a timestamp option can be present
	 * and at least one peer has timestamping enabled. */
	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				/* Modulate the timestamps. Can be used for
				 * NAT detection, OS uptime determination or
				 * reboot detection.
				 */

				if (got_ts) {
					/* Huh? Multiple timestamps!? */
					if (pf_status.debug >= PF_DEBUG_MISC) {
						DPFPRINTF(("multiple TS??"));
						pf_print_state(state);
						printf("\n");
					}
					REASON_SET(reason, PFRES_TS);
					return (PF_DROP);
				}
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					/* Add this peer's modulation offset
					 * to its outgoing TS value iff != 0 */
					if (tsval && src->scrub &&
					    (src->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsval = ntohl(tsval);
						pf_change_a(&opt[2],
						    &th->th_sum,
						    htonl(tsval +
						    src->scrub->pfss_ts_mod),
						    0);
						copyback = 1;
					}

					/* Modulate TS reply iff valid (!0) */
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					if (tsecr && dst->scrub &&
					    (dst->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsecr = ntohl(tsecr)
						    - dst->scrub->pfss_ts_mod;
						pf_change_a(&opt[6],
						    &th->th_sum, htonl(tsecr),
						    0);
						copyback = 1;
					}
					got_ts = 1;
				}
				/* FALLTHROUGH */
			default:
				/* Skip the option; advance at least 2 bytes
				 * even on a bogus zero/one length */
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
		if (copyback) {
			/* Copyback the options, caller copys back header */
			*writeback = 1;
			m_copyback(m, off + sizeof(struct tcphdr),
			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
			    sizeof(struct tcphdr));
		}
	}


	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms. That turns out to
	 * be about 24 days before it wraps. XXX Right now our lowerbound
	 * TS echo check only works for the first 12 days of a connection
	 * when the TS has exhausted half its 32bit space
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */

	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    time_second - state->creation > TS_MAX_CONN)) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("src idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("dst idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}

	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers). PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4Gb). We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint. The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now. The RFC defines the max
		 *    clock rate as 1ms. We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 *    or 30 seconds due to a route change. And this
		 *    gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 *    We have to be careful here. Windows will send an
		 *    initial timestamp of zero and then initialize it
		 *    to a random value after the 3whs; presumably to
		 *    avoid a DoS by having to call an expensive RNG
		 *    during a SYN flood. Proof MS has at least one
		 *    good security geek.
		 *
		 *  - The TCP timestamp option must also echo the other
		 *    endpoints timestamp. The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window. The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tescr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine. The other endpoint's echoed
		 *    values will not decrease. But there may be
		 *    network conditions that re-order packets and
		 *    cause our view of them to decrease. For now the
		 *    only lowerbound we can safely determine is that
		 *    the TS echo will never be less than the original
		 *    TS. XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tescr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days. So we just disable TS checking after 24
		 * days of idle time. We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval delta_ts;
		int ts_fudge;


		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp. This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
			ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];


		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */
		timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);


		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/* Bad RFC1323 implementation or an insertion attack.
			 *
			 * - Solaris 2.6 and 2.7 are known to send another ACK
			 *   after the FIN,FIN|ACK,ACK closing that carries
			 *   an old timestamp.
			 */

			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
			DPFPRINTF((" tsval: %lu  tsecr: %lu  +ticks: %lu  "
			    "idle: %lus %lums\n",
			    tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
			    delta_ts.tv_usec / 1000));
			DPFPRINTF((" src->tsval: %lu  tsecr: %lu\n",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
			DPFPRINTF((" dst->tsval: %lu  tsecr: %lu  tsval0: %lu"
			    "\n", dst->scrub->pfss_tsval,
			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
			if (pf_status.debug >= PF_DEBUG_MISC) {
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}

		/* XXX I'd really like to require tsecr but it's optional */

	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Didn't send a timestamp. Timestamps aren't really useful
		 * when:
		 *  - connection opening or closing (often not even sent).
		 *    but we must not let an attacker to put a FIN on a
		 *    data packet to sneak it through our ESTABLISHED check.
		 *  - on a TCP reset. RFC suggests not even looking at TS.
		 *  - on an empty ACK. The TS will not be echoed so it will
		 *    probably not help keep the RTT calculation in sync and
		 *    there isn't as much danger when the sequence numbers
		 *    got wrapped. So some stacks don't include TS on empty
		 *    ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets. There appear to be devices that do legitimate
		 * TCP connection hijacking. There are HTTP devices that allow
		 * a 3whs (with timestamps) and then buffer the HTTP request.
		 * If the intermediate device has the HTTP response cache, it
		 * will spoof the response but not bother timestamping its
		 * packets. So we can look for the presence of a timestamp in
		 * the first data packet and if there, require it in all future
		 * packets.
		 */

		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey!  Someone tried to sneak a packet in.  Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (pf_status.debug >= PF_DEBUG_MISC) {
				DPFPRINTF(("Did not receive expected RFC1323 "
				    "timestamp\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
	}


	/*
	 * We will note if a host sends his data packets with or without
	 * timestamps.  And require all data packets to contain a timestamp
	 * if the first does.  PAWS implicitly requires that all data packets be
	 * timestamped.  But I think there are middle-man devices that hijack
	 * TCP streams immediately after the 3whs and don't timestamp their
	 * packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		if (got_ts)
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				DPFPRINTF(("Broken RFC1323 stack did not "
				    "timestamp data packet. Disabled PAWS "
				    "security.\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
		}
	}


	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
			src->scrub->pfss_tsval = tsval;

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_tsecr = tsecr;

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_flags |= PFSS_PAWS;
		}
	}

	/* I have a dream....  TCP segment reassembly.... */
	return (0);
}
1819:
1820: int
1821: pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
1822: int off)
1823: {
1824: u_int16_t *mss;
1825: int thoff;
1826: int opt, cnt, optlen = 0;
1827: int rewrite = 0;
1828: u_char *optp;
1829:
1830: thoff = th->th_off << 2;
1831: cnt = thoff - sizeof(struct tcphdr);
1832: optp = mtod(m, caddr_t) + off + sizeof(struct tcphdr);
1833:
1834: for (; cnt > 0; cnt -= optlen, optp += optlen) {
1835: opt = optp[0];
1836: if (opt == TCPOPT_EOL)
1837: break;
1838: if (opt == TCPOPT_NOP)
1839: optlen = 1;
1840: else {
1841: if (cnt < 2)
1842: break;
1843: optlen = optp[1];
1844: if (optlen < 2 || optlen > cnt)
1845: break;
1846: }
1847: switch (opt) {
1848: case TCPOPT_MAXSEG:
1849: mss = (u_int16_t *)(optp + 2);
1850: if ((ntohs(*mss)) > r->max_mss) {
1851: th->th_sum = pf_cksum_fixup(th->th_sum,
1852: *mss, htons(r->max_mss), 0);
1853: *mss = htons(r->max_mss);
1854: rewrite = 1;
1855: }
1856: break;
1857: default:
1858: break;
1859: }
1860: }
1861:
1862: return (rewrite);
1863: }
/* CVSweb (repository-browser trailer, not part of the original source) */