Annotation of sys/net/pf_norm.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: pf_norm.c,v 1.109 2007/05/28 17:16:39 henning Exp $ */
2:
3: /*
4: * Copyright 2001 Niels Provos <provos@citi.umich.edu>
5: * All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26: */
27:
28: #include "pflog.h"
29:
30: #include <sys/param.h>
31: #include <sys/systm.h>
32: #include <sys/mbuf.h>
33: #include <sys/filio.h>
34: #include <sys/fcntl.h>
35: #include <sys/socket.h>
36: #include <sys/kernel.h>
37: #include <sys/time.h>
38: #include <sys/pool.h>
39:
40: #include <dev/rndvar.h>
41: #include <net/if.h>
42: #include <net/if_types.h>
43: #include <net/bpf.h>
44: #include <net/route.h>
45: #include <net/if_pflog.h>
46:
47: #include <netinet/in.h>
48: #include <netinet/in_var.h>
49: #include <netinet/in_systm.h>
50: #include <netinet/ip.h>
51: #include <netinet/ip_var.h>
52: #include <netinet/tcp.h>
53: #include <netinet/tcp_seq.h>
54: #include <netinet/udp.h>
55: #include <netinet/ip_icmp.h>
56:
57: #ifdef INET6
58: #include <netinet/ip6.h>
59: #endif /* INET6 */
60:
61: #include <net/pfvar.h>
62:
/*
 * One buffered IPv4 fragment: the mbuf carrying the fragment and a
 * pointer to its IP header.  Entries are kept on a pf_fragment's
 * fr_queue, sorted by fragment offset.
 */
struct pf_frent {
	LIST_ENTRY(pf_frent) fr_next;	/* link on pf_fragment fr_queue */
	struct ip *fr_ip;		/* IP header of this fragment */
	struct mbuf *fr_m;		/* mbuf chain holding the fragment */
};
68:
/*
 * One contiguous byte range already seen by the non-buffering fragment
 * cache.  Offsets are data offsets in host byte order; fr_end is
 * exclusive.  Entries live on a pf_fragment's fr_cache list.
 */
struct pf_frcache {
	LIST_ENTRY(pf_frcache) fr_next;	/* link on pf_fragment fr_cache */
	uint16_t fr_off;		/* start of cached range */
	uint16_t fr_end;		/* end of cached range (exclusive) */
};
74:
75: #define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */
76: #define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */
77: #define PFFRAG_DROP 0x0004 /* Drop all fragments */
78: #define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER))
79:
/*
 * Per-packet reassembly context, keyed by (src, dst, proto, id).
 * Lives both in an RB tree (lookup) and on a TAILQ (LRU for timeout
 * and flushing).  Depending on PFFRAG_NOBUFFER it either buffers the
 * actual fragments (fru_queue) or only tracks which byte ranges have
 * been seen (fru_cache).
 */
struct pf_fragment {
	RB_ENTRY(pf_fragment) fr_entry;		/* node in pf_frag_tree/pf_cache_tree */
	TAILQ_ENTRY(pf_fragment) frag_next;	/* link on LRU queue */
	struct in_addr	fr_src;
	struct in_addr	fr_dst;
	u_int8_t	fr_p;		/* protocol of this fragment */
	u_int8_t	fr_flags;	/* status flags */
	u_int16_t	fr_id;		/* fragment id for reassemble */
	u_int16_t	fr_max;		/* fragment data max */
	u_int32_t	fr_timeout;	/* last-use timestamp (time_second) */
#define fr_queue	fr_u.fru_queue
#define fr_cache	fr_u.fru_cache
	union {
		LIST_HEAD(pf_fragq, pf_frent) fru_queue;	/* buffering */
		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;	/* non-buf */
	} fr_u;
};
97:
/*
 * LRU queues of reassembly contexts; most recently used entries are
 * kept at the head, so expiry and flushing work from the tail.
 */
TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;
TAILQ_HEAD(pf_cachequeue, pf_fragment)	pf_cachequeue;

static __inline int	 pf_frag_compare(struct pf_fragment *,
			    struct pf_fragment *);
/* One RB type serves both trees: buffered reassembly and range cache. */
RB_HEAD(pf_frag_tree, pf_fragment)	pf_frag_tree, pf_cache_tree;
RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

/* Private prototypes */
void			 pf_ip2key(struct pf_fragment *, struct ip *);
void			 pf_remove_fragment(struct pf_fragment *);
void			 pf_flush_fragments(void);
void			 pf_free_fragment(struct pf_fragment *);
struct pf_fragment	*pf_find_fragment(struct ip *, struct pf_frag_tree *);
struct mbuf		*pf_reassemble(struct mbuf **, struct pf_fragment **,
			    struct pf_frent *, int);
struct mbuf		*pf_fragcache(struct mbuf **, struct ip*,
			    struct pf_fragment **, int, int, int *);
int			 pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
			    struct tcphdr *, int);

/* Debug printf, active only at PF_DEBUG_MISC or higher. */
#define	DPFPRINTF(x) do {				\
	if (pf_status.debug >= PF_DEBUG_MISC) {		\
		printf("%s: ", __func__);		\
		printf x ;				\
	}						\
} while(0)

/* Globals */
struct pool		 pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
struct pool		 pf_state_scrub_pl;
/* Current number of buffered frents / cached range entries. */
int			 pf_nfrents, pf_ncache;
131:
/*
 * Initialize the normalization subsystem: create the memory pools for
 * fragments, reassembly contexts, cache entries and state-scrub
 * records, apply the configured high-water limits, and initialize the
 * two LRU queues.
 */
void
pf_normalize_init(void)
{
	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
	    NULL);
	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL);
	pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
	    "pffrcache", NULL);
	pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
	    NULL);
	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
	    "pfstscr", NULL);

	/* Soft limit on contexts; hard limits on per-fragment entries. */
	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);

	TAILQ_INIT(&pf_fragqueue);
	TAILQ_INIT(&pf_cachequeue);
}
154:
155: static __inline int
156: pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
157: {
158: int diff;
159:
160: if ((diff = a->fr_id - b->fr_id))
161: return (diff);
162: else if ((diff = a->fr_p - b->fr_p))
163: return (diff);
164: else if (a->fr_src.s_addr < b->fr_src.s_addr)
165: return (-1);
166: else if (a->fr_src.s_addr > b->fr_src.s_addr)
167: return (1);
168: else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
169: return (-1);
170: else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
171: return (1);
172: return (0);
173: }
174:
/*
 * Expire reassembly contexts older than the PFTM_FRAG timeout.  Both
 * LRU queues keep the most recently used entry at the head, so we pop
 * from the tail until we hit an entry that is still fresh.
 */
void
pf_purge_expired_fragments(void)
{
	struct pf_fragment *frag;
	/* Entries whose fr_timeout is at or before this are expired. */
	u_int32_t expire = time_second -
	    pf_default_rule.timeout[PFTM_FRAG];

	/* Buffered reassembly queue. */
	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		KASSERT(BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
	}

	/* Non-buffering cache queue. */
	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
		KASSERT(!BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
		/* pf_free_fragment() must have unlinked the entry. */
		KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
	}
}
202:
203: /*
204: * Try to flush old fragments to make space for new ones
205: */
206:
207: void
208: pf_flush_fragments(void)
209: {
210: struct pf_fragment *frag;
211: int goal;
212:
213: goal = pf_nfrents * 9 / 10;
214: DPFPRINTF(("trying to free > %d frents\n",
215: pf_nfrents - goal));
216: while (goal < pf_nfrents) {
217: frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
218: if (frag == NULL)
219: break;
220: pf_free_fragment(frag);
221: }
222:
223:
224: goal = pf_ncache * 9 / 10;
225: DPFPRINTF(("trying to free > %d cache entries\n",
226: pf_ncache - goal));
227: while (goal < pf_ncache) {
228: frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
229: if (frag == NULL)
230: break;
231: pf_free_fragment(frag);
232: }
233: }
234:
235: /* Frees the fragments and all associated entries */
236:
237: void
238: pf_free_fragment(struct pf_fragment *frag)
239: {
240: struct pf_frent *frent;
241: struct pf_frcache *frcache;
242:
243: /* Free all fragments */
244: if (BUFFER_FRAGMENTS(frag)) {
245: for (frent = LIST_FIRST(&frag->fr_queue); frent;
246: frent = LIST_FIRST(&frag->fr_queue)) {
247: LIST_REMOVE(frent, fr_next);
248:
249: m_freem(frent->fr_m);
250: pool_put(&pf_frent_pl, frent);
251: pf_nfrents--;
252: }
253: } else {
254: for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
255: frcache = LIST_FIRST(&frag->fr_cache)) {
256: LIST_REMOVE(frcache, fr_next);
257:
258: KASSERT(LIST_EMPTY(&frag->fr_cache) ||
259: LIST_FIRST(&frag->fr_cache)->fr_off >
260: frcache->fr_end);
261:
262: pool_put(&pf_cent_pl, frcache);
263: pf_ncache--;
264: }
265: }
266:
267: pf_remove_fragment(frag);
268: }
269:
270: void
271: pf_ip2key(struct pf_fragment *key, struct ip *ip)
272: {
273: key->fr_p = ip->ip_p;
274: key->fr_id = ip->ip_id;
275: key->fr_src.s_addr = ip->ip_src.s_addr;
276: key->fr_dst.s_addr = ip->ip_dst.s_addr;
277: }
278:
279: struct pf_fragment *
280: pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
281: {
282: struct pf_fragment key;
283: struct pf_fragment *frag;
284:
285: pf_ip2key(&key, ip);
286:
287: frag = RB_FIND(pf_frag_tree, tree, &key);
288: if (frag != NULL) {
289: /* XXX Are we sure we want to update the timeout? */
290: frag->fr_timeout = time_second;
291: if (BUFFER_FRAGMENTS(frag)) {
292: TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
293: TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
294: } else {
295: TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
296: TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
297: }
298: }
299:
300: return (frag);
301: }
302:
303: /* Removes a fragment from the fragment queue and frees the fragment */
304:
305: void
306: pf_remove_fragment(struct pf_fragment *frag)
307: {
308: if (BUFFER_FRAGMENTS(frag)) {
309: RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
310: TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
311: pool_put(&pf_frag_pl, frag);
312: } else {
313: RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
314: TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
315: pool_put(&pf_cache_pl, frag);
316: }
317: }
318:
/* Data offset of a queued fragment, in bytes (ip_off is in 8-byte units). */
#define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
/*
 * Insert one fragment into the buffered reassembly queue for its
 * packet, trimming any overlap with already-queued fragments.  Creates
 * the pf_fragment context on first use.  Returns the fully
 * reassembled packet once all data has arrived, or NULL while the
 * packet is still incomplete (or on error, in which case the mbuf has
 * been consumed).  *frag is cleared when the context is torn down.
 * mff is non-zero when the IP_MF flag was set on this fragment.
 */
struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf	*m = *m0, *m2;
	struct pf_frent	*frea, *next;
	struct pf_frent	*frep = NULL;
	struct ip	*ip = frent->fr_ip;
	int		 hlen = ip->ip_hl << 2;
	/* Data offset and length of this fragment, header excluded. */
	u_int16_t	 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t	 ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_int16_t	 max = ip_len + off;	/* end of this fragment's data */

	KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			/* Try once more after reclaiming old contexts. */
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = frent->fr_ip->ip_src;
		(*frag)->fr_dst = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = time_second;
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off)
			break;
		frep = frea;	/* last fragment at or before our offset */
	}

	KASSERT(frep != NULL || frea != NULL);

	/* Trim the front of this fragment if it overlaps its predecessor. */
	if (frep != NULL &&
	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	    4 > off)
	{
		u_int16_t	precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len)
			goto drop_fragment;	/* fully covered: discard */
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len);
	}

	/* Trim or drop queued successors that our data overlaps. */
	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	    frea = next)
	{
		u_int16_t	aftercut;

		aftercut = ip_len + off - FR_IP_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
		    * 4)
		{
			/* Partial overlap: shave the front of frea. */
			frea->fr_ip->ip_len =
			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

 insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < max)
		(*frag)->fr_max = max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		/* off accumulates contiguous data; a gap means not done. */
		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off))
		{
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	KASSERT(frent != NULL);
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		/* Reassembled packet would exceed the IP length field. */
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	/* Concatenate all remaining fragments onto the first mbuf chain. */
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_cat(m, m2);
	}

	ip->ip_src = (*frag)->fr_src;
	ip->ip_dst = (*frag)->fr_dst;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	/* Restore the IP header that was stripped on entry. */
	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	m->m_len += hlen;
	m->m_data -= hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			plen += m2->m_len;
		m->m_pkthdr.len = plen;
	}

	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
	return (m);

 drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	return (NULL);
}
518:
/*
 * Non-buffering fragment cache (the "crop"/"drop-ovl" scrub modes).
 * Instead of queuing fragment data, track which byte ranges have been
 * seen per packet and pass fragments through after trimming any bytes
 * that duplicate an already-passed range.  Returns the (possibly
 * trimmed) mbuf to forward, or NULL when the fragment is dropped; on
 * NULL, *nomem is set if the drop was due to allocation failure.
 * When drop is non-zero, overlapping fragments are discarded instead
 * of trimmed and the whole packet is marked PFFRAG_DROP.
 */
struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
    int drop, int *nomem)
{
	struct mbuf		*m = *m0;
	struct pf_frcache	*frp, *fra, *cur = NULL;
	int			 ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
	/*
	 * Truncation to u_int16_t discards the flag bits that the
	 * left shift pushes above bit 15, leaving the byte offset.
	 */
	u_int16_t		 off = ntohs(h->ip_off) << 3;
	u_int16_t		 max = ip_len + off;
	int			 hosed = 0;	/* overlap seen while drop set */

	KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			/* Retry once after reclaiming old contexts. */
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = h->ip_src;
		(*frag)->fr_dst = h->ip_dst;
		(*frag)->fr_p = h->ip_p;
		(*frag)->fr_id = h->ip_id;
		(*frag)->fr_timeout = time_second;

		cur->fr_off = off;
		cur->fr_end = max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;	/* last cached range starting at or before off */
	}

	KASSERT(frp != NULL || fra != NULL);

	/* Handle overlap/adjacency with the preceding cached range. */
	if (frp != NULL) {
		int	precut;

		precut = frp->fr_end - off;
		if (precut >= ip_len) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
			frp->fr_end = max;
		} else if (precut > 0) {
			/* The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
			    max));

			off += precut;
			max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = max;

			if (!drop) {
				/* XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				/* Copy the IP header, cut the dup payload,
				 * then reattach the header in front. */
				*m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				KASSERT((*m0)->m_next == NULL);
				m_adj(m, precut + (h->ip_hl << 2));
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}


				h = mtod(m, struct ip *);


				KASSERT((int)m->m_len ==
				    ntohs(h->ip_len) - precut);
				h->ip_off = htons(ntohs(h->ip_off) +
				    (precut >> 3));
				h->ip_len = htons(ntohs(h->ip_len) - precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
			    max));

			/* New disjoint range entry after frp. */
			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	/* Handle overlap/adjacency with the following cached range. */
	if (fra != NULL) {
		int	aftercut;
		int	merge = 0;	/* fra may now touch cur/frp */

		aftercut = max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
			    h->ip_id, off, max, fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
			    h->ip_id, aftercut, off, max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}
				h = mtod(m, struct ip *);
				KASSERT((int)m->m_len ==
				    ntohs(h->ip_len) - aftercut);
				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
			    h->ip_id, -aftercut, off, max, fra->fr_off,
			    fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}


		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, cur->fr_off, cur->fr_end, off,
				    max, fra->fr_off, fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				pf_ncache--;
				cur = NULL;

			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				KASSERT(cur == NULL);
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, frp->fr_off, frp->fr_end, off,
				    max, fra->fr_off, fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				pf_ncache--;
				frp = NULL;

			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}


 pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < max)
		(*frag)->fr_max = max;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
		    (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

 no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

 drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
			    h->ip_id));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}
814:
/*
 * IPv4 scrub entry point.  Matches the packet against the scrub
 * ruleset, sanity-checks the header, optionally clears IP_DF, and for
 * fragments hands the packet to either full reassembly
 * (pf_reassemble) or the non-buffering cache (pf_fragcache),
 * depending on the matched rule's flags.  Also applies min-ttl and
 * random-id modifications with incremental checksum fixups.
 * Returns PF_PASS (possibly with *m0 replaced by a reassembled
 * packet) or PF_DROP with *reason set.
 */
int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
    struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct pf_rule		*r;
	struct pf_frent		*frent;
	struct pf_fragment	*frag = NULL;
	struct ip		*h = mtod(m, struct ip *);
	int			 mff = (ntohs(h->ip_off) & IP_MF);
	int			 hlen = h->ip_hl << 2;
	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t		 max;
	int			 ip_len;
	int			 ip_off;

	/* Walk the scrub ruleset, using skip-steps to prune mismatches. */
	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != h->ip_p)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (hlen < (int)sizeof(struct ip))
		goto drop;

	if (hlen > ntohs(h->ip_len))
		goto drop;

	/* Clear IP_DF if the rule uses the no-df option */
	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		/* Incrementally patch the header checksum. */
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/* We're dealing with a fragment now.  Don't allow fragments
	 * with IP_DF to enter the cache.  If the flag was cleared by
	 * no-df above, fine.  Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;
	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	max = fragoff + ip_len;

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */

		frag = pf_find_fragment(h, &pf_frag_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    max > frag->fr_max)
			goto bad;

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		pf_nfrents++;
		frent->fr_ip = h;
		frent->fr_m = m;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
		*m0 = m = pf_reassemble(m0, &frag, frent, mff);

		if (m == NULL)
			return (PF_DROP);

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;

		/* Header may have moved after reassembly. */
		h = mtod(m, struct ip *);
	} else {
		/* non-buffering fragment cache (drops or masks overlaps) */
		int	nomem = 0;

		if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) {
			/*
			 * Already passed the fragment cache in the
			 * input direction.  If we continued, it would
			 * appear to be a dup and would be dropped.
			 */
			goto fragment_pass;
		}

		frag = pf_find_fragment(h, &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    max > frag->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				frag->fr_flags |= PFFRAG_DROP;
			goto bad;
		}

		*m0 = m = pf_fragcache(m0, h, &frag, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			if (nomem)
				goto no_mem;
			goto drop;
		}

		/* Mark so the outbound pass skips the cache (see above). */
		if (dir == PF_IN)
			m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE;

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;
		goto fragment_pass;
	}

 no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}

	/* Replace the IP id if the rule asks for random ids. */
	if (r->rule_flag & PFRULE_RANDOMID) {
		u_int16_t ip_id = h->ip_id;

		h->ip_id = ip_randomid();
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;

	return (PF_PASS);

 fragment_pass:
	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;
	return (PF_PASS);

 no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

 drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

 bad:
	DPFPRINTF(("dropping bad fragment\n"));

	/* Free associated fragments */
	if (frag != NULL)
		pf_free_fragment(frag);

	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);

	return (PF_DROP);
}
1040:
#ifdef INET6
/*
 * Scrub an IPv6 packet against the active scrub ruleset: walk the
 * extension header chain, validate hop-by-hop option and jumbogram
 * encodings, check the payload length against the mbuf, and enforce
 * a minimum hop limit.  IPv6 fragments are only bounds-checked here,
 * not reassembled (see the stub at the "fragment" label).
 *
 * Returns PF_PASS or PF_DROP with *reason set (the packet is logged
 * when the matching rule requests it).
 */
int
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
	struct mbuf *m = *m0;
	struct pf_rule *r;
	struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
	int off;
	struct ip6_ext ext;
	struct ip6_opt opt;
	struct ip6_opt_jumbo jumbo;
	struct ip6_frag frag;
	u_int32_t jumbolen = 0, plen;
	u_int16_t fragoff = 0;
	int optend;
	int ooff;
	u_int8_t proto;
	int terminal;

	/* Find the first matching scrub rule, using the per-field skip
	 * lists to jump over rules that cannot match. */
	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET6)
			r = r->skip[PF_SKIP_AF].ptr;
#if 0 /* header chain! */
		else if (r->proto && r->proto != h->ip6_nxt)
			r = r->skip[PF_SKIP_PROTO].ptr;
#endif
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip6_src, AF_INET6,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip6_dst, AF_INET6,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
		goto drop;

	/* Walk the extension header chain until a terminal (transport)
	 * header or a fragment header is found. */
	off = sizeof(struct ip6_hdr);
	proto = h->ip6_nxt;
	terminal = 0;
	do {
		switch (proto) {
		case IPPROTO_FRAGMENT:
			goto fragment;
			break;
		case IPPROTO_AH:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			/* AH counts its length in 4-byte units, the other
			 * extension headers in 8-byte units */
			if (proto == IPPROTO_AH)
				off += (ext.ip6e_len + 2) * 4;
			else
				off += (ext.ip6e_len + 1) * 8;
			proto = ext.ip6e_nxt;
			break;
		case IPPROTO_HOPOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			optend = off + (ext.ip6e_len + 1) * 8;
			ooff = off + sizeof(ext);
			/* Validate each option TLV within the header */
			do {
				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
				    sizeof(opt.ip6o_type), NULL, NULL,
				    AF_INET6))
					goto shortpkt;
				if (opt.ip6o_type == IP6OPT_PAD1) {
					ooff++;
					continue;
				}
				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
				    NULL, NULL, AF_INET6))
					goto shortpkt;
				/* Option may not run past the header end */
				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
					goto drop;
				switch (opt.ip6o_type) {
				case IP6OPT_JUMBO:
					/* A jumbogram requires ip6_plen 0 */
					if (h->ip6_plen != 0)
						goto drop;
					if (!pf_pull_hdr(m, ooff, &jumbo,
					    sizeof(jumbo), NULL, NULL,
					    AF_INET6))
						goto shortpkt;
					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
					    sizeof(jumbolen));
					jumbolen = ntohl(jumbolen);
					/* Jumbo length must exceed the normal
					 * maximum and match the mbuf length */
					if (jumbolen <= IPV6_MAXPACKET)
						goto drop;
					if (sizeof(struct ip6_hdr) + jumbolen !=
					    m->m_pkthdr.len)
						goto drop;
					break;
				default:
					break;
				}
				ooff += sizeof(opt) + opt.ip6o_len;
			} while (ooff < optend);

			off = optend;
			proto = ext.ip6e_nxt;
			break;
		default:
			terminal = 1;
			break;
		}
	} while (!terminal);

	/* jumbo payload option must be present, or plen > 0 */
	if (ntohs(h->ip6_plen) == 0)
		plen = jumbolen;
	else
		plen = ntohs(h->ip6_plen);
	if (plen == 0)
		goto drop;
	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
		goto shortpkt;

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl)
		h->ip6_hlim = r->min_ttl;

	return (PF_PASS);

fragment:
	/* A fragment must carry a real (non-jumbo) payload length */
	if (ntohs(h->ip6_plen) == 0 || jumbolen)
		goto drop;
	plen = ntohs(h->ip6_plen);

	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
		goto shortpkt;
	/* ip6f_offlg stores offset*8 in its upper 13 bits, so the masked
	 * value is already a byte offset */
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	/* The reassembled packet may not exceed IPV6_MAXPACKET */
	if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
		goto badfrag;

	/* do something about it */
	/* remember to set pd->flags |= PFDESC_IP_REAS */
	return (PF_PASS);

shortpkt:
	REASON_SET(reason, PFRES_SHORT);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

badfrag:
	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);
}
#endif /* INET6 */
1219:
1220: int
1221: pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
1222: int off, void *h, struct pf_pdesc *pd)
1223: {
1224: struct pf_rule *r, *rm = NULL;
1225: struct tcphdr *th = pd->hdr.tcp;
1226: int rewrite = 0;
1227: u_short reason;
1228: u_int8_t flags;
1229: sa_family_t af = pd->af;
1230:
1231: r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1232: while (r != NULL) {
1233: r->evaluations++;
1234: if (pfi_kif_match(r->kif, kif) == r->ifnot)
1235: r = r->skip[PF_SKIP_IFP].ptr;
1236: else if (r->direction && r->direction != dir)
1237: r = r->skip[PF_SKIP_DIR].ptr;
1238: else if (r->af && r->af != af)
1239: r = r->skip[PF_SKIP_AF].ptr;
1240: else if (r->proto && r->proto != pd->proto)
1241: r = r->skip[PF_SKIP_PROTO].ptr;
1242: else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
1243: r->src.neg, kif))
1244: r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1245: else if (r->src.port_op && !pf_match_port(r->src.port_op,
1246: r->src.port[0], r->src.port[1], th->th_sport))
1247: r = r->skip[PF_SKIP_SRC_PORT].ptr;
1248: else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
1249: r->dst.neg, NULL))
1250: r = r->skip[PF_SKIP_DST_ADDR].ptr;
1251: else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
1252: r->dst.port[0], r->dst.port[1], th->th_dport))
1253: r = r->skip[PF_SKIP_DST_PORT].ptr;
1254: else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
1255: pf_osfp_fingerprint(pd, m, off, th),
1256: r->os_fingerprint))
1257: r = TAILQ_NEXT(r, entries);
1258: else {
1259: rm = r;
1260: break;
1261: }
1262: }
1263:
1264: if (rm == NULL || rm->action == PF_NOSCRUB)
1265: return (PF_PASS);
1266: else {
1267: r->packets[dir == PF_OUT]++;
1268: r->bytes[dir == PF_OUT] += pd->tot_len;
1269: }
1270:
1271: if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
1272: pd->flags |= PFDESC_TCP_NORM;
1273:
1274: flags = th->th_flags;
1275: if (flags & TH_SYN) {
1276: /* Illegal packet */
1277: if (flags & TH_RST)
1278: goto tcp_drop;
1279:
1280: if (flags & TH_FIN)
1281: flags &= ~TH_FIN;
1282: } else {
1283: /* Illegal packet */
1284: if (!(flags & (TH_ACK|TH_RST)))
1285: goto tcp_drop;
1286: }
1287:
1288: if (!(flags & TH_ACK)) {
1289: /* These flags are only valid if ACK is set */
1290: if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
1291: goto tcp_drop;
1292: }
1293:
1294: /* Check for illegal header length */
1295: if (th->th_off < (sizeof(struct tcphdr) >> 2))
1296: goto tcp_drop;
1297:
1298: /* If flags changed, or reserved data set, then adjust */
1299: if (flags != th->th_flags || th->th_x2 != 0) {
1300: u_int16_t ov, nv;
1301:
1302: ov = *(u_int16_t *)(&th->th_ack + 1);
1303: th->th_flags = flags;
1304: th->th_x2 = 0;
1305: nv = *(u_int16_t *)(&th->th_ack + 1);
1306:
1307: th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
1308: rewrite = 1;
1309: }
1310:
1311: /* Remove urgent pointer, if TH_URG is not set */
1312: if (!(flags & TH_URG) && th->th_urp) {
1313: th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
1314: th->th_urp = 0;
1315: rewrite = 1;
1316: }
1317:
1318: /* Process options */
1319: if (r->max_mss && pf_normalize_tcpopt(r, m, th, off))
1320: rewrite = 1;
1321:
1322: /* copy back packet headers if we sanitized */
1323: if (rewrite)
1324: m_copyback(m, off, sizeof(*th), th);
1325:
1326: return (PF_PASS);
1327:
1328: tcp_drop:
1329: REASON_SET(&reason, PFRES_NORM);
1330: if (rm != NULL && r->log)
1331: PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd);
1332: return (PF_DROP);
1333: }
1334:
/*
 * Allocate and initialize per-peer scrub state when a new TCP state
 * entry is created.  Records the initial TTL/hop limit and, on SYN
 * packets carrying a timestamp option, seeds the timestamp modulation
 * value and the PAWS tracking fields.
 *
 * Returns 0 on success, 1 if the scrub pool allocation failed.
 */
int
pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
{
	u_int32_t tsval, tsecr;
	u_int8_t hdr[60];	/* maximum TCP header incl. options */
	u_int8_t *opt;

	KASSERT(src->scrub == NULL);

	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	if (src->scrub == NULL)
		return (1);
	bzero(src->scrub, sizeof(*src->scrub));

	/* Remember the initial TTL so it can be enforced later */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		struct ip *h = mtod(m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	}


	/*
	 * All normalizations below are only begun if we see the start of
	 * the connections. They must all set an enabled bit in pfss_flags
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);


	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					/* Random offset added to this peer's
					 * outgoing timestamps (modulation) */
					src->scrub->pfss_ts_mod =
					    htonl(arc4random());

					/* note PFSS_PAWS not set yet */
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* FALLTHROUGH */
			default:
				/* Skip the option; advance at least 2 bytes
				 * even on a bogus zero/one length */
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return (0);
}
1417:
1418: void
1419: pf_normalize_tcp_cleanup(struct pf_state *state)
1420: {
1421: if (state->src.scrub)
1422: pool_put(&pf_state_scrub_pl, state->src.scrub);
1423: if (state->dst.scrub)
1424: pool_put(&pf_state_scrub_pl, state->dst.scrub);
1425:
1426: /* Someday... flush the TCP segment reassembly descriptors. */
1427: }
1428:
/*
 * Stateful TCP normalization: enforce the minimum TTL/hop limit seen
 * on the connection and modulate/validate RFC1323 timestamps (PAWS)
 * to defeat blind insertion attacks and NAT/uptime fingerprinting.
 *
 * Returns 0 to let the packet continue, or PF_DROP with *reason set.
 * Sets *writeback when the TCP options were modified so the caller
 * copies the header back into the mbuf.
 */
int
pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
    u_short *reason, struct tcphdr *th, struct pf_state *state,
    struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
{
	struct timeval uptime;
	u_int32_t tsval, tsecr;	/* only valid once got_ts is set */
	u_int tsval_from_last;
	u_int8_t hdr[60];	/* maximum TCP header incl. options */
	u_int8_t *opt;
	int copyback = 0;
	int got_ts = 0;

	KASSERT(src->scrub || dst->scrub);

	/*
	 * Enforce the minimum TTL seen for this connection. Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
	 */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		if (src->scrub) {
			struct ip *h = mtod(m, struct ip *);
			if (h->ip_ttl > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip_ttl;
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		if (src->scrub) {
			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
			if (h->ip6_hlim > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip6_hlim;
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET6 */
	}

	/* Only walk the options when a timestamp option can be present
	 * and at least one peer has timestamping enabled. */
	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				/* Modulate the timestamps. Can be used for
				 * NAT detection, OS uptime determination or
				 * reboot detection.
				 */

				if (got_ts) {
					/* Huh? Multiple timestamps!? */
					if (pf_status.debug >= PF_DEBUG_MISC) {
						DPFPRINTF(("multiple TS??"));
						pf_print_state(state);
						printf("\n");
					}
					REASON_SET(reason, PFRES_TS);
					return (PF_DROP);
				}
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					/* Add this peer's modulation offset
					 * to its outgoing TS value iff != 0 */
					if (tsval && src->scrub &&
					    (src->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsval = ntohl(tsval);
						pf_change_a(&opt[2],
						    &th->th_sum,
						    htonl(tsval +
						    src->scrub->pfss_ts_mod),
						    0);
						copyback = 1;
					}

					/* Modulate TS reply iff valid (!0) */
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					if (tsecr && dst->scrub &&
					    (dst->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsecr = ntohl(tsecr)
						    - dst->scrub->pfss_ts_mod;
						pf_change_a(&opt[6],
						    &th->th_sum, htonl(tsecr),
						    0);
						copyback = 1;
					}
					got_ts = 1;
				}
				/* FALLTHROUGH */
			default:
				/* Skip the option; advance at least 2 bytes
				 * even on a bogus zero/one length */
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
		if (copyback) {
			/* Copyback the options, caller copys back header */
			*writeback = 1;
			m_copyback(m, off + sizeof(struct tcphdr),
			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
			    sizeof(struct tcphdr));
		}
	}


	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms. That turns out to
	 * be about 24 days before it wraps. XXX Right now our lowerbound
	 * TS echo check only works for the first 12 days of a connection
	 * when the TS has exhausted half its 32bit space
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */

	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    time_second - state->creation > TS_MAX_CONN)) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("src idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("dst idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}

	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers). PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4Gb). We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint. The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now. The RFC defines the max
		 *    clock rate as 1ms. We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 *    or 30 seconds due to a route change. And this
		 *    gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 *    We have to be careful here. Windows will send an
		 *    initial timestamp of zero and then initialize it
		 *    to a random value after the 3whs; presumably to
		 *    avoid a DoS by having to call an expensive RNG
		 *    during a SYN flood. Proof MS has at least one
		 *    good security geek.
		 *
		 *  - The TCP timestamp option must also echo the other
		 *    endpoints timestamp. The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window. The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tescr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine. The other endpoint's echoed
		 *    values will not decrease. But there may be
		 *    network conditions that re-order packets and
		 *    cause our view of them to decrease. For now the
		 *    only lowerbound we can safely determine is that
		 *    the TS echo will never be less than the original
		 *    TS. XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tescr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days. So we just disable TS checking after 24
		 * days of idle time. We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval delta_ts;
		int ts_fudge;


		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp. This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
			ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];


		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */
		timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);


		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/* Bad RFC1323 implementation or an insertion attack.
			 *
			 * - Solaris 2.6 and 2.7 are known to send another ACK
			 *   after the FIN,FIN|ACK,ACK closing that carries
			 *   an old timestamp.
			 */

			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
			DPFPRINTF((" tsval: %lu  tsecr: %lu  +ticks: %lu  "
			    "idle: %lus %lums\n",
			    tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
			    delta_ts.tv_usec / 1000));
			DPFPRINTF((" src->tsval: %lu  tsecr: %lu\n",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
			DPFPRINTF((" dst->tsval: %lu  tsecr: %lu  tsval0: %lu"
			    "\n", dst->scrub->pfss_tsval,
			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
			if (pf_status.debug >= PF_DEBUG_MISC) {
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}

		/* XXX I'd really like to require tsecr but it's optional */

	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Didn't send a timestamp. Timestamps aren't really useful
		 * when:
		 *  - connection opening or closing (often not even sent).
		 *    but we must not let an attacker to put a FIN on a
		 *    data packet to sneak it through our ESTABLISHED check.
		 *  - on a TCP reset. RFC suggests not even looking at TS.
		 *  - on an empty ACK. The TS will not be echoed so it will
		 *    probably not help keep the RTT calculation in sync and
		 *    there isn't as much danger when the sequence numbers
		 *    got wrapped. So some stacks don't include TS on empty
		 *    ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets. There appear to be devices that do legitimate
		 * TCP connection hijacking. There are HTTP devices that allow
		 * a 3whs (with timestamps) and then buffer the HTTP request.
		 * If the intermediate device has the HTTP response cache, it
		 * will spoof the response but not bother timestamping its
		 * packets. So we can look for the presence of a timestamp in
		 * the first data packet and if there, require it in all future
		 * packets.
		 */

		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey!  Someone tried to sneak a packet in.  Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (pf_status.debug >= PF_DEBUG_MISC) {
				DPFPRINTF(("Did not receive expected RFC1323 "
				    "timestamp\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
	}


	/*
	 * We will note if a host sends his data packets with or without
	 * timestamps.  And require all data packets to contain a timestamp
	 * if the first does.  PAWS implicitly requires that all data packets be
	 * timestamped.  But I think there are middle-man devices that hijack
	 * TCP streams immediately after the 3whs and don't timestamp their
	 * packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		if (got_ts)
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				DPFPRINTF(("Broken RFC1323 stack did not "
				    "timestamp data packet. Disabled PAWS "
				    "security.\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
		}
	}


	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
			src->scrub->pfss_tsval = tsval;

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_tsecr = tsecr;

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_flags |= PFSS_PAWS;
		}
	}

	/* I have a dream....  TCP segment reassembly.... */
	return (0);
}
1819:
1820: int
1821: pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
1822: int off)
1823: {
1824: u_int16_t *mss;
1825: int thoff;
1826: int opt, cnt, optlen = 0;
1827: int rewrite = 0;
1828: u_char *optp;
1829:
1830: thoff = th->th_off << 2;
1831: cnt = thoff - sizeof(struct tcphdr);
1832: optp = mtod(m, caddr_t) + off + sizeof(struct tcphdr);
1833:
1834: for (; cnt > 0; cnt -= optlen, optp += optlen) {
1835: opt = optp[0];
1836: if (opt == TCPOPT_EOL)
1837: break;
1838: if (opt == TCPOPT_NOP)
1839: optlen = 1;
1840: else {
1841: if (cnt < 2)
1842: break;
1843: optlen = optp[1];
1844: if (optlen < 2 || optlen > cnt)
1845: break;
1846: }
1847: switch (opt) {
1848: case TCPOPT_MAXSEG:
1849: mss = (u_int16_t *)(optp + 2);
1850: if ((ntohs(*mss)) > r->max_mss) {
1851: th->th_sum = pf_cksum_fixup(th->th_sum,
1852: *mss, htons(r->max_mss), 0);
1853: *mss = htons(r->max_mss);
1854: rewrite = 1;
1855: }
1856: break;
1857: default:
1858: break;
1859: }
1860: }
1861:
1862: return (rewrite);
1863: }
/* CVSweb (repository-browser trailer, not part of the original source) */