Annotation of sys/netinet/tcp_timer.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: tcp_timer.c,v 1.39 2007/06/15 18:23:07 markus Exp $ */
! 2: /* $NetBSD: tcp_timer.c,v 1.14 1996/02/13 23:44:09 christos Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1982, 1986, 1988, 1990, 1993
! 6: * The Regents of the University of California. All rights reserved.
! 7: *
! 8: * Redistribution and use in source and binary forms, with or without
! 9: * modification, are permitted provided that the following conditions
! 10: * are met:
! 11: * 1. Redistributions of source code must retain the above copyright
! 12: * notice, this list of conditions and the following disclaimer.
! 13: * 2. Redistributions in binary form must reproduce the above copyright
! 14: * notice, this list of conditions and the following disclaimer in the
! 15: * documentation and/or other materials provided with the distribution.
! 16: * 3. Neither the name of the University nor the names of its contributors
! 17: * may be used to endorse or promote products derived from this software
! 18: * without specific prior written permission.
! 19: *
! 20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 30: * SUCH DAMAGE.
! 31: *
! 32: * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93
! 33: */
! 34:
! 35: #include <sys/param.h>
! 36: #include <sys/systm.h>
! 37: #include <sys/mbuf.h>
! 38: #include <sys/socket.h>
! 39: #include <sys/socketvar.h>
! 40: #include <sys/protosw.h>
! 41: #include <sys/kernel.h>
! 42:
! 43: #include <net/route.h>
! 44:
! 45: #include <netinet/in.h>
! 46: #include <netinet/in_systm.h>
! 47: #include <netinet/ip.h>
! 48: #include <netinet/in_pcb.h>
! 49: #include <netinet/ip_var.h>
! 50: #include <netinet/tcp.h>
! 51: #include <netinet/tcp_fsm.h>
! 52: #include <netinet/tcp_timer.h>
! 53: #include <netinet/tcp_var.h>
! 54: #include <netinet/ip_icmp.h>
! 55: #include <netinet/tcp_seq.h>
! 56:
/*
 * Keepalive/persist tunables.  Each one may be patched to a nonzero
 * value; tcp_timer_init() fills in the compiled-in default only for
 * those that are still zero.
 */
int	tcp_keepidle;		/* KEEP timer interval while not probing */
int	tcp_keepintvl;		/* KEEP timer interval after sending a probe */
int	tcp_maxpersistidle;	/* max idle time in persist */
int	tcp_maxidle;		/* TCPTV_KEEPCNT * tcp_keepintvl, refreshed
				 * by tcp_slowtimo() */

/*
 * Time to delay the ACK.  Set to TCP_DELACK_TICKS by tcp_timer_init()
 * (itself called from tcp_init()) unless it has already been patched
 * to a nonzero value.
 */
int	tcp_delack_ticks;
! 67:
! 68: void tcp_timer_rexmt(void *);
! 69: void tcp_timer_persist(void *);
! 70: void tcp_timer_keep(void *);
! 71: void tcp_timer_2msl(void *);
! 72:
! 73: const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
! 74: tcp_timer_rexmt,
! 75: tcp_timer_persist,
! 76: tcp_timer_keep,
! 77: tcp_timer_2msl,
! 78: };
! 79:
! 80: /*
! 81: * Timer state initialization, called from tcp_init().
! 82: */
! 83: void
! 84: tcp_timer_init(void)
! 85: {
! 86:
! 87: if (tcp_keepidle == 0)
! 88: tcp_keepidle = TCPTV_KEEP_IDLE;
! 89:
! 90: if (tcp_keepintvl == 0)
! 91: tcp_keepintvl = TCPTV_KEEPINTVL;
! 92:
! 93: if (tcp_maxpersistidle == 0)
! 94: tcp_maxpersistidle = TCPTV_KEEP_IDLE;
! 95:
! 96: if (tcp_delack_ticks == 0)
! 97: tcp_delack_ticks = TCP_DELACK_TICKS;
! 98: }
! 99:
! 100: /*
! 101: * Callout to process delayed ACKs for a TCPCB.
! 102: */
! 103: void
! 104: tcp_delack(void *arg)
! 105: {
! 106: struct tcpcb *tp = arg;
! 107: int s;
! 108:
! 109: /*
! 110: * If tcp_output() wasn't able to transmit the ACK
! 111: * for whatever reason, it will restart the delayed
! 112: * ACK callout.
! 113: */
! 114:
! 115: s = splsoftnet();
! 116: if (tp->t_flags & TF_DEAD) {
! 117: splx(s);
! 118: return;
! 119: }
! 120: tp->t_flags |= TF_ACKNOW;
! 121: (void) tcp_output(tp);
! 122: splx(s);
! 123: }
! 124:
! 125: /*
! 126: * Tcp protocol timeout routine called every 500 ms.
! 127: * Updates the timers in all active tcb's and
! 128: * causes finite state machine actions if timers expire.
! 129: */
! 130: void
! 131: tcp_slowtimo()
! 132: {
! 133: int s;
! 134:
! 135: s = splsoftnet();
! 136: tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl;
! 137: #ifdef TCP_COMPAT_42
! 138: tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */
! 139: if ((int)tcp_iss < 0)
! 140: tcp_iss = 0; /* XXX */
! 141: #else
! 142: tcp_iss += TCP_ISSINCR2/PR_SLOWHZ; /* increment iss */
! 143: #endif /* TCP_COMPAT_42 */
! 144: tcp_now++; /* for timestamps */
! 145: splx(s);
! 146: }
! 147:
! 148: /*
! 149: * Cancel all timers for TCP tp.
! 150: */
! 151: void
! 152: tcp_canceltimers(tp)
! 153: struct tcpcb *tp;
! 154: {
! 155: int i;
! 156:
! 157: for (i = 0; i < TCPT_NTIMERS; i++)
! 158: TCP_TIMER_DISARM(tp, i);
! 159: }
! 160:
! 161: int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
! 162: { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
! 163:
! 164: int tcp_totbackoff = 511; /* sum of tcp_backoff[] */
! 165:
! 166: /*
! 167: * TCP timer processing.
! 168: */
! 169:
#ifdef TCP_SACK
void	tcp_timer_freesack(struct tcpcb *);

/*
 * Free SACK holes for the 2MSL and REXMT timers.
 *
 * Returns every entry on tp->snd_holes to sackhl_pool and leaves the
 * list empty.  With TCP_FACK, snd_fack is reset to snd_una and
 * retran_data and snd_awnd are zeroed as well.
 */
void
tcp_timer_freesack(struct tcpcb *tp)
{
	struct sackhole *hole, *next;

	/* Fetch the successor before the node goes back to the pool. */
	for (hole = tp->snd_holes; hole != NULL; hole = next) {
		next = hole->next;
		pool_put(&sackhl_pool, hole);
	}
	tp->snd_holes = NULL;
#ifdef TCP_FACK
	tp->snd_fack = tp->snd_una;
	tp->retran_data = 0;
	tp->snd_awnd = 0;
#endif /* TCP_FACK */
}
#endif /* TCP_SACK */
! 194:
! 195: void
! 196: tcp_timer_rexmt(void *arg)
! 197: {
! 198: struct tcpcb *tp = arg;
! 199: uint32_t rto;
! 200: int s;
! 201:
! 202: s = splsoftnet();
! 203: if (tp->t_flags & TF_DEAD) {
! 204: splx(s);
! 205: return;
! 206: }
! 207:
! 208: if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
! 209: SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
! 210: SEQ_LT(tp->t_pmtud_th_seq, (int)(tp->snd_una + tp->t_maxseg))) {
! 211: extern struct sockaddr_in icmpsrc;
! 212: struct icmp icmp;
! 213:
! 214: tp->t_flags &= ~TF_PMTUD_PEND;
! 215:
! 216: /* XXX create fake icmp message with relevant entries */
! 217: icmp.icmp_nextmtu = tp->t_pmtud_nextmtu;
! 218: icmp.icmp_ip.ip_len = tp->t_pmtud_ip_len;
! 219: icmp.icmp_ip.ip_hl = tp->t_pmtud_ip_hl;
! 220: icmpsrc.sin_addr = tp->t_inpcb->inp_faddr;
! 221: icmp_mtudisc(&icmp);
! 222:
! 223: /*
! 224: * Notify all connections to the same peer about
! 225: * new mss and trigger retransmit.
! 226: */
! 227: in_pcbnotifyall(&tcbtable, sintosa(&icmpsrc), EMSGSIZE,
! 228: tcp_mtudisc);
! 229: splx(s);
! 230: return;
! 231: }
! 232:
! 233: #ifdef TCP_SACK
! 234: tcp_timer_freesack(tp);
! 235: #endif
! 236: if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
! 237: tp->t_rxtshift = TCP_MAXRXTSHIFT;
! 238: tcpstat.tcps_timeoutdrop++;
! 239: (void)tcp_drop(tp, tp->t_softerror ?
! 240: tp->t_softerror : ETIMEDOUT);
! 241: goto out;
! 242: }
! 243: tcpstat.tcps_rexmttimeo++;
! 244: rto = TCP_REXMTVAL(tp);
! 245: if (rto < tp->t_rttmin)
! 246: rto = tp->t_rttmin;
! 247: TCPT_RANGESET(tp->t_rxtcur,
! 248: rto * tcp_backoff[tp->t_rxtshift],
! 249: tp->t_rttmin, TCPTV_REXMTMAX);
! 250: TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
! 251:
! 252: /*
! 253: * If we are losing and we are trying path MTU discovery,
! 254: * try turning it off. This will avoid black holes in
! 255: * the network which suppress or fail to send "packet
! 256: * too big" ICMP messages. We should ideally do
! 257: * lots more sophisticated searching to find the right
! 258: * value here...
! 259: */
! 260: if (ip_mtudisc && tp->t_inpcb &&
! 261: TCPS_HAVEESTABLISHED(tp->t_state) &&
! 262: tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) {
! 263: struct inpcb *inp = tp->t_inpcb;
! 264: struct rtentry *rt = NULL;
! 265: struct sockaddr_in sin;
! 266:
! 267: /* No data to send means path mtu is not a problem */
! 268: if (!inp->inp_socket->so_snd.sb_cc)
! 269: goto leave;
! 270:
! 271: rt = in_pcbrtentry(inp);
! 272: /* Check if path MTU discovery is disabled already */
! 273: if (rt && (rt->rt_flags & RTF_HOST) &&
! 274: (rt->rt_rmx.rmx_locks & RTV_MTU))
! 275: goto leave;
! 276:
! 277: rt = NULL;
! 278: switch(tp->pf) {
! 279: #ifdef INET6
! 280: case PF_INET6:
! 281: /*
! 282: * We can not turn off path MTU for IPv6.
! 283: * Do nothing for now, maybe lower to
! 284: * minimum MTU.
! 285: */
! 286: break;
! 287: #endif
! 288: case PF_INET:
! 289: bzero(&sin, sizeof(struct sockaddr_in));
! 290: sin.sin_family = AF_INET;
! 291: sin.sin_len = sizeof(struct sockaddr_in);
! 292: sin.sin_addr = inp->inp_faddr;
! 293: rt = icmp_mtudisc_clone(sintosa(&sin));
! 294: break;
! 295: }
! 296: if (rt != NULL) {
! 297: /* Disable path MTU discovery */
! 298: if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
! 299: rt->rt_rmx.rmx_locks |= RTV_MTU;
! 300: in_rtchange(inp, 0);
! 301: }
! 302:
! 303: rtfree(rt);
! 304: }
! 305: leave:
! 306: ;
! 307: }
! 308:
! 309: /*
! 310: * If losing, let the lower level know and try for
! 311: * a better route. Also, if we backed off this far,
! 312: * our srtt estimate is probably bogus. Clobber it
! 313: * so we'll take the next rtt measurement as our srtt;
! 314: * move the current srtt into rttvar to keep the current
! 315: * retransmit times until then.
! 316: */
! 317: if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
! 318: in_losing(tp->t_inpcb);
! 319: tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
! 320: tp->t_srtt = 0;
! 321: }
! 322: tp->snd_nxt = tp->snd_una;
! 323: #if defined(TCP_SACK)
! 324: /*
! 325: * Note: We overload snd_last to function also as the
! 326: * snd_last variable described in RFC 2582
! 327: */
! 328: tp->snd_last = tp->snd_max;
! 329: #endif /* TCP_SACK */
! 330: /*
! 331: * If timing a segment in this window, stop the timer.
! 332: */
! 333: tp->t_rtttime = 0;
! 334: #ifdef TCP_ECN
! 335: /*
! 336: * if ECN is enabled, there might be a broken firewall which
! 337: * blocks ecn packets. fall back to non-ecn.
! 338: */
! 339: if ((tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED)
! 340: && tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
! 341: tp->t_flags |= TF_DISABLE_ECN;
! 342: #endif
! 343: /*
! 344: * Close the congestion window down to one segment
! 345: * (we'll open it by one segment for each ack we get).
! 346: * Since we probably have a window's worth of unacked
! 347: * data accumulated, this "slow start" keeps us from
! 348: * dumping all that data as back-to-back packets (which
! 349: * might overwhelm an intermediate gateway).
! 350: *
! 351: * There are two phases to the opening: Initially we
! 352: * open by one mss on each ack. This makes the window
! 353: * size increase exponentially with time. If the
! 354: * window is larger than the path can handle, this
! 355: * exponential growth results in dropped packet(s)
! 356: * almost immediately. To get more time between
! 357: * drops but still "push" the network to take advantage
! 358: * of improving conditions, we switch from exponential
! 359: * to linear window opening at some threshold size.
! 360: * For a threshold, we use half the current window
! 361: * size, truncated to a multiple of the mss.
! 362: *
! 363: * (the minimum cwnd that will give us exponential
! 364: * growth is 2 mss. We don't allow the threshold
! 365: * to go below this.)
! 366: */
! 367: {
! 368: u_long win = ulmin(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
! 369: if (win < 2)
! 370: win = 2;
! 371: tp->snd_cwnd = tp->t_maxseg;
! 372: tp->snd_ssthresh = win * tp->t_maxseg;
! 373: tp->t_dupacks = 0;
! 374: #ifdef TCP_ECN
! 375: tp->snd_last = tp->snd_max;
! 376: tp->t_flags |= TF_SEND_CWR;
! 377: #endif
! 378: #if 1 /* TCP_ECN */
! 379: tcpstat.tcps_cwr_timeout++;
! 380: #endif
! 381: }
! 382: (void) tcp_output(tp);
! 383:
! 384: out:
! 385: splx(s);
! 386: }
! 387:
! 388: void
! 389: tcp_timer_persist(void *arg)
! 390: {
! 391: struct tcpcb *tp = arg;
! 392: uint32_t rto;
! 393: int s;
! 394:
! 395: s = splsoftnet();
! 396: if ((tp->t_flags & TF_DEAD) ||
! 397: TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
! 398: splx(s);
! 399: return;
! 400: }
! 401: tcpstat.tcps_persisttimeo++;
! 402: /*
! 403: * Hack: if the peer is dead/unreachable, we do not
! 404: * time out if the window is closed. After a full
! 405: * backoff, drop the connection if the idle time
! 406: * (no responses to probes) reaches the maximum
! 407: * backoff that we would use if retransmitting.
! 408: */
! 409: rto = TCP_REXMTVAL(tp);
! 410: if (rto < tp->t_rttmin)
! 411: rto = tp->t_rttmin;
! 412: if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
! 413: ((tcp_now - tp->t_rcvtime) >= tcp_maxpersistidle ||
! 414: (tcp_now - tp->t_rcvtime) >= rto * tcp_totbackoff)) {
! 415: tcpstat.tcps_persistdrop++;
! 416: tp = tcp_drop(tp, ETIMEDOUT);
! 417: goto out;
! 418: }
! 419: tcp_setpersist(tp);
! 420: tp->t_force = 1;
! 421: (void) tcp_output(tp);
! 422: tp->t_force = 0;
! 423: out:
! 424: splx(s);
! 425: }
! 426:
! 427: void
! 428: tcp_timer_keep(void *arg)
! 429: {
! 430: struct tcpcb *tp = arg;
! 431: int s;
! 432:
! 433: s = splsoftnet();
! 434: if (tp->t_flags & TF_DEAD) {
! 435: splx(s);
! 436: return;
! 437: }
! 438:
! 439: tcpstat.tcps_keeptimeo++;
! 440: if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
! 441: goto dropit;
! 442: if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE &&
! 443: tp->t_state <= TCPS_CLOSING) {
! 444: if ((tcp_maxidle > 0) &&
! 445: ((tcp_now - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle))
! 446: goto dropit;
! 447: /*
! 448: * Send a packet designed to force a response
! 449: * if the peer is up and reachable:
! 450: * either an ACK if the connection is still alive,
! 451: * or an RST if the peer has closed the connection
! 452: * due to timeout or reboot.
! 453: * Using sequence number tp->snd_una-1
! 454: * causes the transmitted zero-length segment
! 455: * to lie outside the receive window;
! 456: * by the protocol spec, this requires the
! 457: * correspondent TCP to respond.
! 458: */
! 459: tcpstat.tcps_keepprobe++;
! 460: #ifdef TCP_COMPAT_42
! 461: /*
! 462: * The keepalive packet must have nonzero length
! 463: * to get a 4.2 host to respond.
! 464: */
! 465: tcp_respond(tp, mtod(tp->t_template, caddr_t),
! 466: (struct mbuf *)NULL, tp->rcv_nxt - 1, tp->snd_una - 1, 0);
! 467: #else
! 468: tcp_respond(tp, mtod(tp->t_template, caddr_t),
! 469: (struct mbuf *)NULL, tp->rcv_nxt, tp->snd_una - 1, 0);
! 470: #endif
! 471: TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
! 472: } else
! 473: TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
! 474:
! 475: splx(s);
! 476: return;
! 477:
! 478: dropit:
! 479: tcpstat.tcps_keepdrops++;
! 480: tp = tcp_drop(tp, ETIMEDOUT);
! 481:
! 482: splx(s);
! 483: }
! 484:
! 485: void
! 486: tcp_timer_2msl(void *arg)
! 487: {
! 488: struct tcpcb *tp = arg;
! 489: int s;
! 490:
! 491: s = splsoftnet();
! 492: if (tp->t_flags & TF_DEAD) {
! 493: splx(s);
! 494: return;
! 495: }
! 496:
! 497: #ifdef TCP_SACK
! 498: tcp_timer_freesack(tp);
! 499: #endif
! 500:
! 501: if (tp->t_state != TCPS_TIME_WAIT &&
! 502: ((tcp_maxidle == 0) || ((tcp_now - tp->t_rcvtime) <= tcp_maxidle)))
! 503: TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl);
! 504: else
! 505: tp = tcp_close(tp);
! 506:
! 507: splx(s);
! 508: }
CVSweb