Annotation of sys/netinet/tcp_timer.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: tcp_timer.c,v 1.39 2007/06/15 18:23:07 markus Exp $ */
2: /* $NetBSD: tcp_timer.c,v 1.14 1996/02/13 23:44:09 christos Exp $ */
3:
4: /*
5: * Copyright (c) 1982, 1986, 1988, 1990, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. Neither the name of the University nor the names of its contributors
17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: *
32: * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93
33: */
34:
35: #include <sys/param.h>
36: #include <sys/systm.h>
37: #include <sys/mbuf.h>
38: #include <sys/socket.h>
39: #include <sys/socketvar.h>
40: #include <sys/protosw.h>
41: #include <sys/kernel.h>
42:
43: #include <net/route.h>
44:
45: #include <netinet/in.h>
46: #include <netinet/in_systm.h>
47: #include <netinet/ip.h>
48: #include <netinet/in_pcb.h>
49: #include <netinet/ip_var.h>
50: #include <netinet/tcp.h>
51: #include <netinet/tcp_fsm.h>
52: #include <netinet/tcp_timer.h>
53: #include <netinet/tcp_var.h>
54: #include <netinet/ip_icmp.h>
55: #include <netinet/tcp_seq.h>
56:
/*
 * Timer tunables.  A value of zero means "not set": tcp_timer_init()
 * replaces zero with the compile-time default.  The idle limits below are
 * compared against differences of tcp_now, which tcp_slowtimo() advances
 * by one per call.
 */
int tcp_keepidle;	/* keep timer re-arm interval; also part of the keepalive drop limit */
int tcp_keepintvl;	/* interval between keepalive probes */
int tcp_maxpersistidle; /* max idle time in persist */
int tcp_maxidle;	/* recomputed by tcp_slowtimo(): TCPTV_KEEPCNT * tcp_keepintvl */

/*
 * Time to delay the ACK.  tcp_timer_init() (called from tcp_init())
 * sets this to TCP_DELACK_TICKS unless it has already been patched to
 * a non-zero value.
 */
int tcp_delack_ticks;
67:
68: void tcp_timer_rexmt(void *);
69: void tcp_timer_persist(void *);
70: void tcp_timer_keep(void *);
71: void tcp_timer_2msl(void *);
72:
73: const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
74: tcp_timer_rexmt,
75: tcp_timer_persist,
76: tcp_timer_keep,
77: tcp_timer_2msl,
78: };
79:
80: /*
81: * Timer state initialization, called from tcp_init().
82: */
83: void
84: tcp_timer_init(void)
85: {
86:
87: if (tcp_keepidle == 0)
88: tcp_keepidle = TCPTV_KEEP_IDLE;
89:
90: if (tcp_keepintvl == 0)
91: tcp_keepintvl = TCPTV_KEEPINTVL;
92:
93: if (tcp_maxpersistidle == 0)
94: tcp_maxpersistidle = TCPTV_KEEP_IDLE;
95:
96: if (tcp_delack_ticks == 0)
97: tcp_delack_ticks = TCP_DELACK_TICKS;
98: }
99:
100: /*
101: * Callout to process delayed ACKs for a TCPCB.
102: */
103: void
104: tcp_delack(void *arg)
105: {
106: struct tcpcb *tp = arg;
107: int s;
108:
109: /*
110: * If tcp_output() wasn't able to transmit the ACK
111: * for whatever reason, it will restart the delayed
112: * ACK callout.
113: */
114:
115: s = splsoftnet();
116: if (tp->t_flags & TF_DEAD) {
117: splx(s);
118: return;
119: }
120: tp->t_flags |= TF_ACKNOW;
121: (void) tcp_output(tp);
122: splx(s);
123: }
124:
125: /*
126: * Tcp protocol timeout routine called every 500 ms.
127: * Updates the timers in all active tcb's and
128: * causes finite state machine actions if timers expire.
129: */
130: void
131: tcp_slowtimo()
132: {
133: int s;
134:
135: s = splsoftnet();
136: tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl;
137: #ifdef TCP_COMPAT_42
138: tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */
139: if ((int)tcp_iss < 0)
140: tcp_iss = 0; /* XXX */
141: #else
142: tcp_iss += TCP_ISSINCR2/PR_SLOWHZ; /* increment iss */
143: #endif /* TCP_COMPAT_42 */
144: tcp_now++; /* for timestamps */
145: splx(s);
146: }
147:
148: /*
149: * Cancel all timers for TCP tp.
150: */
151: void
152: tcp_canceltimers(tp)
153: struct tcpcb *tp;
154: {
155: int i;
156:
157: for (i = 0; i < TCPT_NTIMERS; i++)
158: TCP_TIMER_DISARM(tp, i);
159: }
160:
/*
 * Retransmit backoff multipliers, indexed by tp->t_rxtshift: the timeout
 * doubles for the first six retransmissions and then stays at 64 times
 * the base value.
 */
int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
{ 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };

/* Must be kept in step with the table above: 63 + 7 * 64 = 511. */
int tcp_totbackoff = 511; /* sum of tcp_backoff[] */
165:
166: /*
167: * TCP timer processing.
168: */
169:
#ifdef TCP_SACK
void	tcp_timer_freesack(struct tcpcb *);

/*
 * Give every SACK hole queued on tp back to sackhl_pool and leave the
 * hole list empty.  Used by the 2MSL and REXMT timers.  With TCP_FACK the
 * forward-ACK bookkeeping is reset as well.
 */
void
tcp_timer_freesack(struct tcpcb *tp)
{
	struct sackhole *hole, *following;

	for (hole = tp->snd_holes; hole != NULL; hole = following) {
		/* Fetch the link first; the hole is gone after pool_put(). */
		following = hole->next;
		pool_put(&sackhl_pool, hole);
	}
	tp->snd_holes = NULL;
#ifdef TCP_FACK
	tp->snd_fack = tp->snd_una;
	tp->retran_data = 0;
	tp->snd_awnd = 0;
#endif /* TCP_FACK */
}
#endif /* TCP_SACK */
194:
195: void
196: tcp_timer_rexmt(void *arg)
197: {
198: struct tcpcb *tp = arg;
199: uint32_t rto;
200: int s;
201:
202: s = splsoftnet();
203: if (tp->t_flags & TF_DEAD) {
204: splx(s);
205: return;
206: }
207:
208: if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
209: SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
210: SEQ_LT(tp->t_pmtud_th_seq, (int)(tp->snd_una + tp->t_maxseg))) {
211: extern struct sockaddr_in icmpsrc;
212: struct icmp icmp;
213:
214: tp->t_flags &= ~TF_PMTUD_PEND;
215:
216: /* XXX create fake icmp message with relevant entries */
217: icmp.icmp_nextmtu = tp->t_pmtud_nextmtu;
218: icmp.icmp_ip.ip_len = tp->t_pmtud_ip_len;
219: icmp.icmp_ip.ip_hl = tp->t_pmtud_ip_hl;
220: icmpsrc.sin_addr = tp->t_inpcb->inp_faddr;
221: icmp_mtudisc(&icmp);
222:
223: /*
224: * Notify all connections to the same peer about
225: * new mss and trigger retransmit.
226: */
227: in_pcbnotifyall(&tcbtable, sintosa(&icmpsrc), EMSGSIZE,
228: tcp_mtudisc);
229: splx(s);
230: return;
231: }
232:
233: #ifdef TCP_SACK
234: tcp_timer_freesack(tp);
235: #endif
236: if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
237: tp->t_rxtshift = TCP_MAXRXTSHIFT;
238: tcpstat.tcps_timeoutdrop++;
239: (void)tcp_drop(tp, tp->t_softerror ?
240: tp->t_softerror : ETIMEDOUT);
241: goto out;
242: }
243: tcpstat.tcps_rexmttimeo++;
244: rto = TCP_REXMTVAL(tp);
245: if (rto < tp->t_rttmin)
246: rto = tp->t_rttmin;
247: TCPT_RANGESET(tp->t_rxtcur,
248: rto * tcp_backoff[tp->t_rxtshift],
249: tp->t_rttmin, TCPTV_REXMTMAX);
250: TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
251:
252: /*
253: * If we are losing and we are trying path MTU discovery,
254: * try turning it off. This will avoid black holes in
255: * the network which suppress or fail to send "packet
256: * too big" ICMP messages. We should ideally do
257: * lots more sophisticated searching to find the right
258: * value here...
259: */
260: if (ip_mtudisc && tp->t_inpcb &&
261: TCPS_HAVEESTABLISHED(tp->t_state) &&
262: tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) {
263: struct inpcb *inp = tp->t_inpcb;
264: struct rtentry *rt = NULL;
265: struct sockaddr_in sin;
266:
267: /* No data to send means path mtu is not a problem */
268: if (!inp->inp_socket->so_snd.sb_cc)
269: goto leave;
270:
271: rt = in_pcbrtentry(inp);
272: /* Check if path MTU discovery is disabled already */
273: if (rt && (rt->rt_flags & RTF_HOST) &&
274: (rt->rt_rmx.rmx_locks & RTV_MTU))
275: goto leave;
276:
277: rt = NULL;
278: switch(tp->pf) {
279: #ifdef INET6
280: case PF_INET6:
281: /*
282: * We can not turn off path MTU for IPv6.
283: * Do nothing for now, maybe lower to
284: * minimum MTU.
285: */
286: break;
287: #endif
288: case PF_INET:
289: bzero(&sin, sizeof(struct sockaddr_in));
290: sin.sin_family = AF_INET;
291: sin.sin_len = sizeof(struct sockaddr_in);
292: sin.sin_addr = inp->inp_faddr;
293: rt = icmp_mtudisc_clone(sintosa(&sin));
294: break;
295: }
296: if (rt != NULL) {
297: /* Disable path MTU discovery */
298: if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
299: rt->rt_rmx.rmx_locks |= RTV_MTU;
300: in_rtchange(inp, 0);
301: }
302:
303: rtfree(rt);
304: }
305: leave:
306: ;
307: }
308:
309: /*
310: * If losing, let the lower level know and try for
311: * a better route. Also, if we backed off this far,
312: * our srtt estimate is probably bogus. Clobber it
313: * so we'll take the next rtt measurement as our srtt;
314: * move the current srtt into rttvar to keep the current
315: * retransmit times until then.
316: */
317: if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
318: in_losing(tp->t_inpcb);
319: tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
320: tp->t_srtt = 0;
321: }
322: tp->snd_nxt = tp->snd_una;
323: #if defined(TCP_SACK)
324: /*
325: * Note: We overload snd_last to function also as the
326: * snd_last variable described in RFC 2582
327: */
328: tp->snd_last = tp->snd_max;
329: #endif /* TCP_SACK */
330: /*
331: * If timing a segment in this window, stop the timer.
332: */
333: tp->t_rtttime = 0;
334: #ifdef TCP_ECN
335: /*
336: * if ECN is enabled, there might be a broken firewall which
337: * blocks ecn packets. fall back to non-ecn.
338: */
339: if ((tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED)
340: && tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
341: tp->t_flags |= TF_DISABLE_ECN;
342: #endif
343: /*
344: * Close the congestion window down to one segment
345: * (we'll open it by one segment for each ack we get).
346: * Since we probably have a window's worth of unacked
347: * data accumulated, this "slow start" keeps us from
348: * dumping all that data as back-to-back packets (which
349: * might overwhelm an intermediate gateway).
350: *
351: * There are two phases to the opening: Initially we
352: * open by one mss on each ack. This makes the window
353: * size increase exponentially with time. If the
354: * window is larger than the path can handle, this
355: * exponential growth results in dropped packet(s)
356: * almost immediately. To get more time between
357: * drops but still "push" the network to take advantage
358: * of improving conditions, we switch from exponential
359: * to linear window opening at some threshold size.
360: * For a threshold, we use half the current window
361: * size, truncated to a multiple of the mss.
362: *
363: * (the minimum cwnd that will give us exponential
364: * growth is 2 mss. We don't allow the threshold
365: * to go below this.)
366: */
367: {
368: u_long win = ulmin(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
369: if (win < 2)
370: win = 2;
371: tp->snd_cwnd = tp->t_maxseg;
372: tp->snd_ssthresh = win * tp->t_maxseg;
373: tp->t_dupacks = 0;
374: #ifdef TCP_ECN
375: tp->snd_last = tp->snd_max;
376: tp->t_flags |= TF_SEND_CWR;
377: #endif
378: #if 1 /* TCP_ECN */
379: tcpstat.tcps_cwr_timeout++;
380: #endif
381: }
382: (void) tcp_output(tp);
383:
384: out:
385: splx(s);
386: }
387:
388: void
389: tcp_timer_persist(void *arg)
390: {
391: struct tcpcb *tp = arg;
392: uint32_t rto;
393: int s;
394:
395: s = splsoftnet();
396: if ((tp->t_flags & TF_DEAD) ||
397: TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
398: splx(s);
399: return;
400: }
401: tcpstat.tcps_persisttimeo++;
402: /*
403: * Hack: if the peer is dead/unreachable, we do not
404: * time out if the window is closed. After a full
405: * backoff, drop the connection if the idle time
406: * (no responses to probes) reaches the maximum
407: * backoff that we would use if retransmitting.
408: */
409: rto = TCP_REXMTVAL(tp);
410: if (rto < tp->t_rttmin)
411: rto = tp->t_rttmin;
412: if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
413: ((tcp_now - tp->t_rcvtime) >= tcp_maxpersistidle ||
414: (tcp_now - tp->t_rcvtime) >= rto * tcp_totbackoff)) {
415: tcpstat.tcps_persistdrop++;
416: tp = tcp_drop(tp, ETIMEDOUT);
417: goto out;
418: }
419: tcp_setpersist(tp);
420: tp->t_force = 1;
421: (void) tcp_output(tp);
422: tp->t_force = 0;
423: out:
424: splx(s);
425: }
426:
427: void
428: tcp_timer_keep(void *arg)
429: {
430: struct tcpcb *tp = arg;
431: int s;
432:
433: s = splsoftnet();
434: if (tp->t_flags & TF_DEAD) {
435: splx(s);
436: return;
437: }
438:
439: tcpstat.tcps_keeptimeo++;
440: if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
441: goto dropit;
442: if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE &&
443: tp->t_state <= TCPS_CLOSING) {
444: if ((tcp_maxidle > 0) &&
445: ((tcp_now - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle))
446: goto dropit;
447: /*
448: * Send a packet designed to force a response
449: * if the peer is up and reachable:
450: * either an ACK if the connection is still alive,
451: * or an RST if the peer has closed the connection
452: * due to timeout or reboot.
453: * Using sequence number tp->snd_una-1
454: * causes the transmitted zero-length segment
455: * to lie outside the receive window;
456: * by the protocol spec, this requires the
457: * correspondent TCP to respond.
458: */
459: tcpstat.tcps_keepprobe++;
460: #ifdef TCP_COMPAT_42
461: /*
462: * The keepalive packet must have nonzero length
463: * to get a 4.2 host to respond.
464: */
465: tcp_respond(tp, mtod(tp->t_template, caddr_t),
466: (struct mbuf *)NULL, tp->rcv_nxt - 1, tp->snd_una - 1, 0);
467: #else
468: tcp_respond(tp, mtod(tp->t_template, caddr_t),
469: (struct mbuf *)NULL, tp->rcv_nxt, tp->snd_una - 1, 0);
470: #endif
471: TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
472: } else
473: TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
474:
475: splx(s);
476: return;
477:
478: dropit:
479: tcpstat.tcps_keepdrops++;
480: tp = tcp_drop(tp, ETIMEDOUT);
481:
482: splx(s);
483: }
484:
485: void
486: tcp_timer_2msl(void *arg)
487: {
488: struct tcpcb *tp = arg;
489: int s;
490:
491: s = splsoftnet();
492: if (tp->t_flags & TF_DEAD) {
493: splx(s);
494: return;
495: }
496:
497: #ifdef TCP_SACK
498: tcp_timer_freesack(tp);
499: #endif
500:
501: if (tp->t_state != TCPS_TIME_WAIT &&
502: ((tcp_maxidle == 0) || ((tcp_now - tp->t_rcvtime) <= tcp_maxidle)))
503: TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl);
504: else
505: tp = tcp_close(tp);
506:
507: splx(s);
508: }
CVSweb