/*	$OpenBSD: kern_clock.c,v 1.64 2007/05/16 17:27:30 art Exp $	*/
/*	$NetBSD: kern_clock.c,v 1.34 1996/06/09 04:51:03 briggs Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <uvm/uvm_extern.h>
#include <sys/sysctl.h>
#include <sys/sched.h>
#ifdef __HAVE_TIMECOUNTER
#include <sys/timetc.h>
#endif

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */
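
/*
 * Worked example of the ratio above (rates are illustrative, not mandated
 * by this file): with stathz = 128 and profhz = 1024, psratio = 1024/128
 * = 8, so while any process is being profiled the statistics clock runs
 * eight times fast and only every 8th tick is charged to the cp_time and
 * per-process accounting done in statclock() below.
 */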

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	volatile struct timeval *tp = (t); \
	long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
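
/*
 * Example with illustrative values: if tp->tv_usec is 999990 and usec is
 * 20, us becomes 1000010 >= 1000000, so tv_usec is rewound to 10 and
 * tv_sec is incremented.  A single carry suffices because callers only
 * bump by a small number of microseconds per tick, never a full second.
 */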

int	stathz;
int	schedhz;
int	profhz;
int	profprocs;
int	ticks;
static int psdiv, pscnt;		/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */

long cp_time[CPUSTATES];

#ifndef __HAVE_TIMECOUNTER
int	tickfix, tickfixinterval;	/* used if tick not really integral */
static int tickfixcnt;			/* accumulated fractional error */

volatile time_t time_second;
volatile time_t time_uptime;

volatile struct timeval time
	__attribute__((__aligned__(__alignof__(quad_t))));
volatile struct timeval mono_time;
#endif

#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
void	*softclock_si;
void	generic_softclock(void *);

void
generic_softclock(void *ignore)
{
	/*
	 * XXX - don't commit; this is just a dummy wrapper until we teach
	 * everyone to deal with the changed prototype for softclock().
	 */
	softclock();
}
#endif

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks(void)
{
	int i;
#ifdef __HAVE_TIMECOUNTER
	extern void inittimecounter(void);
#endif

#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
	softclock_si = softintr_establish(IPL_SOFTCLOCK, generic_softclock, NULL);
	if (softclock_si == NULL)
		panic("initclocks: unable to register softclock intr");
#endif

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
#ifdef __HAVE_TIMECOUNTER
	inittimecounter();
#endif
}
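
/*
 * Example of the fix-up above (values are illustrative): if the MD
 * cpu_initclocks() leaves stathz = 0 and profhz = 0 with hz = 100, then
 * i = 100, profhz is defaulted to 100 and psratio = 1.  If it instead
 * provides stathz = 128 and profhz = 1024, psratio = 8.
 */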

/*
 * hardclock does the accounting needed for ITIMER_PROF and ITIMER_VIRTUAL.
 * We don't want to send signals with psignal from hardclock because that
 * makes MULTIPROCESSOR locking very complicated.  Instead we use a small
 * trick to send the signals safely and without blocking too many
 * interrupts while doing so (signal handling can be heavy).
 *
 * hardclock detects that the itimer has expired and schedules a timeout
 * to deliver the signal.  This works for the following reasons:
 *  - The timeout structures can live in struct pstats because the timers
 *    can only be activated on curproc (never a swapped-out process).
 *    Swapout can only happen from a kernel thread, and softclock runs
 *    before threads are scheduled.
 *  - The timeout can be scheduled with a one-tick delay because we
 *    schedule it before the timeout processing in hardclock, so it will
 *    run as soon as possible.
 *  - The timeout runs in softclock, which runs before we return to
 *    userland and process pending signals.
 *  - If the system is so busy that several VIRTUAL/PROF ticks occur
 *    before softclock processing, we'll send only one signal.  But if
 *    we sent the signal from hardclock, only one signal would be
 *    delivered to the user process anyway, so userland sees the same
 *    thing either way.
 */

void
virttimer_trampoline(void *v)
{
	struct proc *p = v;

	psignal(p, SIGVTALRM);
}

void
proftimer_trampoline(void *v)
{
	struct proc *p = v;

	psignal(p, SIGPROF);
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(struct clockframe *frame)
{
	struct proc *p;
#ifndef __HAVE_TIMECOUNTER
	int delta;
	extern int tickdelta;
	extern long timedelta;
	extern int64_t ntp_tick_permanent;
	extern int64_t ntp_tick_acc;
#endif
	struct cpu_info *ci = curcpu();

	p = curproc;
	if (p && ((p->p_flag & (P_SYSTEM | P_WEXIT)) == 0)) {
		struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			timeout_add(&pstats->p_virt_to, 1);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			timeout_add(&pstats->p_prof_to, 1);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	if (--ci->ci_schedstate.spc_rrticks <= 0)
		roundrobin(ci);

	/*
	 * If we are not the primary CPU, we're not allowed to do
	 * any more work.
	 */
	if (CPU_IS_PRIMARY(ci) == 0)
		return;

#ifndef __HAVE_TIMECOUNTER
	/*
	 * Increment the time-of-day.  The increment is normally just
	 * ``tick''.  If the machine is one which has a clock frequency
	 * such that ``hz'' would not divide the second evenly into
	 * milliseconds, a periodic adjustment must be applied.  Finally,
	 * if we are still adjusting the time (see adjtime()),
	 * ``tickdelta'' may also be added in.
	 */

	delta = tick;

	if (tickfix) {
		tickfixcnt += tickfix;
		if (tickfixcnt >= tickfixinterval) {
			delta++;
			tickfixcnt -= tickfixinterval;
		}
	}
	/* Imprecise 4bsd adjtime() handling */
	if (timedelta != 0) {
		delta += tickdelta;
		timedelta -= tickdelta;
	}

	/*
	 * ntp_tick_permanent accumulates the clock correction each
	 * tick.  The unit is ns per tick shifted left 32 bits.  If we have
	 * accumulated at least 1us, we bump delta in the right
	 * direction.  Use a loop to avoid long long div; typically
	 * the loops will execute 0 or 1 iterations.
	 */
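	/*
	 * Worked example (illustrative numbers): a permanent correction of
	 * 500 ns/tick is stored as ntp_tick_permanent = 500LL << 32.  After
	 * two ticks ntp_tick_acc reaches 1000LL << 32, i.e. one microsecond,
	 * so the first loop below runs once: delta is bumped by 1 us and
	 * the accumulator drops back to zero.
	 */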
	if (ntp_tick_permanent != 0) {
		ntp_tick_acc += ntp_tick_permanent;
		while (ntp_tick_acc >= (1000LL << 32)) {
			delta++;
			ntp_tick_acc -= (1000LL << 32);
		}
		while (ntp_tick_acc <= -(1000LL << 32)) {
			delta--;
			ntp_tick_acc += (1000LL << 32);
		}
	}

	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);
	time_second = time.tv_sec;
	time_uptime = mono_time.tv_sec;
#else
	tc_ticktock();
#endif

	/*
	 * Update real-time timeout queue.
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (timeout_hardclock_update()) {
#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
		softintr_schedule(softclock_si);
#else
		setsoftclock();
#endif
	}
}

/*
 * Compute number of hz until specified time.  Used to
 * compute the second argument to timeout_add() from an absolute time.
 */
int
hzto(struct timeval *tv)
{
	struct timeval now;
	unsigned long ticks;
	long sec, usec;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * difference fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time difference fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time difference down to the maximum
	 * representable value.
	 *
	 * If ints have 32 bits, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
	getmicrotime(&now);
	sec = tv->tv_sec - now.tv_sec;
	usec = tv->tv_usec - now.tv_usec;
	if (usec < 0) {
		sec--;
		usec += 1000000;
	}
	if (sec < 0 || (sec == 0 && usec <= 0)) {
		ticks = 0;
	} else if (sec <= LONG_MAX / 1000000)
		ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
		    / tick + 1;
	else if (sec <= LONG_MAX / hz)
		ticks = sec * hz
		    + ((unsigned long)usec + (tick - 1)) / tick + 1;
	else
		ticks = LONG_MAX;
	if (ticks > INT_MAX)
		ticks = INT_MAX;
	return ((int)ticks);
}
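
/*
 * Worked example for the common path above (illustrative values): with
 * hz = 100 (tick = 10000 us) and a target 25 ms in the future, sec = 0
 * and usec = 25000, so ticks = (25000 + 9999) / 10000 + 1 = 4.  The
 * round-up plus the extra 1 guarantee the timeout fires no earlier than
 * requested even if the current tick is about to expire.
 */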

/*
 * Compute number of hz in the specified amount of time.
 */
int
tvtohz(struct timeval *tv)
{
	unsigned long ticks;
	long sec, usec;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time down to the maximum
	 * representable value.
	 *
	 * If ints have 32 bits, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
	sec = tv->tv_sec;
	usec = tv->tv_usec;
	if (sec < 0 || (sec == 0 && usec <= 0))
		ticks = 0;
	else if (sec <= LONG_MAX / 1000000)
		ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
		    / tick + 1;
	else if (sec <= LONG_MAX / hz)
		ticks = sec * hz
		    + ((unsigned long)usec + (tick - 1)) / tick + 1;
	else
		ticks = LONG_MAX;
	if (ticks > INT_MAX)
		ticks = INT_MAX;
	return ((int)ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		atomic_setbits_int(&p->p_flag, P_PROFIL);
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{
	int s;

	if (p->p_flag & P_PROFIL) {
		atomic_clearbits_int(&p->p_flag, P_PROFIL);
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	int i;
#endif
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	struct proc *p = curproc;

	/*
	 * Notice changes in divisor frequency, and adjust clock
	 * frequency accordingly.
	 */
	if (spc->spc_psdiv != psdiv) {
		spc->spc_psdiv = psdiv;
		spc->spc_pscnt = psdiv;
		if (psdiv == 1) {
			setstatclockrate(stathz);
		} else {
			setstatclockrate(profhz);
		}
	}
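
	/*
	 * Example of the hand-off above (illustrative): when startprofclock()
	 * raises the global psdiv from 1 to psratio (say 8), each CPU notices
	 * the mismatch on its next statclock tick, reprograms its own clock
	 * to profhz, and thereafter charges only every 8th tick to the
	 * statistics counters below.
	 */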

	if (CLKF_USERMODE(frame)) {
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame));
		if (--spc->spc_pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			spc->spc_cp_time[CP_NICE]++;
		else
			spc->spc_cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#if defined(PROC_PC)
		if (p != NULL && p->p_flag & P_PROFIL)
			addupc_intr(p, PROC_PC(p));
#endif
		if (--spc->spc_pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			spc->spc_cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			spc->spc_cp_time[CP_SYS]++;
		} else
			spc->spc_cp_time[CP_IDLE]++;
	}
	spc->spc_pscnt = psdiv;

	if (p != NULL) {
		p->p_cpticks++;
		/*
		 * If no schedclock is provided, call it here at ~~12-25 Hz;
		 * ~~16 Hz is best
		 */
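		/*
		 * The "& 3" below divides the statclock rate by 4; with an
		 * illustrative stathz = 64 that yields the preferred ~16 Hz,
		 * and stathz = 100 yields 25 Hz.
		 */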
		if (schedhz == 0) {
			if ((++curcpu()->ci_schedstate.spc_schedticks & 3) ==
			    0)
				schedclock(p);
		}
	}
}

/*
 * Return information about system clocks.
 */
int
sysctl_clockrate(char *where, size_t *sizep)
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.hz = hz;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
}
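
/*
 * Userland reads the structure above through the kern.clockrate sysctl.
 * A minimal sketch of such a caller (not part of this file):
 *
 *	int mib[2] = { CTL_KERN, KERN_CLOCKRATE };
 *	struct clockinfo ci;
 *	size_t len = sizeof(ci);
 *
 *	if (sysctl(mib, 2, &ci, &len, NULL, 0) == 0)
 *		printf("hz=%d stathz=%d profhz=%d\n",
 *		    ci.hz, ci.stathz, ci.profhz);
 */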

#ifndef __HAVE_TIMECOUNTER
/*
 * Placeholders until everyone uses the timecounters code.
 * Won't improve anything except maybe removing a bunch of bugs in fixed code.
 */

void
getmicrotime(struct timeval *tvp)
{
	int s;

	s = splhigh();
	*tvp = time;
	splx(s);
}

void
nanotime(struct timespec *tsp)
{
	struct timeval tv;

	microtime(&tv);
	TIMEVAL_TO_TIMESPEC(&tv, tsp);
}

void
getnanotime(struct timespec *tsp)
{
	struct timeval tv;

	getmicrotime(&tv);
	TIMEVAL_TO_TIMESPEC(&tv, tsp);
}

void
nanouptime(struct timespec *tsp)
{
	struct timeval tv;

	microuptime(&tv);
	TIMEVAL_TO_TIMESPEC(&tv, tsp);
}

void
getnanouptime(struct timespec *tsp)
{
	struct timeval tv;

	getmicrouptime(&tv);
	TIMEVAL_TO_TIMESPEC(&tv, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct timeval tv;

	microtime(&tv);
	timersub(&tv, &boottime, tvp);
}

void
getmicrouptime(struct timeval *tvp)
{
	int s;

	s = splhigh();
	*tvp = mono_time;
	splx(s);
}
#endif /* __HAVE_TIMECOUNTER */