Annotation of sys/kern/subr_pool.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: subr_pool.c,v 1.55 2007/08/16 15:18:54 art Exp $ */
2: /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */
3:
4: /*-
5: * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6: * All rights reserved.
7: *
8: * This code is derived from software contributed to The NetBSD Foundation
9: * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10: * Simulation Facility, NASA Ames Research Center.
11: *
12: * Redistribution and use in source and binary forms, with or without
13: * modification, are permitted provided that the following conditions
14: * are met:
15: * 1. Redistributions of source code must retain the above copyright
16: * notice, this list of conditions and the following disclaimer.
17: * 2. Redistributions in binary form must reproduce the above copyright
18: * notice, this list of conditions and the following disclaimer in the
19: * documentation and/or other materials provided with the distribution.
20: * 3. All advertising materials mentioning features or use of this software
21: * must display the following acknowledgement:
22: * This product includes software developed by the NetBSD
23: * Foundation, Inc. and its contributors.
24: * 4. Neither the name of The NetBSD Foundation nor the names of its
25: * contributors may be used to endorse or promote products derived
26: * from this software without specific prior written permission.
27: *
28: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38: * POSSIBILITY OF SUCH DAMAGE.
39: */
40:
41: #include <sys/param.h>
42: #include <sys/systm.h>
43: #include <sys/proc.h>
44: #include <sys/errno.h>
45: #include <sys/kernel.h>
46: #include <sys/malloc.h>
47: #include <sys/lock.h>
48: #include <sys/pool.h>
49: #include <sys/syslog.h>
50: #include <sys/sysctl.h>
51:
52: #include <uvm/uvm.h>
53:
54: /*
55: * XXX - for now.
56: */
57: #ifdef LOCKDEBUG
58: #define simple_lock_freecheck(a, s) do { /* nothing */ } while (0)
59: #define simple_lock_only_held(lkp, str) do { /* nothing */ } while (0)
60: #endif
61:
62: /*
63: * Pool resource management utility.
64: *
65: * Memory is allocated in pages which are split into pieces according to
66: * the pool item size. Each page is kept on one of three lists in the
67: * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
68: * for empty, full and partially-full pages respectively. The individual
69: * pool items are on a linked list headed by `ph_itemlist' in each page
70: * header. The memory for building the page list is either taken from
71: * the allocated pages themselves (for small pool items) or taken from
72: * an internal pool of page headers (`phpool').
73: */
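/*
 * Example (illustrative sketch, not part of the original source): a
 * typical client declares a pool, initializes it once, and then gets
 * and puts fixed-size items.  "struct foo" and "foopl" are
 * hypothetical names.
 *
 *	struct pool foopl;
 *
 *	pool_init(&foopl, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *
 *	struct foo *f = pool_get(&foopl, PR_WAITOK);
 *	...
 *	pool_put(&foopl, f);
 */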
74:
75: /* List of all pools */
76: TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);
77:
78: /* Private pool for page header structures */
79: static struct pool phpool;
80:
    81: /* This spin lock protects pool_head */
82: struct simplelock pool_head_slock;
83:
84: struct pool_item_header {
85: /* Page headers */
86: LIST_ENTRY(pool_item_header)
87: ph_pagelist; /* pool page list */
88: TAILQ_HEAD(,pool_item) ph_itemlist; /* chunk list for this page */
89: SPLAY_ENTRY(pool_item_header)
90: ph_node; /* Off-page page headers */
91: int ph_nmissing; /* # of chunks in use */
92: caddr_t ph_page; /* this page's address */
93: };
94:
95: struct pool_item {
96: #ifdef DIAGNOSTIC
97: int pi_magic;
98: #endif
99: #ifdef DEADBEEF1
100: #define PI_MAGIC DEADBEEF1
101: #else
    102: #define PI_MAGIC 0xdeadbeef
103: #endif
104: /* Other entries use only this list entry */
105: TAILQ_ENTRY(pool_item) pi_list;
106: };
107:
108: #define POOL_NEEDS_CATCHUP(pp) \
109: ((pp)->pr_nitems < (pp)->pr_minitems)
110:
111: /*
112: * Every pool gets a unique serial number assigned to it. If this counter
113: * wraps, we're screwed, but we shouldn't create so many pools anyway.
114: */
115: unsigned int pool_serial;
116:
117: /*
118: * Pool cache management.
119: *
120: * Pool caches provide a way for constructed objects to be cached by the
121: * pool subsystem. This can lead to performance improvements by avoiding
122: * needless object construction/destruction; it is deferred until absolutely
123: * necessary.
124: *
125: * Caches are grouped into cache groups. Each cache group references
126: * up to 16 constructed objects. When a cache allocates an object
127: * from the pool, it calls the object's constructor and places it into
128: * a cache group. When a cache group frees an object back to the pool,
129: * it first calls the object's destructor. This allows the object to
130: * persist in constructed form while freed to the cache.
131: *
132: * Multiple caches may exist for each pool. This allows a single
133: * object type to have multiple constructed forms. The pool references
134: * each cache, so that when a pool is drained by the pagedaemon, it can
135: * drain each individual cache as well. Each time a cache is drained,
136: * the most idle cache group is freed to the pool in its entirety.
137: *
    138: * Pool caches are laid on top of pools. By layering them, we can avoid
139: * the complexity of cache management for pools which would not benefit
140: * from it.
141: */
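/*
 * Example (sketch; the foo_* names are hypothetical): layering a cache
 * over an existing pool so objects are handed out already constructed.
 * The constructor runs only on a cache miss; pool_cache_put() keeps
 * the object constructed for the next pool_cache_get().
 *
 *	int foo_ctor(void *arg, void *obj, int flags);
 *	void foo_dtor(void *arg, void *obj);
 *	struct pool_cache foocache;
 *
 *	pool_cache_init(&foocache, &foopl, foo_ctor, foo_dtor, NULL);
 *
 *	struct foo *f = pool_cache_get(&foocache, PR_WAITOK);
 *	...
 *	pool_cache_put(&foocache, f);
 */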
142:
143: /* The cache group pool. */
144: static struct pool pcgpool;
145:
146: /* The pool cache group. */
147: #define PCG_NOBJECTS 16
148: struct pool_cache_group {
149: TAILQ_ENTRY(pool_cache_group)
150: pcg_list; /* link in the pool cache's group list */
151: u_int pcg_avail; /* # available objects */
152: /* pointers to the objects */
153: void *pcg_objects[PCG_NOBJECTS];
154: };
155:
156: void pool_cache_reclaim(struct pool_cache *);
157: void pool_cache_do_invalidate(struct pool_cache *, int,
158: void (*)(struct pool *, void *));
159:
160: int pool_catchup(struct pool *);
161: void pool_prime_page(struct pool *, caddr_t, struct pool_item_header *);
162: void pool_update_curpage(struct pool *);
163: void pool_do_put(struct pool *, void *);
164: void pr_rmpage(struct pool *, struct pool_item_header *,
165: struct pool_pagelist *);
166: int pool_chk_page(struct pool *, const char *, struct pool_item_header *);
167:
168: void *pool_allocator_alloc(struct pool *, int);
169: void pool_allocator_free(struct pool *, void *);
170:
171: #ifdef DDB
172: void pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...));
173: void pool_print1(struct pool *, const char *, int (*)(const char *, ...));
174: #endif
175:
176:
177: /*
178: * Pool log entry. An array of these is allocated in pool_init().
179: */
180: struct pool_log {
181: const char *pl_file;
182: long pl_line;
183: int pl_action;
184: #define PRLOG_GET 1
185: #define PRLOG_PUT 2
186: void *pl_addr;
187: };
188:
189: /* Number of entries in pool log buffers */
190: #ifndef POOL_LOGSIZE
191: #define POOL_LOGSIZE 10
192: #endif
193:
194: int pool_logsize = POOL_LOGSIZE;
195:
196: #ifdef POOL_DIAGNOSTIC
197: static __inline void
198: pr_log(struct pool *pp, void *v, int action, const char *file, long line)
199: {
200: int n = pp->pr_curlogentry;
201: struct pool_log *pl;
202:
203: if ((pp->pr_roflags & PR_LOGGING) == 0)
204: return;
205:
206: /*
207: * Fill in the current entry. Wrap around and overwrite
208: * the oldest entry if necessary.
209: */
210: pl = &pp->pr_log[n];
211: pl->pl_file = file;
212: pl->pl_line = line;
213: pl->pl_action = action;
214: pl->pl_addr = v;
215: if (++n >= pp->pr_logsize)
216: n = 0;
217: pp->pr_curlogentry = n;
218: }
219:
220: static void
221: pr_printlog(struct pool *pp, struct pool_item *pi,
222: int (*pr)(const char *, ...))
223: {
224: int i = pp->pr_logsize;
225: int n = pp->pr_curlogentry;
226:
227: if ((pp->pr_roflags & PR_LOGGING) == 0)
228: return;
229:
230: /*
231: * Print all entries in this pool's log.
232: */
233: while (i-- > 0) {
234: struct pool_log *pl = &pp->pr_log[n];
235: if (pl->pl_action != 0) {
236: if (pi == NULL || pi == pl->pl_addr) {
237: (*pr)("\tlog entry %d:\n", i);
238: (*pr)("\t\taction = %s, addr = %p\n",
239: pl->pl_action == PRLOG_GET ? "get" : "put",
240: pl->pl_addr);
241: (*pr)("\t\tfile: %s at line %lu\n",
242: pl->pl_file, pl->pl_line);
243: }
244: }
245: if (++n >= pp->pr_logsize)
246: n = 0;
247: }
248: }
249:
250: static __inline void
251: pr_enter(struct pool *pp, const char *file, long line)
252: {
253:
254: if (__predict_false(pp->pr_entered_file != NULL)) {
255: printf("pool %s: reentrancy at file %s line %ld\n",
256: pp->pr_wchan, file, line);
257: printf(" previous entry at file %s line %ld\n",
258: pp->pr_entered_file, pp->pr_entered_line);
259: panic("pr_enter");
260: }
261:
262: pp->pr_entered_file = file;
263: pp->pr_entered_line = line;
264: }
265:
266: static __inline void
267: pr_leave(struct pool *pp)
268: {
269:
270: if (__predict_false(pp->pr_entered_file == NULL)) {
271: printf("pool %s not entered?\n", pp->pr_wchan);
272: panic("pr_leave");
273: }
274:
275: pp->pr_entered_file = NULL;
276: pp->pr_entered_line = 0;
277: }
278:
279: static __inline void
280: pr_enter_check(struct pool *pp, int (*pr)(const char *, ...))
281: {
282:
283: if (pp->pr_entered_file != NULL)
284: (*pr)("\n\tcurrently entered from file %s line %ld\n",
285: pp->pr_entered_file, pp->pr_entered_line);
286: }
287: #else
288: #define pr_log(pp, v, action, file, line)
289: #define pr_printlog(pp, pi, pr)
290: #define pr_enter(pp, file, line)
291: #define pr_leave(pp)
292: #define pr_enter_check(pp, pr)
293: #endif /* POOL_DIAGNOSTIC */
294:
295: static __inline int
296: phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
297: {
298: if (a->ph_page < b->ph_page)
299: return (-1);
300: else if (a->ph_page > b->ph_page)
301: return (1);
302: else
303: return (0);
304: }
305:
306: SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
307: SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
308:
309: /*
310: * Return the pool page header based on page address.
311: */
312: static __inline struct pool_item_header *
313: pr_find_pagehead(struct pool *pp, caddr_t page)
314: {
315: struct pool_item_header *ph, tmp;
316:
317: if ((pp->pr_roflags & PR_PHINPAGE) != 0)
318: return ((struct pool_item_header *)(page + pp->pr_phoffset));
319:
320: tmp.ph_page = page;
321: ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);
322: return ph;
323: }
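/*
 * Worked example of the lookup above (illustrative addresses, assuming
 * pa_pagesz == 4096): pa_pagemask is ~(4096 - 1) == 0xfffff000, so an
 * item at 0xd0a012f8 masks down to page 0xd0a01000.  With PR_PHINPAGE
 * the header sits at page + pr_phoffset; otherwise the splay tree is
 * searched with the page address as the key.
 */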
324:
325: /*
326: * Remove a page from the pool.
327: */
328: void
329: pr_rmpage(struct pool *pp, struct pool_item_header *ph,
330: struct pool_pagelist *pq)
331: {
332: int s;
333:
334: /*
335: * If the page was idle, decrement the idle page count.
336: */
337: if (ph->ph_nmissing == 0) {
338: #ifdef DIAGNOSTIC
339: if (pp->pr_nidle == 0)
340: panic("pr_rmpage: nidle inconsistent");
341: if (pp->pr_nitems < pp->pr_itemsperpage)
342: panic("pr_rmpage: nitems inconsistent");
343: #endif
344: pp->pr_nidle--;
345: }
346:
347: pp->pr_nitems -= pp->pr_itemsperpage;
348:
349: /*
350: * Unlink a page from the pool and release it (or queue it for release).
351: */
352: LIST_REMOVE(ph, ph_pagelist);
353: if (pq) {
354: LIST_INSERT_HEAD(pq, ph, ph_pagelist);
355: } else {
356: pool_allocator_free(pp, ph->ph_page);
357: if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
358: SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
359: s = splhigh();
360: pool_put(&phpool, ph);
361: splx(s);
362: }
363: }
364: pp->pr_npages--;
365: pp->pr_npagefree++;
366:
367: pool_update_curpage(pp);
368: }
369:
370: /*
371: * Initialize the given pool resource structure.
372: *
373: * We export this routine to allow other kernel parts to declare
374: * static pools that must be initialized before malloc() is available.
375: */
376: void
377: pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
378: const char *wchan, struct pool_allocator *palloc)
379: {
380: int off, slack;
381:
382: #ifdef POOL_DIAGNOSTIC
383: /*
384: * Always log if POOL_DIAGNOSTIC is defined.
385: */
386: if (pool_logsize != 0)
387: flags |= PR_LOGGING;
388: #endif
389:
390: #ifdef MALLOC_DEBUG
391: if ((flags & PR_DEBUG) && (ioff != 0 || align != 0))
392: flags &= ~PR_DEBUG;
393: #endif
394: /*
395: * Check arguments and construct default values.
396: */
397: if (palloc == NULL)
398: palloc = &pool_allocator_nointr;
399: if ((palloc->pa_flags & PA_INITIALIZED) == 0) {
400: if (palloc->pa_pagesz == 0)
401: palloc->pa_pagesz = PAGE_SIZE;
402:
403: TAILQ_INIT(&palloc->pa_list);
404:
405: simple_lock_init(&palloc->pa_slock);
406: palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
407: palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
408: palloc->pa_flags |= PA_INITIALIZED;
409: }
410:
411: if (align == 0)
412: align = ALIGN(1);
413:
414: if (size < sizeof(struct pool_item))
415: size = sizeof(struct pool_item);
416:
417: size = roundup(size, align);
418: #ifdef DIAGNOSTIC
419: if (size > palloc->pa_pagesz)
420: panic("pool_init: pool item size (%lu) too large",
421: (u_long)size);
422: #endif
423:
424: /*
425: * Initialize the pool structure.
426: */
427: LIST_INIT(&pp->pr_emptypages);
428: LIST_INIT(&pp->pr_fullpages);
429: LIST_INIT(&pp->pr_partpages);
430: TAILQ_INIT(&pp->pr_cachelist);
431: pp->pr_curpage = NULL;
432: pp->pr_npages = 0;
433: pp->pr_minitems = 0;
434: pp->pr_minpages = 0;
435: pp->pr_maxpages = 8;
436: pp->pr_roflags = flags;
437: pp->pr_flags = 0;
438: pp->pr_size = size;
439: pp->pr_align = align;
440: pp->pr_wchan = wchan;
441: pp->pr_alloc = palloc;
442: pp->pr_nitems = 0;
443: pp->pr_nout = 0;
444: pp->pr_hardlimit = UINT_MAX;
445: pp->pr_hardlimit_warning = NULL;
446: pp->pr_hardlimit_ratecap.tv_sec = 0;
447: pp->pr_hardlimit_ratecap.tv_usec = 0;
448: pp->pr_hardlimit_warning_last.tv_sec = 0;
449: pp->pr_hardlimit_warning_last.tv_usec = 0;
450: pp->pr_serial = ++pool_serial;
451: if (pool_serial == 0)
452: panic("pool_init: too much uptime");
453:
454: /*
455: * Decide whether to put the page header off page to avoid
456: * wasting too large a part of the page. Off-page page headers
457: * go on a hash table, so we can match a returned item
458: * with its header based on the page address.
459: * We use 1/16 of the page size as the threshold (XXX: tune)
460: */
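	/*
	 * Worked example (assuming pa_pagesz == 4096): items smaller
	 * than 4096/16 == 256 bytes keep their header in-page, giving
	 * up ALIGN(sizeof(struct pool_item_header)) bytes at the end
	 * of the page; larger items instead pay one phpool allocation
	 * per page and leave the page entirely to items.
	 */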
461: if (pp->pr_size < palloc->pa_pagesz/16) {
462: /* Use the end of the page for the page header */
463: pp->pr_roflags |= PR_PHINPAGE;
464: pp->pr_phoffset = off = palloc->pa_pagesz -
465: ALIGN(sizeof(struct pool_item_header));
466: } else {
467: /* The page header will be taken from our page header pool */
468: pp->pr_phoffset = 0;
469: off = palloc->pa_pagesz;
470: SPLAY_INIT(&pp->pr_phtree);
471: }
472:
473: /*
474: * Alignment is to take place at `ioff' within the item. This means
475: * we must reserve up to `align - 1' bytes on the page to allow
476: * appropriate positioning of each item.
477: *
478: * Silently enforce `0 <= ioff < align'.
479: */
480: pp->pr_itemoffset = ioff = ioff % align;
481: pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
482: KASSERT(pp->pr_itemsperpage != 0);
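	/*
	 * Worked example (illustrative numbers): with off == 4096,
	 * align == 8, ioff == 0 and pr_size == 96, the page yields
	 * (4096 - 0) / 96 == 42 items and 4096 - 42 * 96 == 64 bytes
	 * of slack; pr_maxcolor below becomes (64 / 8) * 8 == 64, so
	 * successive pages start their items at offsets 0, 8, ... 64
	 * before wrapping back to 0.
	 */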
483:
484: /*
485: * Use the slack between the chunks and the page header
486: * for "cache coloring".
487: */
488: slack = off - pp->pr_itemsperpage * pp->pr_size;
489: pp->pr_maxcolor = (slack / align) * align;
490: pp->pr_curcolor = 0;
491:
492: pp->pr_nget = 0;
493: pp->pr_nfail = 0;
494: pp->pr_nput = 0;
495: pp->pr_npagealloc = 0;
496: pp->pr_npagefree = 0;
497: pp->pr_hiwat = 0;
498: pp->pr_nidle = 0;
499:
500: #ifdef POOL_DIAGNOSTIC
501: if (flags & PR_LOGGING) {
502: if (kmem_map == NULL ||
503: (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
504: M_TEMP, M_NOWAIT)) == NULL)
505: pp->pr_roflags &= ~PR_LOGGING;
506: pp->pr_curlogentry = 0;
507: pp->pr_logsize = pool_logsize;
508: }
509: #endif
510:
511: pp->pr_entered_file = NULL;
512: pp->pr_entered_line = 0;
513:
514: simple_lock_init(&pp->pr_slock);
515:
516: pp->pr_ipl = -1;
517:
518: /*
519: * Initialize private page header pool and cache magazine pool if we
520: * haven't done so yet.
521: * XXX LOCKING.
522: */
523: if (phpool.pr_size == 0) {
524: pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
525: 0, "phpool", NULL);
526: pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
527: 0, "pcgpool", NULL);
528: }
529:
530: simple_lock_init(&pool_head_slock);
531:
532: /* Insert this into the list of all pools. */
533: simple_lock(&pool_head_slock);
534: TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
535: simple_unlock(&pool_head_slock);
536:
537: /* Insert into the list of pools using this allocator. */
538: simple_lock(&palloc->pa_slock);
539: TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
540: simple_unlock(&palloc->pa_slock);
541: }
542:
543: #ifdef DIAGNOSTIC
544: void
545: pool_setipl(struct pool *pp, int ipl)
546: {
547: pp->pr_ipl = ipl;
548: }
549: #endif
550:
551: /*
552: * Decommission a pool resource.
553: */
554: void
555: pool_destroy(struct pool *pp)
556: {
557: struct pool_item_header *ph;
558: struct pool_cache *pc;
559:
560: /* Locking order: pool_allocator -> pool */
561: simple_lock(&pp->pr_alloc->pa_slock);
562: TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
563: simple_unlock(&pp->pr_alloc->pa_slock);
564:
565: /* Destroy all caches for this pool. */
566: while ((pc = TAILQ_FIRST(&pp->pr_cachelist)) != NULL)
567: pool_cache_destroy(pc);
568:
569: #ifdef DIAGNOSTIC
570: if (pp->pr_nout != 0) {
571: pr_printlog(pp, NULL, printf);
572: panic("pool_destroy: pool busy: still out: %u",
573: pp->pr_nout);
574: }
575: #endif
576:
577: /* Remove all pages */
578: while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
579: pr_rmpage(pp, ph, NULL);
580: KASSERT(LIST_EMPTY(&pp->pr_fullpages));
581: KASSERT(LIST_EMPTY(&pp->pr_partpages));
582:
583: /* Remove from global pool list */
584: simple_lock(&pool_head_slock);
585: TAILQ_REMOVE(&pool_head, pp, pr_poollist);
586: simple_unlock(&pool_head_slock);
587:
588: #ifdef POOL_DIAGNOSTIC
589: if ((pp->pr_roflags & PR_LOGGING) != 0)
590: free(pp->pr_log, M_TEMP);
591: #endif
592: }
593:
594: static struct pool_item_header *
595: pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
596: {
597: struct pool_item_header *ph;
598: int s;
599:
600: LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);
601:
602: if ((pp->pr_roflags & PR_PHINPAGE) != 0)
603: ph = (struct pool_item_header *) (storage + pp->pr_phoffset);
604: else {
605: s = splhigh();
606: ph = pool_get(&phpool, flags);
607: splx(s);
608: }
609:
610: return (ph);
611: }
612:
613: /*
614: * Grab an item from the pool; must be called at appropriate spl level
615: */
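/*
 * Example of the spl contract (sketch; "foopl" is hypothetical): a
 * pool also used from interrupt context is accessed with the
 * interrupt priority level raised around both get and put.
 *
 *	s = splvm();
 *	f = pool_get(&foopl, PR_NOWAIT);
 *	splx(s);
 *	if (f == NULL)
 *		return (ENOMEM);
 */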
616: void *
617: #ifdef POOL_DIAGNOSTIC
618: _pool_get(struct pool *pp, int flags, const char *file, long line)
619: #else
620: pool_get(struct pool *pp, int flags)
621: #endif
622: {
623: struct pool_item *pi;
624: struct pool_item_header *ph;
625: void *v;
626:
627: #ifdef DIAGNOSTIC
628: if ((flags & PR_WAITOK) != 0)
629: splassert(IPL_NONE);
630: if (pp->pr_ipl != -1)
631: splassert(pp->pr_ipl);
632: if (__predict_false(curproc == NULL && /* doing_shutdown == 0 && XXX*/
633: (flags & PR_WAITOK) != 0))
634: panic("pool_get: %s:must have NOWAIT", pp->pr_wchan);
635:
636: #ifdef LOCKDEBUG
637: if (flags & PR_WAITOK)
638: simple_lock_only_held(NULL, "pool_get(PR_WAITOK)");
639: #endif
640: #endif /* DIAGNOSTIC */
641:
642: #ifdef MALLOC_DEBUG
643: if (pp->pr_roflags & PR_DEBUG) {
644: void *addr;
645:
646: addr = NULL;
647: debug_malloc(pp->pr_size, M_DEBUG,
648: (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr);
649: return (addr);
650: }
651: #endif
652:
653: simple_lock(&pp->pr_slock);
654: pr_enter(pp, file, line);
655:
656: startover:
657: /*
658: * Check to see if we've reached the hard limit. If we have,
659: * and we can wait, then wait until an item has been returned to
660: * the pool.
661: */
662: #ifdef DIAGNOSTIC
663: if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
664: pr_leave(pp);
665: simple_unlock(&pp->pr_slock);
666: panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
667: }
668: #endif
669: if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
670: if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
671: /*
672: * XXX: A warning isn't logged in this case. Should
673: * it be?
674: */
675: pp->pr_flags |= PR_WANTED;
676: pr_leave(pp);
677: ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
678: pr_enter(pp, file, line);
679: goto startover;
680: }
681:
682: /*
683: * Log a message that the hard limit has been hit.
684: */
685: if (pp->pr_hardlimit_warning != NULL &&
686: ratecheck(&pp->pr_hardlimit_warning_last,
687: &pp->pr_hardlimit_ratecap))
688: log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
689:
690: pp->pr_nfail++;
691:
692: pr_leave(pp);
693: simple_unlock(&pp->pr_slock);
694: return (NULL);
695: }
696:
697: /*
698: * The convention we use is that if `curpage' is not NULL, then
699: * it points at a non-empty bucket. In particular, `curpage'
700: * never points at a page header which has PR_PHINPAGE set and
701: * has no items in its bucket.
702: */
703: if ((ph = pp->pr_curpage) == NULL) {
704: #ifdef DIAGNOSTIC
705: if (pp->pr_nitems != 0) {
706: simple_unlock(&pp->pr_slock);
707: printf("pool_get: %s: curpage NULL, nitems %u\n",
708: pp->pr_wchan, pp->pr_nitems);
709: panic("pool_get: nitems inconsistent");
710: }
711: #endif
712:
713: /*
714: * Call the back-end page allocator for more memory.
715: * Release the pool lock, as the back-end page allocator
716: * may block.
717: */
718: pr_leave(pp);
719: simple_unlock(&pp->pr_slock);
720: v = pool_allocator_alloc(pp, flags);
721: if (__predict_true(v != NULL))
722: ph = pool_alloc_item_header(pp, v, flags);
723: simple_lock(&pp->pr_slock);
724: pr_enter(pp, file, line);
725:
726: if (__predict_false(v == NULL || ph == NULL)) {
727: if (v != NULL)
728: pool_allocator_free(pp, v);
729:
730: /*
731: * We were unable to allocate a page or item
732: * header, but we released the lock during
733: * allocation, so perhaps items were freed
734: * back to the pool. Check for this case.
735: */
736: if (pp->pr_curpage != NULL)
737: goto startover;
738:
739: if ((flags & PR_WAITOK) == 0) {
740: pp->pr_nfail++;
741: pr_leave(pp);
742: simple_unlock(&pp->pr_slock);
743: return (NULL);
744: }
745:
746: /*
747: * Wait for items to be returned to this pool.
748: *
749: * XXX: maybe we should wake up once a second and
750: * try again?
751: */
752: pp->pr_flags |= PR_WANTED;
753: /* PA_WANTED is already set on the allocator. */
754: pr_leave(pp);
755: ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
756: pr_enter(pp, file, line);
757: goto startover;
758: }
759:
760: /* We have more memory; add it to the pool */
761: pool_prime_page(pp, v, ph);
762: pp->pr_npagealloc++;
763:
764: /* Start the allocation process over. */
765: goto startover;
766: }
767: if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
768: pr_leave(pp);
769: simple_unlock(&pp->pr_slock);
770: panic("pool_get: %s: page empty", pp->pr_wchan);
771: }
772: #ifdef DIAGNOSTIC
773: if (__predict_false(pp->pr_nitems == 0)) {
774: pr_leave(pp);
775: simple_unlock(&pp->pr_slock);
776: printf("pool_get: %s: items on itemlist, nitems %u\n",
777: pp->pr_wchan, pp->pr_nitems);
778: panic("pool_get: nitems inconsistent");
779: }
780: #endif
781:
782: #ifdef POOL_DIAGNOSTIC
783: pr_log(pp, v, PRLOG_GET, file, line);
784: #endif
785:
786: #ifdef DIAGNOSTIC
787: if (__predict_false(pi->pi_magic != PI_MAGIC)) {
788: pr_printlog(pp, pi, printf);
789: panic("pool_get(%s): free list modified: magic=%x; page %p;"
790: " item addr %p",
791: pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
792: }
793: #endif
794:
795: /*
796: * Remove from item list.
797: */
798: TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
799: pp->pr_nitems--;
800: pp->pr_nout++;
801: if (ph->ph_nmissing == 0) {
802: #ifdef DIAGNOSTIC
803: if (__predict_false(pp->pr_nidle == 0))
804: panic("pool_get: nidle inconsistent");
805: #endif
806: pp->pr_nidle--;
807:
808: /*
809: * This page was previously empty. Move it to the list of
810: * partially-full pages. This page is already curpage.
811: */
812: LIST_REMOVE(ph, ph_pagelist);
813: LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
814: }
815: ph->ph_nmissing++;
816: if (TAILQ_EMPTY(&ph->ph_itemlist)) {
817: #ifdef DIAGNOSTIC
818: if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
819: pr_leave(pp);
820: simple_unlock(&pp->pr_slock);
821: panic("pool_get: %s: nmissing inconsistent",
822: pp->pr_wchan);
823: }
824: #endif
825: /*
826: * This page is now full. Move it to the full list
827: * and select a new current page.
828: */
829: LIST_REMOVE(ph, ph_pagelist);
830: LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
831: pool_update_curpage(pp);
832: }
833:
834: pp->pr_nget++;
835:
836: /*
837: * If we have a low water mark and we are now below that low
838: * water mark, add more items to the pool.
839: */
840: if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
841: /*
842: * XXX: Should we log a warning? Should we set up a timeout
843: * to try again in a second or so? The latter could break
844: * a caller's assumptions about interrupt protection, etc.
845: */
846: }
847:
848: pr_leave(pp);
849: simple_unlock(&pp->pr_slock);
850: return (v);
851: }
852:
853: /*
854: * Internal version of pool_put(). Pool is already locked/entered.
855: */
856: void
857: pool_do_put(struct pool *pp, void *v)
858: {
859: struct pool_item *pi = v;
860: struct pool_item_header *ph;
861: caddr_t page;
862:
863: #ifdef MALLOC_DEBUG
864: if (pp->pr_roflags & PR_DEBUG) {
865: debug_free(v, M_DEBUG);
866: return;
867: }
868: #endif
869:
870: LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
871:
872: page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);
873:
874: #ifdef DIAGNOSTIC
875: if (pp->pr_ipl != -1)
876: splassert(pp->pr_ipl);
877:
878: if (__predict_false(pp->pr_nout == 0)) {
879: printf("pool %s: putting with none out\n",
880: pp->pr_wchan);
881: panic("pool_put");
882: }
883: #endif
884:
885: if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) {
886: pr_printlog(pp, NULL, printf);
887: panic("pool_put: %s: page header missing", pp->pr_wchan);
888: }
889:
890: #ifdef LOCKDEBUG
891: /*
892: * Check if we're freeing a locked simple lock.
893: */
894: simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size);
895: #endif
896:
897: /*
898: * Return to item list.
899: */
900: #ifdef DIAGNOSTIC
901: pi->pi_magic = PI_MAGIC;
902: #endif
903: #ifdef DEBUG
904: {
905: int i, *ip = v;
906:
907: for (i = 0; i < pp->pr_size / sizeof(int); i++) {
908: *ip++ = PI_MAGIC;
909: }
910: }
911: #endif
912:
913: TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
914: ph->ph_nmissing--;
915: pp->pr_nput++;
916: pp->pr_nitems++;
917: pp->pr_nout--;
918:
919: /* Cancel "pool empty" condition if it exists */
920: if (pp->pr_curpage == NULL)
921: pp->pr_curpage = ph;
922:
923: if (pp->pr_flags & PR_WANTED) {
924: pp->pr_flags &= ~PR_WANTED;
925: if (ph->ph_nmissing == 0)
926: pp->pr_nidle++;
927: wakeup(pp);
928: return;
929: }
930:
931: /*
932: * If this page is now empty, do one of two things:
933: *
934: * (1) If we have more pages than the page high water mark,
935: * free the page back to the system.
936: *
937: * (2) Otherwise, move the page to the empty page list.
938: *
939: * Either way, select a new current page (so we use a partially-full
940: * page if one is available).
941: */
942: if (ph->ph_nmissing == 0) {
943: pp->pr_nidle++;
944: if (pp->pr_nidle > pp->pr_maxpages ||
945: (pp->pr_alloc->pa_flags & PA_WANT) != 0) {
946: pr_rmpage(pp, ph, NULL);
947: } else {
948: LIST_REMOVE(ph, ph_pagelist);
949: LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
950: }
951: pool_update_curpage(pp);
952: }
953:
954: /*
955: * If the page was previously completely full, move it to the
956: * partially-full list and make it the current page. The next
957: * allocation will get the item from this page, instead of
958: * further fragmenting the pool.
959: */
960: else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
961: LIST_REMOVE(ph, ph_pagelist);
962: LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
963: pp->pr_curpage = ph;
964: }
965: }
966:
967: /*
968: * Return resource to the pool; must be called at appropriate spl level
969: */
970: #ifdef POOL_DIAGNOSTIC
971: void
972: _pool_put(struct pool *pp, void *v, const char *file, long line)
973: {
974:
975: simple_lock(&pp->pr_slock);
976: pr_enter(pp, file, line);
977:
978: pr_log(pp, v, PRLOG_PUT, file, line);
979:
980: pool_do_put(pp, v);
981:
982: pr_leave(pp);
983: simple_unlock(&pp->pr_slock);
984: }
985: #undef pool_put
986: #endif /* POOL_DIAGNOSTIC */
987:
988: void
989: pool_put(struct pool *pp, void *v)
990: {
991:
992: simple_lock(&pp->pr_slock);
993:
994: pool_do_put(pp, v);
995:
996: simple_unlock(&pp->pr_slock);
997: }
998:
999: #ifdef POOL_DIAGNOSTIC
1000: #define pool_put(h, v) _pool_put((h), (v), __FILE__, __LINE__)
1001: #endif
1002:
1003: /*
1004: * Add N items to the pool.
1005: */
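/*
 * Typical (hypothetical) use: prime a pool at attach time so a later
 * burst of PR_NOWAIT pool_get() calls cannot fail right away; the
 * call below pre-allocates enough pages to hold 64 items.
 *
 *	pool_init(&scpl, sizeof(struct sc_buf), 0, 0, 0, "scpl", NULL);
 *	pool_prime(&scpl, 64);
 */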
1006: int
1007: pool_prime(struct pool *pp, int n)
1008: {
1009: struct pool_item_header *ph;
1010: caddr_t cp;
1011: int newpages;
1012:
1013: simple_lock(&pp->pr_slock);
1014:
1015: newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1016:
1017: while (newpages-- > 0) {
1018: simple_unlock(&pp->pr_slock);
1019: cp = pool_allocator_alloc(pp, PR_NOWAIT);
1020: if (__predict_true(cp != NULL))
1021: ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
1022: simple_lock(&pp->pr_slock);
1023:
1024: if (__predict_false(cp == NULL || ph == NULL)) {
1025: if (cp != NULL)
1026: pool_allocator_free(pp, cp);
1027: break;
1028: }
1029:
1030: pool_prime_page(pp, cp, ph);
1031: pp->pr_npagealloc++;
1032: pp->pr_minpages++;
1033: }
1034:
1035: if (pp->pr_minpages >= pp->pr_maxpages)
1036: pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */
1037:
1038: simple_unlock(&pp->pr_slock);
1039: return (0);
1040: }
1041:
1042: /*
1043: * Add a page worth of items to the pool.
1044: *
1045: * Note, we must be called with the pool descriptor LOCKED.
1046: */
1047: void
1048: pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
1049: {
1050: struct pool_item *pi;
1051: caddr_t cp = storage;
1052: unsigned int align = pp->pr_align;
1053: unsigned int ioff = pp->pr_itemoffset;
1054: int n;
1055:
1056: #ifdef DIAGNOSTIC
1057: if (((u_long)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0)
1058: panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
1059: #endif
1060:
1061: /*
1062: * Insert page header.
1063: */
1064: LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
1065: TAILQ_INIT(&ph->ph_itemlist);
1066: ph->ph_page = storage;
1067: ph->ph_nmissing = 0;
1068: if ((pp->pr_roflags & PR_PHINPAGE) == 0)
1069: SPLAY_INSERT(phtree, &pp->pr_phtree, ph);
1070:
1071: pp->pr_nidle++;
1072:
1073: /*
1074: * Color this page.
1075: */
1076: cp = (caddr_t)(cp + pp->pr_curcolor);
1077: if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
1078: pp->pr_curcolor = 0;
1079:
1080: /*
    1081: * Adjust storage to apply alignment to `pr_itemoffset' in each item.
1082: */
1083: if (ioff != 0)
1084: cp = (caddr_t)(cp + (align - ioff));
1085:
1086: /*
1087: * Insert remaining chunks on the bucket list.
1088: */
1089: n = pp->pr_itemsperpage;
1090: pp->pr_nitems += n;
1091:
1092: while (n--) {
1093: pi = (struct pool_item *)cp;
1094:
1095: KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);
1096:
1097: /* Insert on page list */
1098: TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
1099: #ifdef DIAGNOSTIC
1100: pi->pi_magic = PI_MAGIC;
1101: #endif
1102: cp = (caddr_t)(cp + pp->pr_size);
1103: }
1104:
1105: /*
1106: * If the pool was depleted, point at the new page.
1107: */
1108: if (pp->pr_curpage == NULL)
1109: pp->pr_curpage = ph;
1110:
1111: if (++pp->pr_npages > pp->pr_hiwat)
1112: pp->pr_hiwat = pp->pr_npages;
1113: }
1114:
1115: /*
1116: * Used by pool_get() when nitems drops below the low water mark. This
1117: * is used to catch up pr_nitems with the low water mark.
1118: *
1119: * Note 1, we never wait for memory here, we let the caller decide what to do.
1120: *
1121: * Note 2, we must be called with the pool already locked, and we return
1122: * with it locked.
1123: */
1124: int
1125: pool_catchup(struct pool *pp)
1126: {
1127: struct pool_item_header *ph;
1128: caddr_t cp;
1129: int error = 0;
1130:
1131: while (POOL_NEEDS_CATCHUP(pp)) {
1132: /*
1133: * Call the page back-end allocator for more memory.
1134: *
1135: * XXX: We never wait, so should we bother unlocking
1136: * the pool descriptor?
1137: */
1138: simple_unlock(&pp->pr_slock);
1139: cp = pool_allocator_alloc(pp, PR_NOWAIT);
1140: if (__predict_true(cp != NULL))
1141: ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
1142: simple_lock(&pp->pr_slock);
1143: if (__predict_false(cp == NULL || ph == NULL)) {
1144: if (cp != NULL)
1145: pool_allocator_free(pp, cp);
1146: error = ENOMEM;
1147: break;
1148: }
1149: pool_prime_page(pp, cp, ph);
1150: pp->pr_npagealloc++;
1151: }
1152:
1153: return (error);
1154: }
1155:
1156: void
1157: pool_update_curpage(struct pool *pp)
1158: {
1159:
1160: pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
1161: if (pp->pr_curpage == NULL) {
1162: pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
1163: }
1164: }
1165:
1166: void
1167: pool_setlowat(struct pool *pp, int n)
1168: {
1169:
1170: simple_lock(&pp->pr_slock);
1171:
1172: pp->pr_minitems = n;
1173: pp->pr_minpages = (n == 0)
1174: ? 0
1175: : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1176:
1177: /* Make sure we're caught up with the newly-set low water mark. */
1178: if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
1179: /*
1180: * XXX: Should we log a warning? Should we set up a timeout
1181: * to try again in a second or so? The latter could break
1182: * a caller's assumptions about interrupt protection, etc.
1183: */
1184: }
1185:
1186: simple_unlock(&pp->pr_slock);
1187: }
1188:
1189: void
1190: pool_sethiwat(struct pool *pp, int n)
1191: {
1192:
1193: simple_lock(&pp->pr_slock);
1194:
1195: pp->pr_maxpages = (n == 0)
1196: ? 0
1197: : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1198:
1199: simple_unlock(&pp->pr_slock);
1200: }
1201:
1202: int
1203: pool_sethardlimit(struct pool *pp, unsigned n, const char *warnmess, int ratecap)
1204: {
1205: int error = 0;
1206:
1207: simple_lock(&pp->pr_slock);
1208:
1209: if (n < pp->pr_nout) {
1210: error = EINVAL;
1211: goto done;
1212: }
1213:
1214: pp->pr_hardlimit = n;
1215: pp->pr_hardlimit_warning = warnmess;
1216: pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1217: pp->pr_hardlimit_warning_last.tv_sec = 0;
1218: pp->pr_hardlimit_warning_last.tv_usec = 0;
1219:
1220: /*
1221: * In-line version of pool_sethiwat(), because we don't want to
1222: * release the lock.
1223: */
1224: pp->pr_maxpages = (n == 0 || n == UINT_MAX)
1225: ? n
1226: : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1227:
1228: done:
1229: simple_unlock(&pp->pr_slock);
1230:
1231: return (error);
1232: }
1233:
1234: /*
1235: * Release all complete pages that have not been used recently.
1236: *
1237: * Returns non-zero if any pages have been reclaimed.
1238: */
1239: int
1240: #ifdef POOL_DIAGNOSTIC
1241: _pool_reclaim(struct pool *pp, const char *file, long line)
1242: #else
1243: pool_reclaim(struct pool *pp)
1244: #endif
1245: {
1246: struct pool_item_header *ph, *phnext;
1247: struct pool_cache *pc;
1248: struct pool_pagelist pq;
1249: int s;
1250:
1251: if (simple_lock_try(&pp->pr_slock) == 0)
1252: return (0);
1253: pr_enter(pp, file, line);
1254:
1255: LIST_INIT(&pq);
1256:
1257: /*
1258: * Reclaim items from the pool's caches.
1259: */
1260: TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist)
1261: pool_cache_reclaim(pc);
1262:
1263: for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1264: phnext = LIST_NEXT(ph, ph_pagelist);
1265:
1266: /* Check our minimum page claim */
1267: if (pp->pr_npages <= pp->pr_minpages)
1268: break;
1269:
1270: KASSERT(ph->ph_nmissing == 0);
1271:
1272: /*
1273: * If freeing this page would put us below
1274: * the low water mark, stop now.
1275: */
1276: if ((pp->pr_nitems - pp->pr_itemsperpage) <
1277: pp->pr_minitems)
1278: break;
1279:
1280: pr_rmpage(pp, ph, &pq);
1281: }
1282:
1283: pr_leave(pp);
1284: simple_unlock(&pp->pr_slock);
1285: if (LIST_EMPTY(&pq))
1286: return (0);
1287: while ((ph = LIST_FIRST(&pq)) != NULL) {
1288: LIST_REMOVE(ph, ph_pagelist);
1289: pool_allocator_free(pp, ph->ph_page);
1290: if (pp->pr_roflags & PR_PHINPAGE) {
1291: continue;
1292: }
1293: SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
1294: s = splhigh();
1295: pool_put(&phpool, ph);
1296: splx(s);
1297: }
1298:
1299: return (1);
1300: }
1301:
1302: #ifdef DDB
1303: #include <machine/db_machdep.h>
1304: #include <ddb/db_interface.h>
1305: #include <ddb/db_output.h>
1306:
1307: /*
1308: * Diagnostic helpers.
1309: */
1310: void
1311: pool_printit(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
1312: {
1313: int s;
1314:
1315: s = splvm();
1316: if (simple_lock_try(&pp->pr_slock) == 0) {
1317: pr("pool %s is locked; try again later\n",
1318: pp->pr_wchan);
1319: splx(s);
1320: return;
1321: }
1322: pool_print1(pp, modif, pr);
1323: simple_unlock(&pp->pr_slock);
1324: splx(s);
1325: }
1326:
1327: void
1328: pool_print_pagelist(struct pool_pagelist *pl, int (*pr)(const char *, ...))
1329: {
1330: struct pool_item_header *ph;
1331: #ifdef DIAGNOSTIC
1332: struct pool_item *pi;
1333: #endif
1334:
1335: LIST_FOREACH(ph, pl, ph_pagelist) {
1336: (*pr)("\t\tpage %p, nmissing %d\n",
1337: ph->ph_page, ph->ph_nmissing);
1338: #ifdef DIAGNOSTIC
1339: TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1340: if (pi->pi_magic != PI_MAGIC) {
1341: (*pr)("\t\t\titem %p, magic 0x%x\n",
1342: pi, pi->pi_magic);
1343: }
1344: }
1345: #endif
1346: }
1347: }
1348:
1349: void
1350: pool_print1(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
1351: {
1352: struct pool_item_header *ph;
1353: struct pool_cache *pc;
1354: struct pool_cache_group *pcg;
1355: int i, print_log = 0, print_pagelist = 0, print_cache = 0;
1356: char c;
1357:
1358: while ((c = *modif++) != '\0') {
1359: if (c == 'l')
1360: print_log = 1;
1361: if (c == 'p')
1362: print_pagelist = 1;
1363: if (c == 'c')
1364: print_cache = 1;
1366: }
1367:
1368: (*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
1369: pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
1370: pp->pr_roflags);
1371: (*pr)("\talloc %p\n", pp->pr_alloc);
1372: (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1373: pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1374: (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1375: pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1376:
1377: (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1378: pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1379: (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1380: pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1381:
1382: if (print_pagelist == 0)
1383: goto skip_pagelist;
1384:
1385: if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
1386: (*pr)("\n\tempty page list:\n");
1387: pool_print_pagelist(&pp->pr_emptypages, pr);
1388: if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
1389: (*pr)("\n\tfull page list:\n");
1390: pool_print_pagelist(&pp->pr_fullpages, pr);
1391: if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
1392: (*pr)("\n\tpartial-page list:\n");
1393: pool_print_pagelist(&pp->pr_partpages, pr);
1394:
1395: if (pp->pr_curpage == NULL)
1396: (*pr)("\tno current page\n");
1397: else
1398: (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1399:
1400: skip_pagelist:
1401: if (print_log == 0)
1402: goto skip_log;
1403:
1404: (*pr)("\n");
1405: if ((pp->pr_roflags & PR_LOGGING) == 0)
1406: (*pr)("\tno log\n");
1407: else
1408: pr_printlog(pp, NULL, pr);
1409:
1410: skip_log:
1411: if (print_cache == 0)
1412: goto skip_cache;
1413:
1414: TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist) {
1415: (*pr)("\tcache %p: allocfrom %p freeto %p\n", pc,
1416: pc->pc_allocfrom, pc->pc_freeto);
1417: (*pr)("\t hits %lu misses %lu ngroups %lu nitems %lu\n",
1418: pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems);
1419: TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
1420: (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail);
1421: for (i = 0; i < PCG_NOBJECTS; i++)
1422: (*pr)("\t\t\t%p\n", pcg->pcg_objects[i]);
1423: }
1424: }
1425:
1426: skip_cache:
1427: pr_enter_check(pp, pr);
1428: }
1429:
1430: void
1431: db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1432: {
1433: struct pool *pp;
1434: char maxp[16];
1435: int ovflw;
1436: char mode;
1437:
1438: mode = modif[0];
1439: if (mode != '\0' && mode != 'a') {
1440: db_printf("usage: show all pools [/a]\n");
1441: return;
1442: }
1443:
1444: if (mode == '\0')
1445: db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1446: "Name",
1447: "Size",
1448: "Requests",
1449: "Fail",
1450: "Releases",
1451: "Pgreq",
1452: "Pgrel",
1453: "Npage",
1454: "Hiwat",
1455: "Minpg",
1456: "Maxpg",
1457: "Idle");
1458: else
1459: db_printf("%-10s %18s %18s\n",
1460: "Name", "Address", "Allocator");
1461:
1462: TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
1463: if (mode == 'a') {
1464: db_printf("%-10s %18p %18p\n", pp->pr_wchan, pp,
1465: pp->pr_alloc);
1466: continue;
1467: }
1468:
1469: if (!pp->pr_nget)
1470: continue;
1471:
1472: if (pp->pr_maxpages == UINT_MAX)
1473: snprintf(maxp, sizeof maxp, "inf");
1474: else
1475: snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1476:
1477: #define PRWORD(ovflw, fmt, width, fixed, val) do { \
1478: (ovflw) += db_printf((fmt), \
1479: (width) - (fixed) - (ovflw) > 0 ? \
1480: (width) - (fixed) - (ovflw) : 0, \
1481: (val)) - (width); \
1482: if ((ovflw) < 0) \
1483: (ovflw) = 0; \
1484: } while (/* CONSTCOND */0)
1485:
1486: ovflw = 0;
1487: PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1488: PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1489: PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1490: PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1491: PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1492: PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1493: PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1494: PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1495: PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1496: PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1497: PRWORD(ovflw, " %*s", 6, 1, maxp);
1498: PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1499: }
1500: }
1501:
1502: int
1503: pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph)
1504: {
1505: struct pool_item *pi;
1506: caddr_t page;
1507: int n;
1508:
1509: page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
1510: if (page != ph->ph_page &&
1511: (pp->pr_roflags & PR_PHINPAGE) != 0) {
1512: if (label != NULL)
1513: printf("%s: ", label);
1514: printf("pool(%p:%s): page inconsistency: page %p;"
1515: " at page head addr %p (p %p)\n", pp,
1516: pp->pr_wchan, ph->ph_page,
1517: ph, page);
1518: return 1;
1519: }
1520:
1521: for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
1522: pi != NULL;
1523: pi = TAILQ_NEXT(pi,pi_list), n++) {
1524:
1525: #ifdef DIAGNOSTIC
1526: if (pi->pi_magic != PI_MAGIC) {
1527: if (label != NULL)
1528: printf("%s: ", label);
1529: printf("pool(%s): free list modified: magic=%x;"
1530: " page %p; item ordinal %d;"
1531: " addr %p (p %p)\n",
1532: pp->pr_wchan, pi->pi_magic, ph->ph_page,
1533: n, pi, page);
1534: panic("pool");
1535: }
1536: #endif
1537: page =
1538: (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
1539: if (page == ph->ph_page)
1540: continue;
1541:
1542: if (label != NULL)
1543: printf("%s: ", label);
1544: printf("pool(%p:%s): page inconsistency: page %p;"
1545: " item ordinal %d; addr %p (p %p)\n", pp,
1546: pp->pr_wchan, ph->ph_page,
1547: n, pi, page);
1548: return 1;
1549: }
1550: return 0;
1551: }
1552:
1553: int
1554: pool_chk(struct pool *pp, const char *label)
1555: {
1556: struct pool_item_header *ph;
1557: int r = 0;
1558:
1559: simple_lock(&pp->pr_slock);
1560: LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) {
1561: r = pool_chk_page(pp, label, ph);
1562: if (r) {
1563: goto out;
1564: }
1565: }
1566: LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
1567: r = pool_chk_page(pp, label, ph);
1568: if (r) {
1569: goto out;
1570: }
1571: }
1572: LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
1573: r = pool_chk_page(pp, label, ph);
1574: if (r) {
1575: goto out;
1576: }
1577: }
1578:
1579: out:
1580: simple_unlock(&pp->pr_slock);
1581: return (r);
1582: }
1583: #endif
1584:
1585: /*
1586: * pool_cache_init:
1587: *
1588: * Initialize a pool cache.
1589: *
1590: * NOTE: If the pool must be protected from interrupts, we expect
1591: * to be called at the appropriate interrupt priority level.
1592: */
1593: void
1594: pool_cache_init(struct pool_cache *pc, struct pool *pp,
1595: int (*ctor)(void *, void *, int),
1596: void (*dtor)(void *, void *),
1597: void *arg)
1598: {
1599:
1600: TAILQ_INIT(&pc->pc_grouplist);
1601: simple_lock_init(&pc->pc_slock);
1602:
1603: pc->pc_allocfrom = NULL;
1604: pc->pc_freeto = NULL;
1605: pc->pc_pool = pp;
1606:
1607: pc->pc_ctor = ctor;
1608: pc->pc_dtor = dtor;
1609: pc->pc_arg = arg;
1610:
1611: pc->pc_hits = 0;
1612: pc->pc_misses = 0;
1613:
1614: pc->pc_ngroups = 0;
1615:
1616: pc->pc_nitems = 0;
1617:
1618: simple_lock(&pp->pr_slock);
1619: TAILQ_INSERT_TAIL(&pp->pr_cachelist, pc, pc_poollist);
1620: simple_unlock(&pp->pr_slock);
1621: }
1622:
1623: /*
1624: * pool_cache_destroy:
1625: *
1626: * Destroy a pool cache.
1627: */
1628: void
1629: pool_cache_destroy(struct pool_cache *pc)
1630: {
1631: struct pool *pp = pc->pc_pool;
1632:
1633: /* First, invalidate the entire cache. */
1634: pool_cache_invalidate(pc);
1635:
1636: /* ...and remove it from the pool's cache list. */
1637: simple_lock(&pp->pr_slock);
1638: TAILQ_REMOVE(&pp->pr_cachelist, pc, pc_poollist);
1639: simple_unlock(&pp->pr_slock);
1640: }
1641:
1642: static __inline void *
1643: pcg_get(struct pool_cache_group *pcg)
1644: {
1645: void *object;
1646: u_int idx;
1647:
1648: KASSERT(pcg->pcg_avail <= PCG_NOBJECTS);
1649: KASSERT(pcg->pcg_avail != 0);
1650: idx = --pcg->pcg_avail;
1651:
1652: KASSERT(pcg->pcg_objects[idx] != NULL);
1653: object = pcg->pcg_objects[idx];
1654: pcg->pcg_objects[idx] = NULL;
1655:
1656: return (object);
1657: }
1658:
1659: static __inline void
1660: pcg_put(struct pool_cache_group *pcg, void *object)
1661: {
1662: u_int idx;
1663:
1664: KASSERT(pcg->pcg_avail < PCG_NOBJECTS);
1665: idx = pcg->pcg_avail++;
1666:
1667: KASSERT(pcg->pcg_objects[idx] == NULL);
1668: pcg->pcg_objects[idx] = object;
1669: }
1670:
1671: /*
1672: * pool_cache_get:
1673: *
1674: * Get an object from a pool cache.
1675: */
1676: void *
1677: pool_cache_get(struct pool_cache *pc, int flags)
1678: {
1679: struct pool_cache_group *pcg;
1680: void *object;
1681:
1682: #ifdef LOCKDEBUG
1683: if (flags & PR_WAITOK)
1684: simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)");
1685: #endif
1686:
1687: simple_lock(&pc->pc_slock);
1688:
1689: if ((pcg = pc->pc_allocfrom) == NULL) {
1690: TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
1691: if (pcg->pcg_avail != 0) {
1692: pc->pc_allocfrom = pcg;
1693: goto have_group;
1694: }
1695: }
1696:
1697: /*
1698: * No groups with any available objects. Allocate
1699: * a new object, construct it, and return it to
1700: * the caller. We will allocate a group, if necessary,
1701: * when the object is freed back to the cache.
1702: */
1703: pc->pc_misses++;
1704: simple_unlock(&pc->pc_slock);
1705: object = pool_get(pc->pc_pool, flags);
1706: if (object != NULL && pc->pc_ctor != NULL) {
1707: if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
1708: pool_put(pc->pc_pool, object);
1709: return (NULL);
1710: }
1711: }
1712: return (object);
1713: }
1714:
1715: have_group:
1716: pc->pc_hits++;
1717: pc->pc_nitems--;
1718: object = pcg_get(pcg);
1719:
1720: if (pcg->pcg_avail == 0)
1721: pc->pc_allocfrom = NULL;
1722:
1723: simple_unlock(&pc->pc_slock);
1724:
1725: return (object);
1726: }
1727:
1728: /*
1729: * pool_cache_put:
1730: *
1731: * Put an object back to the pool cache.
1732: */
1733: void
1734: pool_cache_put(struct pool_cache *pc, void *object)
1735: {
1736: struct pool_cache_group *pcg;
1737: int s;
1738:
1739: simple_lock(&pc->pc_slock);
1740:
1741: if ((pcg = pc->pc_freeto) == NULL) {
1742: TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
1743: if (pcg->pcg_avail != PCG_NOBJECTS) {
1744: pc->pc_freeto = pcg;
1745: goto have_group;
1746: }
1747: }
1748:
1749: /*
1750: * No empty groups to free the object to. Attempt to
1751: * allocate one.
1752: */
1753: simple_unlock(&pc->pc_slock);
1754: s = splvm();
1755: pcg = pool_get(&pcgpool, PR_NOWAIT);
1756: splx(s);
1757: if (pcg != NULL) {
1758: memset(pcg, 0, sizeof(*pcg));
1759: simple_lock(&pc->pc_slock);
1760: pc->pc_ngroups++;
1761: TAILQ_INSERT_TAIL(&pc->pc_grouplist, pcg, pcg_list);
1762: if (pc->pc_freeto == NULL)
1763: pc->pc_freeto = pcg;
1764: goto have_group;
1765: }
1766:
1767: /*
1768: * Unable to allocate a cache group; destruct the object
1769: * and free it back to the pool.
1770: */
1771: pool_cache_destruct_object(pc, object);
1772: return;
1773: }
1774:
1775: have_group:
1776: pc->pc_nitems++;
1777: pcg_put(pcg, object);
1778:
1779: if (pcg->pcg_avail == PCG_NOBJECTS)
1780: pc->pc_freeto = NULL;
1781:
1782: simple_unlock(&pc->pc_slock);
1783: }
1784:
1785: /*
1786: * pool_cache_destruct_object:
1787: *
1788: * Force destruction of an object and its release back into
1789: * the pool.
1790: */
1791: void
1792: pool_cache_destruct_object(struct pool_cache *pc, void *object)
1793: {
1794:
1795: if (pc->pc_dtor != NULL)
1796: (*pc->pc_dtor)(pc->pc_arg, object);
1797: pool_put(pc->pc_pool, object);
1798: }
1799:
1800: /*
1801: * pool_cache_do_invalidate:
1802: *
1803: * This internal function implements pool_cache_invalidate() and
1804: * pool_cache_reclaim().
1805: */
1806: void
1807: pool_cache_do_invalidate(struct pool_cache *pc, int free_groups,
1808: void (*putit)(struct pool *, void *))
1809: {
1810: struct pool_cache_group *pcg, *npcg;
1811: void *object;
1812: int s;
1813:
1814: for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
1815: pcg = npcg) {
1816: npcg = TAILQ_NEXT(pcg, pcg_list);
1817: while (pcg->pcg_avail != 0) {
1818: pc->pc_nitems--;
1819: object = pcg_get(pcg);
1820: if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
1821: pc->pc_allocfrom = NULL;
1822: if (pc->pc_dtor != NULL)
1823: (*pc->pc_dtor)(pc->pc_arg, object);
1824: (*putit)(pc->pc_pool, object);
1825: }
1826: if (free_groups) {
1827: pc->pc_ngroups--;
1828: TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
1829: if (pc->pc_freeto == pcg)
1830: pc->pc_freeto = NULL;
1831: s = splvm();
1832: pool_put(&pcgpool, pcg);
1833: splx(s);
1834: }
1835: }
1836: }
1837:
1838: /*
1839: * pool_cache_invalidate:
1840: *
1841: * Invalidate a pool cache (destruct and release all of the
1842: * cached objects).
1843: */
1844: void
1845: pool_cache_invalidate(struct pool_cache *pc)
1846: {
1847:
1848: simple_lock(&pc->pc_slock);
1849: pool_cache_do_invalidate(pc, 0, pool_put);
1850: simple_unlock(&pc->pc_slock);
1851: }
1852:
1853: /*
1854: * pool_cache_reclaim:
1855: *
1856: * Reclaim a pool cache for pool_reclaim().
1857: */
1858: void
1859: pool_cache_reclaim(struct pool_cache *pc)
1860: {
1861:
1862: simple_lock(&pc->pc_slock);
1863: pool_cache_do_invalidate(pc, 1, pool_do_put);
1864: simple_unlock(&pc->pc_slock);
1865: }
1866:
1867: /*
1868: * We have three different sysctls.
1869: * kern.pool.npools - the number of pools.
1870: * kern.pool.pool.<pool#> - the pool struct for the pool#.
1871: * kern.pool.name.<pool#> - the name for pool#.
1872: */
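/*
 * Userland sketch (assumes the matching KERN_POOL_* mib constants from
 * <sys/sysctl.h>; error handling omitted): read the pool count, then
 * the name of the pool with serial number 1.
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_NPOOLS, 0 };
 *	int npools;
 *	size_t len = sizeof(npools);
 *	sysctl(mib, 3, &npools, &len, NULL, 0);
 *
 *	char name[32];
 *	mib[2] = KERN_POOL_NAME;
 *	mib[3] = 1;
 *	len = sizeof(name);
 *	sysctl(mib, 4, name, &len, NULL, 0);
 */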
1873: int
1874: sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
1875: {
1876: struct pool *pp, *foundpool = NULL;
1877: size_t buflen = where != NULL ? *sizep : 0;
1878: int npools = 0, s;
1879: unsigned int lookfor;
1880: size_t len;
1881:
1882: switch (*name) {
1883: case KERN_POOL_NPOOLS:
1884: if (namelen != 1 || buflen != sizeof(int))
1885: return (EINVAL);
1886: lookfor = 0;
1887: break;
1888: case KERN_POOL_NAME:
1889: if (namelen != 2 || buflen < 1)
1890: return (EINVAL);
1891: lookfor = name[1];
1892: break;
1893: case KERN_POOL_POOL:
1894: if (namelen != 2 || buflen != sizeof(struct pool))
1895: return (EINVAL);
1896: lookfor = name[1];
1897: break;
1898: default:
1899: return (EINVAL);
1900: }
1901:
1902: s = splvm();
1903: simple_lock(&pool_head_slock);
1904:
1905: TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
1906: npools++;
1907: if (lookfor == pp->pr_serial) {
1908: foundpool = pp;
1909: break;
1910: }
1911: }
1912:
1913: simple_unlock(&pool_head_slock);
1914: splx(s);
1915:
1916: if (*name != KERN_POOL_NPOOLS && foundpool == NULL)
1917: return (ENOENT);
1918:
1919: switch (*name) {
1920: case KERN_POOL_NPOOLS:
1921: return copyout(&npools, where, buflen);
1922: case KERN_POOL_NAME:
1923: len = strlen(foundpool->pr_wchan) + 1;
1924: if (*sizep < len)
1925: return (ENOMEM);
1926: *sizep = len;
1927: return copyout(foundpool->pr_wchan, where, len);
1928: case KERN_POOL_POOL:
1929: return copyout(foundpool, where, buflen);
1930: }
1931: /* NOTREACHED */
1932: return (0); /* XXX - Stupid gcc */
1933: }
1934:
1935: /*
1936: * Pool backend allocators.
1937: *
    1938: * Each pool has a backend allocator that handles page allocation and deallocation.
1939: */
1940: void *pool_page_alloc_oldnointr(struct pool *, int);
1941: void pool_page_free_oldnointr(struct pool *, void *);
1942: void *pool_page_alloc(struct pool *, int);
1943: void pool_page_free(struct pool *, void *);
1944:
    1945: /* The previous nointr allocator; handles large allocations safely. */
1946: struct pool_allocator pool_allocator_oldnointr = {
1947: pool_page_alloc_oldnointr, pool_page_free_oldnointr, 0,
1948: };
    1949: /* Safe for interrupts; name preserved for compatibility.
    1950:  * This is the default allocator. */
1951: struct pool_allocator pool_allocator_nointr = {
1952: pool_page_alloc, pool_page_free, 0,
1953: };
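/*
 * A pool may also bring its own backend (sketch; my_alloc/my_free are
 * hypothetical and must hand out page-aligned, pa_pagesz-sized
 * chunks):
 *
 *	void *my_alloc(struct pool *, int);
 *	void my_free(struct pool *, void *);
 *
 *	struct pool_allocator my_allocator = {
 *		my_alloc, my_free, 0,
 *	};
 *
 *	pool_init(&foopl, sizeof(struct foo), 0, 0, 0, "foopl",
 *	    &my_allocator);
 */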
1954:
1955: /*
1956: * XXX - we have at least three different resources for the same allocation
1957: * and each resource can be depleted. First we have the ready elements in
1958: * the pool. Then we have the resource (typically a vm_map) for this
1959: * allocator, then we have physical memory. Waiting for any of these can
1960: * be unnecessary when any other is freed, but the kernel doesn't support
1961: * sleeping on multiple addresses, so we have to fake. The caller sleeps on
1962: * the pool (so that we can be awakened when an item is returned to the pool),
1963: * but we set PA_WANT on the allocator. When a page is returned to
1964: * the allocator and PA_WANT is set pool_allocator_free will wakeup all
1965: * sleeping pools belonging to this allocator. (XXX - thundering herd).
1966: * We also wake up the allocator in case someone without a pool (malloc)
1967: * is sleeping waiting for this allocator.
1968: */
1969:
1970: void *
1971: pool_allocator_alloc(struct pool *pp, int flags)
1972: {
1973:
1974: return (pp->pr_alloc->pa_alloc(pp, flags));
1975: }
1976:
1977: void
1978: pool_allocator_free(struct pool *pp, void *v)
1979: {
1980: struct pool_allocator *pa = pp->pr_alloc;
1981: int s;
1982:
1983: (*pa->pa_free)(pp, v);
1984:
1985: s = splvm();
1986: simple_lock(&pa->pa_slock);
1987: if ((pa->pa_flags & PA_WANT) == 0) {
1988: simple_unlock(&pa->pa_slock);
1989: splx(s);
1990: return;
1991: }
1992:
1993: TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
1994: simple_lock(&pp->pr_slock);
1995: if ((pp->pr_flags & PR_WANTED) != 0) {
1996: pp->pr_flags &= ~PR_WANTED;
1997: wakeup(pp);
1998: }
1999: simple_unlock(&pp->pr_slock);
2000: }
2001: pa->pa_flags &= ~PA_WANT;
2002: simple_unlock(&pa->pa_slock);
2003: splx(s);
2004: }
2005:
2006: void *
2007: pool_page_alloc(struct pool *pp, int flags)
2008: {
2009: boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
2010:
2011: return (uvm_km_getpage(waitok));
2012: }
2013:
2014: void
2015: pool_page_free(struct pool *pp, void *v)
2016: {
2017:
2018: uvm_km_putpage(v);
2019: }
2020:
2021: void *
2022: pool_page_alloc_oldnointr(struct pool *pp, int flags)
2023: {
2024: boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
2025:
2026: splassert(IPL_NONE);
2027:
2028: return ((void *)uvm_km_alloc_poolpage1(kernel_map, uvm.kernel_object,
2029: waitok));
2030: }
2031:
2032: void
2033: pool_page_free_oldnointr(struct pool *pp, void *v)
2034: {
2035: splassert(IPL_NONE);
2036:
2037: uvm_km_free_poolpage1(kernel_map, (vaddr_t)v);
2038: }