5045 use atomic_{inc,dec}_* instead of atomic_add_*
--- old/usr/src/uts/common/fs/dnlc.c
+++ new/usr/src/uts/common/fs/dnlc.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
26 26 /* All Rights Reserved */
27 27
28 28 /*
29 29 * University Copyright- Copyright (c) 1982, 1986, 1988
30 30 * The Regents of the University of California
31 31 * All Rights Reserved
32 32 *
33 33 * University Acknowledgment- Portions of this document are derived from
34 34 * software developed by the University of California, Berkeley, and its
35 35 * contributors.
36 36 */
37 37
38 38 #include <sys/types.h>
39 39 #include <sys/systm.h>
40 40 #include <sys/param.h>
41 41 #include <sys/t_lock.h>
42 42 #include <sys/systm.h>
43 43 #include <sys/vfs.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/dnlc.h>
46 46 #include <sys/kmem.h>
47 47 #include <sys/cmn_err.h>
48 48 #include <sys/vtrace.h>
49 49 #include <sys/bitmap.h>
50 50 #include <sys/var.h>
51 51 #include <sys/sysmacros.h>
52 52 #include <sys/kstat.h>
53 53 #include <sys/atomic.h>
54 54 #include <sys/taskq.h>
55 55
56 56 /*
57 57 * Directory name lookup cache.
58 58 * Based on code originally done by Robert Elz at Melbourne.
59 59 *
60 60 * Names found by directory scans are retained in a cache
61 61 * for future reference. Each hash chain is ordered by LRU
62 62 * Cache is indexed by hash value obtained from (vp, name)
63 63 * where the vp refers to the directory containing the name.
64 64 */
65 65
66 66 /*
67 67 * We want to be able to identify files that are referenced only by the DNLC.
68 68 * When adding a reference from the DNLC, call VN_HOLD_DNLC instead of VN_HOLD,
69 69 * since multiple DNLC references should only be counted once in v_count. This
70 70 * file contains only two(2) calls to VN_HOLD, renamed VN_HOLD_CALLER in the
71 71 * hope that no one will mistakenly add a VN_HOLD to this file. (Unfortunately
72 72 * it is not possible to #undef VN_HOLD and retain VN_HOLD_CALLER. Ideally a
73 73 * Makefile rule would grep uncommented C tokens to check that VN_HOLD is
74 74 * referenced only once in this file, to define VN_HOLD_CALLER.)
75 75 */
76 76 #define VN_HOLD_CALLER VN_HOLD
77 77 #define VN_HOLD_DNLC(vp) { \
78 78 mutex_enter(&(vp)->v_lock); \
79 79 if ((vp)->v_count_dnlc == 0) \
80 80 (vp)->v_count++; \
81 81 (vp)->v_count_dnlc++; \
82 82 mutex_exit(&(vp)->v_lock); \
83 83 }
84 84 #define VN_RELE_DNLC(vp) { \
85 85 vn_rele_dnlc(vp); \
86 86 }
87 87
88 88 /*
89 89 * Tunable nc_hashavelen is the average length desired for this chain, from
90 90 * which the size of the nc_hash table is derived at create time.
91 91 */
92 92 #define NC_HASHAVELEN_DEFAULT 4
93 93 int nc_hashavelen = NC_HASHAVELEN_DEFAULT;
94 94
95 95 /*
96 96 * NC_MOVETOFRONT is the move-to-front threshold: if the hash lookup
97 97 * depth exceeds this value, we move the looked-up entry to the front of
98 98 * its hash chain. The idea is to make sure that the most frequently
99 99 * accessed entries are found most quickly (by keeping them near the
100 100 * front of their hash chains).
101 101 */
102 102 #define NC_MOVETOFRONT 2
103 103
104 104 /*
105 105 *
106 106 * DNLC_MAX_RELE is used to size an array on the stack when releasing
107 107 * vnodes. This array is used rather than calling VN_RELE() inline because
108 108 * all dnlc locks must be dropped by that time in order to avoid a
109 109 * possible deadlock. This deadlock occurs when the dnlc holds the last
110 110 * reference to the vnode and so the VOP_INACTIVE vector is called which
111 111 * can in turn call back into the dnlc. A global array was used but had
112 112 * many problems:
113 113 * 1) It didn't really have an upper bound on the array size, as
114 114 * entries could be added after the purge had started.
115 115 * 2) The locking scheme caused a hang.
116 116 * 3) It caused serialisation on the global lock.
117 117 * 4) The array was often unnecessarily huge.
118 118 *
119 119 * Note the current value 8 allows up to 4 cache entries (to be purged
120 120 * from each hash chain), before having to cycle around and retry.
121 121 * This ought to be ample given that nc_hashavelen is typically very small.
122 122 */
123 123 #define DNLC_MAX_RELE 8 /* must be even */
124 124
125 125 /*
126 126 * Hash table of name cache entries for fast lookup, dynamically
127 127 * allocated at startup.
128 128 */
129 129 nc_hash_t *nc_hash;
130 130
131 131 /*
132 132 * Rotors. Used to select entries on a round-robin basis.
133 133 */
134 134 static nc_hash_t *dnlc_purge_fs1_rotor;
135 135 static nc_hash_t *dnlc_free_rotor;
136 136
137 137 /*
138 138 * # of dnlc entries (uninitialized)
139 139 *
140 140 * the initial value was chosen as being
141 141 * a random string of bits, probably not
142 142 * normally chosen by a systems administrator
143 143 */
144 144 int ncsize = -1;
145 145 volatile uint32_t dnlc_nentries = 0; /* current num of name cache entries */
146 146 static int nc_hashsz; /* size of hash table */
147 147 static int nc_hashmask; /* size of hash table minus 1 */
148 148
149 149 /*
150 150 * The dnlc_reduce_cache() taskq queue is activated when there are
151 151 * ncsize name cache entries and if no parameter is provided, it reduces
152 152 * ncsize name cache entries and, if no parameter is provided, it reduces
153 153 * the size down to dnlc_nentries_low_water, which is by default one
154 154 * hundredth less than (i.e. 99% of) ncsize.
155 155 * If a parameter is provided to dnlc_reduce_cache(), then we reduce
156 156 * the size down based on ncsize_onepercent - where ncsize_onepercent
157 157 * is 1% of ncsize; however, we never let dnlc_reduce_cache() reduce
158 158 * the size below 3% of ncsize (ncsize_min_percent).
159 159 */
160 160 #define DNLC_LOW_WATER_DIVISOR_DEFAULT 100
161 161 uint_t dnlc_low_water_divisor = DNLC_LOW_WATER_DIVISOR_DEFAULT;
162 162 uint_t dnlc_nentries_low_water;
163 163 int dnlc_reduce_idle = 1; /* no locking needed */
164 164 uint_t ncsize_onepercent;
165 165 uint_t ncsize_min_percent;
166 166
167 167 /*
168 168 * If dnlc_nentries hits dnlc_max_nentries (twice ncsize)
169 169 * then this means the dnlc_reduce_cache() taskq is failing to
170 170 * keep up. In this case we refuse to add new entries to the dnlc
171 171 * until the taskq catches up.
172 172 */
173 173 uint_t dnlc_max_nentries; /* twice ncsize */
174 174 uint64_t dnlc_max_nentries_cnt = 0; /* statistic on times we failed */
175 175
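For illustration, the sizing arithmetic in dnlc_init() below works out as follows for an administrator-specified ncsize of 100000 with the default divisor of 100: dnlc_nentries_low_water = 100000 - 100000/100 = 99000, ncsize_onepercent = 1000, ncsize_min_percent = 3000 and dnlc_max_nentries = 200000.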
176 176 /*
177 177 * Tunable to define when we should just remove items from
178 178 * the end of the chain.
179 179 */
180 180 #define DNLC_LONG_CHAIN 8
181 181 uint_t dnlc_long_chain = DNLC_LONG_CHAIN;
182 182
183 183 /*
184 184 * ncstats has been deprecated, due to the integer size of the counters
185 185 * which can easily overflow in the dnlc.
186 186 * It is maintained (at some expense) for compatibility.
187 187 * The preferred interface is the kstat accessible nc_stats below.
188 188 */
189 189 struct ncstats ncstats;
190 190
191 191 struct nc_stats ncs = {
192 192 { "hits", KSTAT_DATA_UINT64 },
193 193 { "misses", KSTAT_DATA_UINT64 },
194 194 { "negative_cache_hits", KSTAT_DATA_UINT64 },
195 195 { "enters", KSTAT_DATA_UINT64 },
196 196 { "double_enters", KSTAT_DATA_UINT64 },
197 197 { "purge_total_entries", KSTAT_DATA_UINT64 },
198 198 { "purge_all", KSTAT_DATA_UINT64 },
199 199 { "purge_vp", KSTAT_DATA_UINT64 },
200 200 { "purge_vfs", KSTAT_DATA_UINT64 },
201 201 { "purge_fs1", KSTAT_DATA_UINT64 },
202 202 { "pick_free", KSTAT_DATA_UINT64 },
203 203 { "pick_heuristic", KSTAT_DATA_UINT64 },
204 204 { "pick_last", KSTAT_DATA_UINT64 },
205 205
206 206 /* directory caching stats */
207 207
208 208 { "dir_hits", KSTAT_DATA_UINT64 },
209 209 { "dir_misses", KSTAT_DATA_UINT64 },
210 210 { "dir_cached_current", KSTAT_DATA_UINT64 },
211 211 { "dir_entries_cached_current", KSTAT_DATA_UINT64 },
212 212 { "dir_cached_total", KSTAT_DATA_UINT64 },
213 213 { "dir_start_no_memory", KSTAT_DATA_UINT64 },
214 214 { "dir_add_no_memory", KSTAT_DATA_UINT64 },
215 215 { "dir_add_abort", KSTAT_DATA_UINT64 },
216 216 { "dir_add_max", KSTAT_DATA_UINT64 },
217 217 { "dir_remove_entry_fail", KSTAT_DATA_UINT64 },
218 218 { "dir_remove_space_fail", KSTAT_DATA_UINT64 },
219 219 { "dir_update_fail", KSTAT_DATA_UINT64 },
220 220 { "dir_fini_purge", KSTAT_DATA_UINT64 },
221 221 { "dir_reclaim_last", KSTAT_DATA_UINT64 },
222 222 { "dir_reclaim_any", KSTAT_DATA_UINT64 },
223 223 };
224 224
225 225 static int doingcache = 1;
226 226
227 227 vnode_t negative_cache_vnode;
228 228
229 229 /*
230 230 * Insert entry at the front of the queue
231 231 */
232 232 #define nc_inshash(ncp, hp) \
233 233 { \
234 234 (ncp)->hash_next = (hp)->hash_next; \
235 235 (ncp)->hash_prev = (ncache_t *)(hp); \
236 236 (hp)->hash_next->hash_prev = (ncp); \
237 237 (hp)->hash_next = (ncp); \
238 238 }
239 239
240 240 /*
241 241 * Remove entry from hash queue
242 242 */
243 243 #define nc_rmhash(ncp) \
244 244 { \
245 245 (ncp)->hash_prev->hash_next = (ncp)->hash_next; \
246 246 (ncp)->hash_next->hash_prev = (ncp)->hash_prev; \
247 247 (ncp)->hash_prev = NULL; \
248 248 (ncp)->hash_next = NULL; \
249 249 }
250 250
251 251 /*
252 252 * Free an entry.
253 253 */
254 254 #define dnlc_free(ncp) \
255 255 { \
256 256 kmem_free((ncp), sizeof (ncache_t) + (ncp)->namlen); \
257 - atomic_add_32(&dnlc_nentries, -1); \
257 + atomic_dec_32(&dnlc_nentries); \
258 258 }
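The change above (and its twin in dnlc_get() below) is a pure idiom substitution: atomic_dec_32() and atomic_inc_32() from <sys/atomic.h> behave exactly like atomic_add_32() with a delta of -1 or 1, they just state the intent directly. A minimal sketch of the preferred usage on a counter like dnlc_nentries (example_count is a made-up name):

	volatile uint32_t example_count = 0;	/* hypothetical counter */

	atomic_inc_32(&example_count);	/* was: atomic_add_32(&example_count, 1) */
	atomic_dec_32(&example_count);	/* was: atomic_add_32(&example_count, -1) */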
259 259
260 260
261 261 /*
262 262 * Cached directory info.
263 263 * ======================
264 264 */
265 265
266 266 /*
267 267 * Cached directory free space hash function.
268 268 * Needs the free space handle and the dcp to get the hash table size
269 269 * Returns the hash index.
270 270 */
271 271 #define DDFHASH(handle, dcp) ((handle >> 2) & (dcp)->dc_fhash_mask)
272 272
273 273 /*
274 274 * Cached directory name entry hash function.
275 275 * Uses the name and returns in the input arguments the hash and the name
276 276 * length.
277 277 */
278 278 #define DNLC_DIR_HASH(name, hash, namelen) \
279 279 { \
280 280 char Xc; \
281 281 const char *Xcp; \
282 282 hash = *name; \
283 283 for (Xcp = (name + 1); (Xc = *Xcp) != 0; Xcp++) \
284 284 hash = (hash << 4) + hash + Xc; \
285 285 ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1)); \
286 286 namelen = Xcp - (name); \
287 287 }
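As a worked example of DNLC_DIR_HASH: for the name "etc", hash starts as 'e' (101), then becomes (101 << 4) + 101 + 't' (116) = 1833, then (1833 << 4) + 1833 + 'c' (99) = 31260, and namelen is set to 3 (the terminating null is not counted).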
288 288
289 289 /* special dircache_t pointer to indicate error should be returned */
290 290 /*
291 291 * The anchor directory cache pointer can contain 3 types of values,
292 292 * 1) NULL: No directory cache
293 293 * 2) DC_RET_LOW_MEM (1): There was a directory cache that was found to be
294 294 * too big, or a memory shortage occurred. This value remains in the
295 295 * pointer until a dnlc_dir_start(), which returns a DNOMEM error.
296 296 * This is kludgy but efficient and only visible in this source file.
297 297 * 3) A valid cache pointer.
298 298 */
299 299 #define DC_RET_LOW_MEM (dircache_t *)1
300 300 #define VALID_DIR_CACHE(dcp) ((dircache_t *)(dcp) > DC_RET_LOW_MEM)
301 301
302 302 /* Tunables */
303 303 uint_t dnlc_dir_enable = 1; /* disable caching directories by setting to 0 */
304 304 uint_t dnlc_dir_min_size = 40; /* min no of directory entries before caching */
305 305 uint_t dnlc_dir_max_size = UINT_MAX; /* ditto maximum */
306 306 uint_t dnlc_dir_hash_size_shift = 3; /* 8 entries per hash bucket */
307 307 uint_t dnlc_dir_min_reclaim = 350000; /* approx 1MB of dcentrys */
308 308 /*
309 309 * dnlc_dir_hash_resize_shift determines when the hash tables
310 310 * get re-adjusted due to growth or shrinkage
311 311 * - currently 2 indicating that there can be at most 4
312 312 * times or at least one quarter the number of entries
313 313 * before hash table readjustment. Note that with
314 314 * dnlc_dir_hash_size_shift above set at 3 this would
315 315 * mean readjustment would occur if the average number
316 316 * of entries went above 32 or below 2
317 317 */
318 318 uint_t dnlc_dir_hash_resize_shift = 2; /* readjust rate */
319 319
320 320 static kmem_cache_t *dnlc_dir_space_cache; /* free space entry cache */
321 321 static dchead_t dc_head; /* anchor of cached directories */
322 322
323 323 /* Prototypes */
324 324 static ncache_t *dnlc_get(uchar_t namlen);
325 325 static ncache_t *dnlc_search(vnode_t *dp, const char *name, uchar_t namlen,
326 326 int hash);
327 327 static void dnlc_dir_reclaim(void *unused);
328 328 static void dnlc_dir_abort(dircache_t *dcp);
329 329 static void dnlc_dir_adjust_fhash(dircache_t *dcp);
330 330 static void dnlc_dir_adjust_nhash(dircache_t *dcp);
331 331 static void do_dnlc_reduce_cache(void *);
332 332
333 333
334 334 /*
335 335 * Initialize the directory cache.
336 336 */
337 337 void
338 338 dnlc_init()
339 339 {
340 340 nc_hash_t *hp;
341 341 kstat_t *ksp;
342 342 int i;
343 343
344 344 /*
345 345 * Set up the size of the dnlc (ncsize) and its low water mark.
346 346 */
347 347 if (ncsize == -1) {
348 348 /* calculate a reasonable size for the low water */
349 349 dnlc_nentries_low_water = 4 * (v.v_proc + maxusers) + 320;
350 350 ncsize = dnlc_nentries_low_water +
351 351 (dnlc_nentries_low_water / dnlc_low_water_divisor);
352 352 } else {
353 353 /* don't change the user specified ncsize */
354 354 dnlc_nentries_low_water =
355 355 ncsize - (ncsize / dnlc_low_water_divisor);
356 356 }
357 357 if (ncsize <= 0) {
358 358 doingcache = 0;
359 359 dnlc_dir_enable = 0; /* also disable directory caching */
360 360 ncsize = 0;
361 361 cmn_err(CE_NOTE, "name cache (dnlc) disabled");
362 362 return;
363 363 }
364 364 dnlc_max_nentries = ncsize * 2;
365 365 ncsize_onepercent = ncsize / 100;
366 366 ncsize_min_percent = ncsize_onepercent * 3;
367 367
368 368 /*
369 369 * Initialise the hash table.
370 370 * Compute hash size rounding to the next power of two.
371 371 */
372 372 nc_hashsz = ncsize / nc_hashavelen;
373 373 nc_hashsz = 1 << highbit(nc_hashsz);
374 374 nc_hashmask = nc_hashsz - 1;
375 375 nc_hash = kmem_zalloc(nc_hashsz * sizeof (*nc_hash), KM_SLEEP);
376 376 for (i = 0; i < nc_hashsz; i++) {
377 377 hp = (nc_hash_t *)&nc_hash[i];
378 378 mutex_init(&hp->hash_lock, NULL, MUTEX_DEFAULT, NULL);
379 379 hp->hash_next = (ncache_t *)hp;
380 380 hp->hash_prev = (ncache_t *)hp;
381 381 }
382 382
383 383 /*
384 384 * Initialize rotors
385 385 */
386 386 dnlc_free_rotor = dnlc_purge_fs1_rotor = &nc_hash[0];
387 387
388 388 /*
389 389 * Set up the directory caching to use kmem_cache_alloc
390 390 * for its free space entries so that we can get a callback
391 391 * when the system is short on memory, to allow us to free
392 392 * up some memory. We don't use the constructor/destructor
393 393 * functions.
394 394 */
395 395 dnlc_dir_space_cache = kmem_cache_create("dnlc_space_cache",
396 396 sizeof (dcfree_t), 0, NULL, NULL, dnlc_dir_reclaim, NULL,
397 397 NULL, 0);
398 398
399 399 /*
400 400 * Initialise the head of the cached directory structures
401 401 */
402 402 mutex_init(&dc_head.dch_lock, NULL, MUTEX_DEFAULT, NULL);
403 403 dc_head.dch_next = (dircache_t *)&dc_head;
404 404 dc_head.dch_prev = (dircache_t *)&dc_head;
405 405
406 406 /*
407 407 * Initialise the reference count of the negative cache vnode to 1
408 408 * so that it never goes away (VOP_INACTIVE isn't called on it).
409 409 */
410 410 negative_cache_vnode.v_count = 1;
411 411 negative_cache_vnode.v_count_dnlc = 0;
412 412
413 413 /*
414 414 * Initialise kstats - both the old compatibility raw kind and
415 415 * the more extensive named stats.
416 416 */
417 417 ksp = kstat_create("unix", 0, "ncstats", "misc", KSTAT_TYPE_RAW,
418 418 sizeof (struct ncstats), KSTAT_FLAG_VIRTUAL);
419 419 if (ksp) {
420 420 ksp->ks_data = (void *) &ncstats;
421 421 kstat_install(ksp);
422 422 }
423 423 ksp = kstat_create("unix", 0, "dnlcstats", "misc", KSTAT_TYPE_NAMED,
424 424 sizeof (ncs) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
425 425 if (ksp) {
426 426 ksp->ks_data = (void *) &ncs;
427 427 kstat_install(ksp);
428 428 }
429 429 }
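Once dnlc_init() has run, the counters installed above are visible from userland; for instance, assuming the standard kstat(1M) utility:

	# kstat -m unix -n dnlcstats		(named counters from ncs)
	# kstat -m unix -n ncstats		(deprecated raw struct ncstats)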
430 430
431 431 /*
432 432 * Add a name to the directory cache.
433 433 */
434 434 void
435 435 dnlc_enter(vnode_t *dp, const char *name, vnode_t *vp)
436 436 {
437 437 ncache_t *ncp;
438 438 nc_hash_t *hp;
439 439 uchar_t namlen;
440 440 int hash;
441 441
442 442 TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_enter_start:");
443 443
444 444 if (!doingcache) {
445 445 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
446 446 "dnlc_enter_end:(%S) %d", "not caching", 0);
447 447 return;
448 448 }
449 449
450 450 /*
451 451 * Get a new dnlc entry. Assume the entry won't be in the cache
452 452 * and initialize it now
453 453 */
454 454 DNLCHASH(name, dp, hash, namlen);
455 455 if ((ncp = dnlc_get(namlen)) == NULL)
456 456 return;
457 457 ncp->dp = dp;
458 458 VN_HOLD_DNLC(dp);
459 459 ncp->vp = vp;
460 460 VN_HOLD_DNLC(vp);
461 461 bcopy(name, ncp->name, namlen + 1); /* name and null */
462 462 ncp->hash = hash;
463 463 hp = &nc_hash[hash & nc_hashmask];
464 464
465 465 mutex_enter(&hp->hash_lock);
466 466 if (dnlc_search(dp, name, namlen, hash) != NULL) {
467 467 mutex_exit(&hp->hash_lock);
468 468 ncstats.dbl_enters++;
469 469 ncs.ncs_dbl_enters.value.ui64++;
470 470 VN_RELE_DNLC(dp);
471 471 VN_RELE_DNLC(vp);
472 472 dnlc_free(ncp); /* crfree done here */
473 473 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
474 474 "dnlc_enter_end:(%S) %d", "dbl enter", ncstats.dbl_enters);
475 475 return;
476 476 }
477 477 /*
478 478 * Insert back into the hash chain.
479 479 */
480 480 nc_inshash(ncp, hp);
481 481 mutex_exit(&hp->hash_lock);
482 482 ncstats.enters++;
483 483 ncs.ncs_enters.value.ui64++;
484 484 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
485 485 "dnlc_enter_end:(%S) %d", "done", ncstats.enters);
486 486 }
487 487
488 488 /*
489 489 * Add a name to the directory cache.
490 490 *
491 491 * This function is basically identical to
492 492 * dnlc_enter(). The difference is that when the
493 493 * desired dnlc entry is found, the vnode in the
494 494 * ncache is compared with the vnode passed in.
495 495 *
496 496 * If they are not equal then the ncache is
497 497 * updated with the passed in vnode. Otherwise
498 498 * it just frees up the newly allocated dnlc entry.
499 499 */
500 500 void
501 501 dnlc_update(vnode_t *dp, const char *name, vnode_t *vp)
502 502 {
503 503 ncache_t *ncp;
504 504 ncache_t *tcp;
505 505 vnode_t *tvp;
506 506 nc_hash_t *hp;
507 507 int hash;
508 508 uchar_t namlen;
509 509
510 510 TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_update_start:");
511 511
512 512 if (!doingcache) {
513 513 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
514 514 "dnlc_update_end:(%S) %d", "not caching", 0);
515 515 return;
516 516 }
517 517
518 518 /*
519 519 * Get a new dnlc entry and initialize it now.
520 520 * If we fail to get a new entry, call dnlc_remove() to purge
521 521 * any existing dnlc entry including negative cache (DNLC_NO_VNODE)
522 522 * entry.
523 523 * Failure to clear an existing entry could result in false dnlc
524 524 * lookup (negative/stale entry).
525 525 */
526 526 DNLCHASH(name, dp, hash, namlen);
527 527 if ((ncp = dnlc_get(namlen)) == NULL) {
528 528 dnlc_remove(dp, name);
529 529 return;
530 530 }
531 531 ncp->dp = dp;
532 532 VN_HOLD_DNLC(dp);
533 533 ncp->vp = vp;
534 534 VN_HOLD_DNLC(vp);
535 535 bcopy(name, ncp->name, namlen + 1); /* name and null */
536 536 ncp->hash = hash;
537 537 hp = &nc_hash[hash & nc_hashmask];
538 538
539 539 mutex_enter(&hp->hash_lock);
540 540 if ((tcp = dnlc_search(dp, name, namlen, hash)) != NULL) {
541 541 if (tcp->vp != vp) {
542 542 tvp = tcp->vp;
543 543 tcp->vp = vp;
544 544 mutex_exit(&hp->hash_lock);
545 545 VN_RELE_DNLC(tvp);
546 546 ncstats.enters++;
547 547 ncs.ncs_enters.value.ui64++;
548 548 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
549 549 "dnlc_update_end:(%S) %d", "done", ncstats.enters);
550 550 } else {
551 551 mutex_exit(&hp->hash_lock);
552 552 VN_RELE_DNLC(vp);
553 553 ncstats.dbl_enters++;
554 554 ncs.ncs_dbl_enters.value.ui64++;
555 555 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
556 556 "dnlc_update_end:(%S) %d",
557 557 "dbl enter", ncstats.dbl_enters);
558 558 }
559 559 VN_RELE_DNLC(dp);
560 560 dnlc_free(ncp); /* crfree done here */
561 561 return;
562 562 }
563 563 /*
564 564 * insert the new entry, since it is not in dnlc yet
565 565 */
566 566 nc_inshash(ncp, hp);
567 567 mutex_exit(&hp->hash_lock);
568 568 ncstats.enters++;
569 569 ncs.ncs_enters.value.ui64++;
570 570 TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
571 571 "dnlc_update_end:(%S) %d", "done", ncstats.enters);
572 572 }
573 573
574 574 /*
575 575 * Look up a name in the directory name cache.
576 576 *
577 577 * Return a doubly-held vnode if found: one hold so that it may
578 578 * remain in the cache for other users, the other hold so that
579 579 * the cache is not re-cycled and the identity of the vnode is
580 580 * lost before the caller can use the vnode.
581 581 */
582 582 vnode_t *
583 583 dnlc_lookup(vnode_t *dp, const char *name)
584 584 {
585 585 ncache_t *ncp;
586 586 nc_hash_t *hp;
587 587 vnode_t *vp;
588 588 int hash, depth;
589 589 uchar_t namlen;
590 590
591 591 TRACE_2(TR_FAC_NFS, TR_DNLC_LOOKUP_START,
592 592 "dnlc_lookup_start:dp %x name %s", dp, name);
593 593
594 594 if (!doingcache) {
595 595 TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
596 596 "dnlc_lookup_end:%S %d vp %x name %s",
597 597 "not_caching", 0, NULL, name);
598 598 return (NULL);
599 599 }
600 600
601 601 DNLCHASH(name, dp, hash, namlen);
602 602 depth = 1;
603 603 hp = &nc_hash[hash & nc_hashmask];
604 604 mutex_enter(&hp->hash_lock);
605 605
606 606 for (ncp = hp->hash_next; ncp != (ncache_t *)hp;
607 607 ncp = ncp->hash_next) {
608 608 if (ncp->hash == hash && /* fast signature check */
609 609 ncp->dp == dp &&
610 610 ncp->namlen == namlen &&
611 611 bcmp(ncp->name, name, namlen) == 0) {
612 612 /*
613 613 * Move this entry to the head of its hash chain
614 614 * if it's not already close.
615 615 */
616 616 if (depth > NC_MOVETOFRONT) {
617 617 ncache_t *next = ncp->hash_next;
618 618 ncache_t *prev = ncp->hash_prev;
619 619
620 620 prev->hash_next = next;
621 621 next->hash_prev = prev;
622 622 ncp->hash_next = next = hp->hash_next;
623 623 ncp->hash_prev = (ncache_t *)hp;
624 624 next->hash_prev = ncp;
625 625 hp->hash_next = ncp;
626 626
627 627 ncstats.move_to_front++;
628 628 }
629 629
630 630 /*
631 631 * Put a hold on the vnode now so its identity
632 632 * can't change before the caller has a chance to
633 633 * put a hold on it.
634 634 */
635 635 vp = ncp->vp;
636 636 VN_HOLD_CALLER(vp); /* VN_HOLD 1 of 2 in this file */
637 637 mutex_exit(&hp->hash_lock);
638 638 ncstats.hits++;
639 639 ncs.ncs_hits.value.ui64++;
640 640 if (vp == DNLC_NO_VNODE) {
641 641 ncs.ncs_neg_hits.value.ui64++;
642 642 }
643 643 TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
644 644 "dnlc_lookup_end:%S %d vp %x name %s", "hit",
645 645 ncstats.hits, vp, name);
646 646 return (vp);
647 647 }
648 648 depth++;
649 649 }
650 650
651 651 mutex_exit(&hp->hash_lock);
652 652 ncstats.misses++;
653 653 ncs.ncs_misses.value.ui64++;
654 654 TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
655 655 "dnlc_lookup_end:%S %d vp %x name %s", "miss", ncstats.misses,
656 656 NULL, name);
657 657 return (NULL);
658 658 }
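To show how dnlc_enter() and dnlc_lookup() are intended to be used together, here is a hedged sketch of a typical filesystem lookup path; fs_lookup_example() and do_real_lookup() are hypothetical, while dnlc_lookup(), dnlc_enter(), DNLC_NO_VNODE and VN_RELE() are the real interfaces:

	/*
	 * Hypothetical consumer: try the DNLC first, fall back to a real
	 * directory scan on a miss, and prime the cache with the result.
	 */
	static int
	fs_lookup_example(vnode_t *dvp, char *nm, vnode_t **vpp)
	{
		vnode_t *vp;

		if ((vp = dnlc_lookup(dvp, nm)) != NULL) {
			if (vp == DNLC_NO_VNODE) {
				VN_RELE(vp);		/* drop the hold from dnlc_lookup() */
				return (ENOENT);	/* cached negative entry */
			}
			*vpp = vp;			/* keep the hold for the caller */
			return (0);
		}
		if (do_real_lookup(dvp, nm, &vp) != 0)	/* hypothetical fs-specific scan */
			return (ENOENT);
		dnlc_enter(dvp, nm, vp);		/* remember it for next time */
		*vpp = vp;
		return (0);
	}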
659 659
660 660 /*
661 661 * Remove an entry in the directory name cache.
662 662 */
663 663 void
664 664 dnlc_remove(vnode_t *dp, const char *name)
665 665 {
666 666 ncache_t *ncp;
667 667 nc_hash_t *hp;
668 668 uchar_t namlen;
669 669 int hash;
670 670
671 671 if (!doingcache)
672 672 return;
673 673 DNLCHASH(name, dp, hash, namlen);
674 674 hp = &nc_hash[hash & nc_hashmask];
675 675
676 676 mutex_enter(&hp->hash_lock);
677 677 if (ncp = dnlc_search(dp, name, namlen, hash)) {
678 678 /*
679 679 * Free up the entry
680 680 */
681 681 nc_rmhash(ncp);
682 682 mutex_exit(&hp->hash_lock);
683 683 VN_RELE_DNLC(ncp->vp);
684 684 VN_RELE_DNLC(ncp->dp);
685 685 dnlc_free(ncp);
686 686 return;
687 687 }
688 688 mutex_exit(&hp->hash_lock);
689 689 }
690 690
691 691 /*
692 692 * Purge the entire cache.
693 693 */
694 694 void
695 695 dnlc_purge()
696 696 {
697 697 nc_hash_t *nch;
698 698 ncache_t *ncp;
699 699 int index;
700 700 int i;
701 701 vnode_t *nc_rele[DNLC_MAX_RELE];
702 702
703 703 if (!doingcache)
704 704 return;
705 705
706 706 ncstats.purges++;
707 707 ncs.ncs_purge_all.value.ui64++;
708 708
709 709 for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
710 710 index = 0;
711 711 mutex_enter(&nch->hash_lock);
712 712 ncp = nch->hash_next;
713 713 while (ncp != (ncache_t *)nch) {
714 714 ncache_t *np;
715 715
716 716 np = ncp->hash_next;
717 717 nc_rele[index++] = ncp->vp;
718 718 nc_rele[index++] = ncp->dp;
719 719
720 720 nc_rmhash(ncp);
721 721 dnlc_free(ncp);
722 722 ncp = np;
723 723 ncs.ncs_purge_total.value.ui64++;
724 724 if (index == DNLC_MAX_RELE)
725 725 break;
726 726 }
727 727 mutex_exit(&nch->hash_lock);
728 728
729 729 /* Release holds on all the vnodes now that we have no locks */
730 730 for (i = 0; i < index; i++) {
731 731 VN_RELE_DNLC(nc_rele[i]);
732 732 }
733 733 if (ncp != (ncache_t *)nch) {
734 734 nch--; /* Do current hash chain again */
735 735 }
736 736 }
737 737 }
738 738
739 739 /*
740 740 * Purge any cache entries referencing a vnode. Exit as soon as the dnlc
741 741 * reference count goes to zero (the caller still holds a reference).
742 742 */
743 743 void
744 744 dnlc_purge_vp(vnode_t *vp)
745 745 {
746 746 nc_hash_t *nch;
747 747 ncache_t *ncp;
748 748 int index;
749 749 vnode_t *nc_rele[DNLC_MAX_RELE];
750 750
751 751 ASSERT(vp->v_count > 0);
752 752 if (vp->v_count_dnlc == 0) {
753 753 return;
754 754 }
755 755
756 756 if (!doingcache)
757 757 return;
758 758
759 759 ncstats.purges++;
760 760 ncs.ncs_purge_vp.value.ui64++;
761 761
762 762 for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
763 763 index = 0;
764 764 mutex_enter(&nch->hash_lock);
765 765 ncp = nch->hash_next;
766 766 while (ncp != (ncache_t *)nch) {
767 767 ncache_t *np;
768 768
769 769 np = ncp->hash_next;
770 770 if (ncp->dp == vp || ncp->vp == vp) {
771 771 nc_rele[index++] = ncp->vp;
772 772 nc_rele[index++] = ncp->dp;
773 773 nc_rmhash(ncp);
774 774 dnlc_free(ncp);
775 775 ncs.ncs_purge_total.value.ui64++;
776 776 if (index == DNLC_MAX_RELE) {
777 777 ncp = np;
778 778 break;
779 779 }
780 780 }
781 781 ncp = np;
782 782 }
783 783 mutex_exit(&nch->hash_lock);
784 784
785 785 /* Release holds on all the vnodes now that we have no locks */
786 786 while (index) {
787 787 VN_RELE_DNLC(nc_rele[--index]);
788 788 }
789 789
790 790 if (vp->v_count_dnlc == 0) {
791 791 return;
792 792 }
793 793
794 794 if (ncp != (ncache_t *)nch) {
795 795 nch--; /* Do current hash chain again */
796 796 }
797 797 }
798 798 }
799 799
800 800 /*
801 801 * Purge cache entries referencing a vfsp. Caller supplies a count
802 802 * of entries to purge; up to that many will be freed. A count of
803 803 * zero indicates that all such entries should be purged. Returns
804 804 * the number of entries that were purged.
805 805 */
806 806 int
807 807 dnlc_purge_vfsp(vfs_t *vfsp, int count)
808 808 {
809 809 nc_hash_t *nch;
810 810 ncache_t *ncp;
811 811 int n = 0;
812 812 int index;
813 813 int i;
814 814 vnode_t *nc_rele[DNLC_MAX_RELE];
815 815
816 816 if (!doingcache)
817 817 return (0);
818 818
819 819 ncstats.purges++;
820 820 ncs.ncs_purge_vfs.value.ui64++;
821 821
822 822 for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
823 823 index = 0;
824 824 mutex_enter(&nch->hash_lock);
825 825 ncp = nch->hash_next;
826 826 while (ncp != (ncache_t *)nch) {
827 827 ncache_t *np;
828 828
829 829 np = ncp->hash_next;
830 830 ASSERT(ncp->dp != NULL);
831 831 ASSERT(ncp->vp != NULL);
832 832 if ((ncp->dp->v_vfsp == vfsp) ||
833 833 (ncp->vp->v_vfsp == vfsp)) {
834 834 n++;
835 835 nc_rele[index++] = ncp->vp;
836 836 nc_rele[index++] = ncp->dp;
837 837 nc_rmhash(ncp);
838 838 dnlc_free(ncp);
839 839 ncs.ncs_purge_total.value.ui64++;
840 840 if (index == DNLC_MAX_RELE) {
841 841 ncp = np;
842 842 break;
843 843 }
844 844 if (count != 0 && n >= count) {
845 845 break;
846 846 }
847 847 }
848 848 ncp = np;
849 849 }
850 850 mutex_exit(&nch->hash_lock);
851 851 /* Release holds on all the vnodes now that we have no locks */
852 852 for (i = 0; i < index; i++) {
853 853 VN_RELE_DNLC(nc_rele[i]);
854 854 }
855 855 if (count != 0 && n >= count) {
856 856 return (n);
857 857 }
858 858 if (ncp != (ncache_t *)nch) {
859 859 nch--; /* Do current hash chain again */
860 860 }
861 861 }
862 862 return (n);
863 863 }
864 864
865 865 /*
866 866 * Purge 1 entry from the dnlc that is part of the filesystem(s)
867 867 * represented by 'vop'. The purpose of this routine is to allow
868 868 * users of the dnlc to free a vnode that is being held by the dnlc.
869 869 *
870 870 * If we find a vnode that we release, and the release will result in
871 871 * freeing the underlying vnode (count was 1), return 1; return 0
872 872 * if no appropriate vnode is found.
873 873 *
874 874 * Note, vop is not the 'right' identifier for a filesystem.
875 875 */
876 876 int
877 877 dnlc_fs_purge1(vnodeops_t *vop)
878 878 {
879 879 nc_hash_t *end;
880 880 nc_hash_t *hp;
881 881 ncache_t *ncp;
882 882 vnode_t *vp;
883 883
884 884 if (!doingcache)
885 885 return (0);
886 886
887 887 ncs.ncs_purge_fs1.value.ui64++;
888 888
889 889 /*
890 890 * Scan the dnlc entries looking for a likely candidate.
891 891 */
892 892 hp = end = dnlc_purge_fs1_rotor;
893 893
894 894 do {
895 895 if (++hp == &nc_hash[nc_hashsz])
896 896 hp = nc_hash;
897 897 dnlc_purge_fs1_rotor = hp;
898 898 if (hp->hash_next == (ncache_t *)hp)
899 899 continue;
900 900 mutex_enter(&hp->hash_lock);
901 901 for (ncp = hp->hash_prev;
902 902 ncp != (ncache_t *)hp;
903 903 ncp = ncp->hash_prev) {
904 904 vp = ncp->vp;
905 905 if (!vn_has_cached_data(vp) && (vp->v_count == 1) &&
906 906 vn_matchops(vp, vop))
907 907 break;
908 908 }
909 909 if (ncp != (ncache_t *)hp) {
910 910 nc_rmhash(ncp);
911 911 mutex_exit(&hp->hash_lock);
912 912 VN_RELE_DNLC(ncp->dp);
913 913 VN_RELE_DNLC(vp)
914 914 dnlc_free(ncp);
915 915 ncs.ncs_purge_total.value.ui64++;
916 916 return (1);
917 917 }
918 918 mutex_exit(&hp->hash_lock);
919 919 } while (hp != end);
920 920 return (0);
921 921 }
922 922
923 923 /*
924 924 * Perform a reverse lookup in the DNLC. This will find the first occurrence of
925 925 * the vnode. If successful, it will return the vnode of the parent, and the
926 926 * name of the entry in the given buffer. If it cannot be found, or the buffer
927 927 * is too small, then it will return NULL. Note that this is a highly
928 928 * inefficient function, since the DNLC is constructed solely for forward
929 929 * lookups.
930 930 */
931 931 vnode_t *
932 932 dnlc_reverse_lookup(vnode_t *vp, char *buf, size_t buflen)
933 933 {
934 934 nc_hash_t *nch;
935 935 ncache_t *ncp;
936 936 vnode_t *pvp;
937 937
938 938 if (!doingcache)
939 939 return (NULL);
940 940
941 941 for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
942 942 mutex_enter(&nch->hash_lock);
943 943 ncp = nch->hash_next;
944 944 while (ncp != (ncache_t *)nch) {
945 945 /*
946 946 * We ignore '..' entries since it can create
947 947 * confusion and infinite loops.
948 948 */
949 949 if (ncp->vp == vp && !(ncp->namlen == 2 &&
950 950 0 == bcmp(ncp->name, "..", 2)) &&
951 951 ncp->namlen < buflen) {
952 952 bcopy(ncp->name, buf, ncp->namlen);
953 953 buf[ncp->namlen] = '\0';
954 954 pvp = ncp->dp;
955 955 /* VN_HOLD 2 of 2 in this file */
956 956 VN_HOLD_CALLER(pvp);
957 957 mutex_exit(&nch->hash_lock);
958 958 return (pvp);
959 959 }
960 960 ncp = ncp->hash_next;
961 961 }
962 962 mutex_exit(&nch->hash_lock);
963 963 }
964 964
965 965 return (NULL);
966 966 }
967 967 /*
968 968 * Utility routine to search for a cache entry. Return the
969 969 * ncache entry if found, NULL otherwise.
970 970 */
971 971 static ncache_t *
972 972 dnlc_search(vnode_t *dp, const char *name, uchar_t namlen, int hash)
973 973 {
974 974 nc_hash_t *hp;
975 975 ncache_t *ncp;
976 976
977 977 hp = &nc_hash[hash & nc_hashmask];
978 978
979 979 for (ncp = hp->hash_next; ncp != (ncache_t *)hp; ncp = ncp->hash_next) {
980 980 if (ncp->hash == hash &&
981 981 ncp->dp == dp &&
982 982 ncp->namlen == namlen &&
983 983 bcmp(ncp->name, name, namlen) == 0)
984 984 return (ncp);
985 985 }
986 986 return (NULL);
987 987 }
988 988
989 989 #if ((1 << NBBY) - 1) < (MAXNAMELEN - 1)
990 990 #error ncache_t name length representation is too small
991 991 #endif
992 992
993 993 void
994 994 dnlc_reduce_cache(void *reduce_percent)
995 995 {
996 996 if (dnlc_reduce_idle && (dnlc_nentries >= ncsize || reduce_percent)) {
997 997 dnlc_reduce_idle = 0;
998 998 if ((taskq_dispatch(system_taskq, do_dnlc_reduce_cache,
999 999 reduce_percent, TQ_NOSLEEP)) == NULL)
1000 1000 dnlc_reduce_idle = 1;
1001 1001 }
1002 1002 }
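Callers outside this file can also request an explicit trim by passing a small integer (cast through void *), which reduces the number of cached entries by roughly that many percent of ncsize, but never below ncsize_min_percent. For example (hypothetical caller):

	dnlc_reduce_cache((void *)(uintptr_t)3);	/* trim roughly 3% of ncsize */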
1003 1003
1004 1004 /*
1005 1005 * Get a new name cache entry.
1006 1006 * If the dnlc_reduce_cache() taskq isn't keeping up with demand, or memory
1007 1007 * is short, then just return NULL. If we're over ncsize then kick off a
1008 1008 * thread to free some in use entries down to dnlc_nentries_low_water.
1009 1009 * Caller must initialise all fields except namlen.
1010 1010 * Component names are defined to be less than MAXNAMELEN
1011 1011 * which includes a null.
1012 1012 */
1013 1013 static ncache_t *
1014 1014 dnlc_get(uchar_t namlen)
1015 1015 {
1016 1016 ncache_t *ncp;
1017 1017
1018 1018 if (dnlc_nentries > dnlc_max_nentries) {
1019 1019 dnlc_max_nentries_cnt++; /* keep a statistic */
1020 1020 return (NULL);
1021 1021 }
1022 1022 ncp = kmem_alloc(sizeof (ncache_t) + namlen, KM_NOSLEEP);
1023 1023 if (ncp == NULL) {
1024 1024 return (NULL);
1025 1025 }
1026 1026 ncp->namlen = namlen;
1027 - atomic_add_32(&dnlc_nentries, 1);
1027 + atomic_inc_32(&dnlc_nentries);
1028 1028 dnlc_reduce_cache(NULL);
1029 1029 return (ncp);
1030 1030 }
1031 1031
1032 1032 /*
1033 1033 * Taskq routine to free up name cache entries to reduce the
1034 1034 * cache size to the low water mark if "reduce_percent" is not provided.
1035 1035 * If "reduce_percent" is provided, reduce cache size by
1036 1036 * (ncsize_onepercent * reduce_percent).
1037 1037 */
1038 1038 /*ARGSUSED*/
1039 1039 static void
1040 1040 do_dnlc_reduce_cache(void *reduce_percent)
1041 1041 {
1042 1042 nc_hash_t *hp = dnlc_free_rotor, *start_hp = hp;
1043 1043 vnode_t *vp;
1044 1044 ncache_t *ncp;
1045 1045 int cnt;
1046 1046 uint_t low_water = dnlc_nentries_low_water;
1047 1047
1048 1048 if (reduce_percent) {
1049 1049 uint_t reduce_cnt;
1050 1050
1051 1051 /*
1052 1052 * Never try to reduce the current number
1053 1053 * of cache entries below 3% of ncsize.
1054 1054 */
1055 1055 if (dnlc_nentries <= ncsize_min_percent) {
1056 1056 dnlc_reduce_idle = 1;
1057 1057 return;
1058 1058 }
1059 1059 reduce_cnt = ncsize_onepercent *
1060 1060 (uint_t)(uintptr_t)reduce_percent;
1061 1061
1062 1062 if (reduce_cnt > dnlc_nentries ||
1063 1063 dnlc_nentries - reduce_cnt < ncsize_min_percent)
1064 1064 low_water = ncsize_min_percent;
1065 1065 else
1066 1066 low_water = dnlc_nentries - reduce_cnt;
1067 1067 }
1068 1068
1069 1069 do {
1070 1070 /*
1071 1071 * Find the first non empty hash queue without locking.
1072 1072 * Only look at each hash queue once to avoid an infinite loop.
1073 1073 */
1074 1074 do {
1075 1075 if (++hp == &nc_hash[nc_hashsz])
1076 1076 hp = nc_hash;
1077 1077 } while (hp->hash_next == (ncache_t *)hp && hp != start_hp);
1078 1078
1079 1079 /* return if all hash queues are empty. */
1080 1080 if (hp->hash_next == (ncache_t *)hp) {
1081 1081 dnlc_reduce_idle = 1;
1082 1082 return;
1083 1083 }
1084 1084
1085 1085 mutex_enter(&hp->hash_lock);
1086 1086 for (cnt = 0, ncp = hp->hash_prev; ncp != (ncache_t *)hp;
1087 1087 ncp = ncp->hash_prev, cnt++) {
1088 1088 vp = ncp->vp;
1089 1089 /*
1090 1090 * A name cache entry with a reference count
1091 1091 * of one is only referenced by the dnlc.
1092 1092 * Also negative cache entries are purged first.
1093 1093 */
1094 1094 if (!vn_has_cached_data(vp) &&
1095 1095 ((vp->v_count == 1) || (vp == DNLC_NO_VNODE))) {
1096 1096 ncs.ncs_pick_heur.value.ui64++;
1097 1097 goto found;
1098 1098 }
1099 1099 /*
1100 1100 * Remove from the end of the chain if the
1101 1101 * chain is too long
1102 1102 */
1103 1103 if (cnt > dnlc_long_chain) {
1104 1104 ncp = hp->hash_prev;
1105 1105 ncs.ncs_pick_last.value.ui64++;
1106 1106 vp = ncp->vp;
1107 1107 goto found;
1108 1108 }
1109 1109 }
1110 1110 /* check for race and continue */
1111 1111 if (hp->hash_next == (ncache_t *)hp) {
1112 1112 mutex_exit(&hp->hash_lock);
1113 1113 continue;
1114 1114 }
1115 1115
1116 1116 ncp = hp->hash_prev; /* pick the last one in the hash queue */
1117 1117 ncs.ncs_pick_last.value.ui64++;
1118 1118 vp = ncp->vp;
1119 1119 found:
1120 1120 /*
1121 1121 * Remove from hash chain.
1122 1122 */
1123 1123 nc_rmhash(ncp);
1124 1124 mutex_exit(&hp->hash_lock);
1125 1125 VN_RELE_DNLC(vp);
1126 1126 VN_RELE_DNLC(ncp->dp);
1127 1127 dnlc_free(ncp);
1128 1128 } while (dnlc_nentries > low_water);
1129 1129
1130 1130 dnlc_free_rotor = hp;
1131 1131 dnlc_reduce_idle = 1;
1132 1132 }
1133 1133
1134 1134 /*
1135 1135 * Directory caching routines
1136 1136 * ==========================
1137 1137 *
1138 1138 * See dnlc.h for details of the interfaces below.
1139 1139 */
1140 1140
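The routines below maintain, per directory, a cache of name-to-handle mappings plus free-space records, anchored by a dcanchor_t embedded in the filesystem's directory state. A hedged sketch of the intended calling protocol follows; fill_dir_cache_example() and read_next_dirent() are hypothetical, while the dnlc_dir_*() calls and dcret_t values are the ones defined here and in dnlc.h:

	/*
	 * Hypothetical producer: on the first full scan of a directory,
	 * offer its contents to the cache, then mark it complete so that
	 * later lookups may return DNOENT authoritatively.
	 */
	static void
	fill_dir_cache_example(dcanchor_t *dcap, uint_t nentries)
	{
		char name[MAXNAMELEN];
		uint64_t handle;

		if (dnlc_dir_start(dcap, nentries) != DOK)
			return;			/* too small, too big or no memory */
		while (read_next_dirent(name, &handle))	/* hypothetical scan */
			if (dnlc_dir_add_entry(dcap, name, handle) != DOK)
				return;		/* cache was purged or went away */
		dnlc_dir_complete(dcap);

		/* A later lookup can now avoid rescanning the directory. */
		switch (dnlc_dir_lookup(dcap, "foo", &handle)) {
		case DFOUND:
			/* handle is valid */
			break;
		case DNOENT:
			/* the name is definitely not in the directory */
			break;
		default:
			/* DNOCACHE: fall back to a real directory scan */
			break;
		}
	}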
1141 1141 /*
1142 1142 * Look up an entry in a complete or partial directory cache.
1143 1143 */
1144 1144 dcret_t
1145 1145 dnlc_dir_lookup(dcanchor_t *dcap, const char *name, uint64_t *handle)
1146 1146 {
1147 1147 dircache_t *dcp;
1148 1148 dcentry_t *dep;
1149 1149 int hash;
1150 1150 int ret;
1151 1151 uchar_t namlen;
1152 1152
1153 1153 /*
1154 1154 * can test without lock as we are only a cache
1155 1155 */
1156 1156 if (!VALID_DIR_CACHE(dcap->dca_dircache)) {
1157 1157 ncs.ncs_dir_misses.value.ui64++;
1158 1158 return (DNOCACHE);
1159 1159 }
1160 1160
1161 1161 if (!dnlc_dir_enable) {
1162 1162 return (DNOCACHE);
1163 1163 }
1164 1164
1165 1165 mutex_enter(&dcap->dca_lock);
1166 1166 dcp = (dircache_t *)dcap->dca_dircache;
1167 1167 if (VALID_DIR_CACHE(dcp)) {
1168 1168 dcp->dc_actime = ddi_get_lbolt64();
1169 1169 DNLC_DIR_HASH(name, hash, namlen);
1170 1170 dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1171 1171 while (dep != NULL) {
1172 1172 if ((dep->de_hash == hash) &&
1173 1173 (namlen == dep->de_namelen) &&
1174 1174 bcmp(dep->de_name, name, namlen) == 0) {
1175 1175 *handle = dep->de_handle;
1176 1176 mutex_exit(&dcap->dca_lock);
1177 1177 ncs.ncs_dir_hits.value.ui64++;
1178 1178 return (DFOUND);
1179 1179 }
1180 1180 dep = dep->de_next;
1181 1181 }
1182 1182 if (dcp->dc_complete) {
1183 1183 ret = DNOENT;
1184 1184 } else {
1185 1185 ret = DNOCACHE;
1186 1186 }
1187 1187 mutex_exit(&dcap->dca_lock);
1188 1188 return (ret);
1189 1189 } else {
1190 1190 mutex_exit(&dcap->dca_lock);
1191 1191 ncs.ncs_dir_misses.value.ui64++;
1192 1192 return (DNOCACHE);
1193 1193 }
1194 1194 }
1195 1195
1196 1196 /*
1197 1197 * Start a new directory cache. An estimate of the number of
1198 1198 * entries is provided as a quick check to ensure the directory
1199 1199 * is cacheable.
1200 1200 */
1201 1201 dcret_t
1202 1202 dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries)
1203 1203 {
1204 1204 dircache_t *dcp;
1205 1205
1206 1206 if (!dnlc_dir_enable ||
1207 1207 (num_entries < dnlc_dir_min_size)) {
1208 1208 return (DNOCACHE);
1209 1209 }
1210 1210
1211 1211 if (num_entries > dnlc_dir_max_size) {
1212 1212 return (DTOOBIG);
1213 1213 }
1214 1214
1215 1215 mutex_enter(&dc_head.dch_lock);
1216 1216 mutex_enter(&dcap->dca_lock);
1217 1217
1218 1218 if (dcap->dca_dircache == DC_RET_LOW_MEM) {
1219 1219 dcap->dca_dircache = NULL;
1220 1220 mutex_exit(&dcap->dca_lock);
1221 1221 mutex_exit(&dc_head.dch_lock);
1222 1222 return (DNOMEM);
1223 1223 }
1224 1224
1225 1225 /*
1226 1226 * Check if there's currently a cache.
1227 1227 * This probably only occurs on a race.
1228 1228 */
1229 1229 if (dcap->dca_dircache != NULL) {
1230 1230 mutex_exit(&dcap->dca_lock);
1231 1231 mutex_exit(&dc_head.dch_lock);
1232 1232 return (DNOCACHE);
1233 1233 }
1234 1234
1235 1235 /*
1236 1236 * Allocate the dircache struct, entry and free space hash tables.
1237 1237 * These tables are initially just one entry but dynamically resize
1238 1238 * when entries and free space are added or removed.
1239 1239 */
1240 1240 if ((dcp = kmem_zalloc(sizeof (dircache_t), KM_NOSLEEP)) == NULL) {
1241 1241 goto error;
1242 1242 }
1243 1243 if ((dcp->dc_namehash = kmem_zalloc(sizeof (dcentry_t *),
1244 1244 KM_NOSLEEP)) == NULL) {
1245 1245 goto error;
1246 1246 }
1247 1247 if ((dcp->dc_freehash = kmem_zalloc(sizeof (dcfree_t *),
1248 1248 KM_NOSLEEP)) == NULL) {
1249 1249 goto error;
1250 1250 }
1251 1251
1252 1252 dcp->dc_anchor = dcap; /* set back pointer to anchor */
1253 1253 dcap->dca_dircache = dcp;
1254 1254
1255 1255 /* add into head of global chain */
1256 1256 dcp->dc_next = dc_head.dch_next;
1257 1257 dcp->dc_prev = (dircache_t *)&dc_head;
1258 1258 dcp->dc_next->dc_prev = dcp;
1259 1259 dc_head.dch_next = dcp;
1260 1260
1261 1261 mutex_exit(&dcap->dca_lock);
1262 1262 mutex_exit(&dc_head.dch_lock);
1263 1263 ncs.ncs_cur_dirs.value.ui64++;
1264 1264 ncs.ncs_dirs_cached.value.ui64++;
1265 1265 return (DOK);
1266 1266 error:
1267 1267 if (dcp != NULL) {
1268 1268 if (dcp->dc_namehash) {
1269 1269 kmem_free(dcp->dc_namehash, sizeof (dcentry_t *));
1270 1270 }
1271 1271 kmem_free(dcp, sizeof (dircache_t));
1272 1272 }
1273 1273 /*
1274 1274 * Must also kmem_free dcp->dc_freehash if more error cases are added
1275 1275 */
1276 1276 mutex_exit(&dcap->dca_lock);
1277 1277 mutex_exit(&dc_head.dch_lock);
1278 1278 ncs.ncs_dir_start_nm.value.ui64++;
1279 1279 return (DNOCACHE);
1280 1280 }
1281 1281
1282 1282 /*
1283 1283 * Add a directory entry to a partial or complete directory cache.
1284 1284 */
1285 1285 dcret_t
1286 1286 dnlc_dir_add_entry(dcanchor_t *dcap, const char *name, uint64_t handle)
1287 1287 {
1288 1288 dircache_t *dcp;
1289 1289 dcentry_t **hp, *dep;
1290 1290 int hash;
1291 1291 uint_t capacity;
1292 1292 uchar_t namlen;
1293 1293
1294 1294 /*
1295 1295 * Allocate the dcentry struct, including the variable
1296 1296 * size name. Note, the null terminator is not copied.
1297 1297 *
1298 1298 * We do this outside the lock to avoid possible deadlock if
1299 1299 * dnlc_dir_reclaim() is called as a result of memory shortage.
1300 1300 */
1301 1301 DNLC_DIR_HASH(name, hash, namlen);
1302 1302 dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1303 1303 if (dep == NULL) {
1304 1304 #ifdef DEBUG
1305 1305 /*
1306 1306 * The kmem allocator generates random failures for
1307 1307 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
1308 1308 * So try again before we blow away a perfectly good cache.
1309 1309 * This is done not to cover an error but purely for
1310 1310 * performance running a debug kernel.
1311 1311 * This random error only occurs in debug mode.
1312 1312 */
1313 1313 dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1314 1314 if (dep != NULL)
1315 1315 goto ok;
1316 1316 #endif
1317 1317 ncs.ncs_dir_add_nm.value.ui64++;
1318 1318 /*
1319 1319 * Free a directory cache. This may be the one we are
1320 1320 * called with.
1321 1321 */
1322 1322 dnlc_dir_reclaim(NULL);
1323 1323 dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
1324 1324 if (dep == NULL) {
1325 1325 /*
1326 1326 * still no memory, better delete this cache
1327 1327 */
1328 1328 mutex_enter(&dcap->dca_lock);
1329 1329 dcp = (dircache_t *)dcap->dca_dircache;
1330 1330 if (VALID_DIR_CACHE(dcp)) {
1331 1331 dnlc_dir_abort(dcp);
1332 1332 dcap->dca_dircache = DC_RET_LOW_MEM;
1333 1333 }
1334 1334 mutex_exit(&dcap->dca_lock);
1335 1335 ncs.ncs_dir_addabort.value.ui64++;
1336 1336 return (DNOCACHE);
1337 1337 }
1338 1338 /*
1339 1339 * fall through as if the 1st kmem_alloc had worked
1340 1340 */
1341 1341 }
1342 1342 #ifdef DEBUG
1343 1343 ok:
1344 1344 #endif
1345 1345 mutex_enter(&dcap->dca_lock);
1346 1346 dcp = (dircache_t *)dcap->dca_dircache;
1347 1347 if (VALID_DIR_CACHE(dcp)) {
1348 1348 /*
1349 1349 * If the total number of entries goes above the max
1350 1350 * then free this cache
1351 1351 */
1352 1352 if ((dcp->dc_num_entries + dcp->dc_num_free) >
1353 1353 dnlc_dir_max_size) {
1354 1354 mutex_exit(&dcap->dca_lock);
1355 1355 dnlc_dir_purge(dcap);
1356 1356 kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
1357 1357 ncs.ncs_dir_add_max.value.ui64++;
1358 1358 return (DTOOBIG);
1359 1359 }
1360 1360 dcp->dc_num_entries++;
1361 1361 capacity = (dcp->dc_nhash_mask + 1) << dnlc_dir_hash_size_shift;
1362 1362 if (dcp->dc_num_entries >=
1363 1363 (capacity << dnlc_dir_hash_resize_shift)) {
1364 1364 dnlc_dir_adjust_nhash(dcp);
1365 1365 }
1366 1366 hp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1367 1367
1368 1368 /*
1369 1369 * Initialise and chain in new entry
1370 1370 */
1371 1371 dep->de_handle = handle;
1372 1372 dep->de_hash = hash;
1373 1373 /*
1374 1374 * Note de_namelen is a uchar_t to conserve space
1375 1375 * and alignment padding. The max length of any
1376 1376 * pathname component is defined as MAXNAMELEN
1377 1377 * which is 256 (including the terminating null).
1378 1378 * So provided this doesn't change, we don't include the null,
1379 1379 * we always use bcmp to compare strings, and we don't
1380 1380 * start storing full names, then we are ok.
1381 1381 * The space savings is worth it.
1382 1382 */
1383 1383 dep->de_namelen = namlen;
1384 1384 bcopy(name, dep->de_name, namlen);
1385 1385 dep->de_next = *hp;
1386 1386 *hp = dep;
1387 1387 dcp->dc_actime = ddi_get_lbolt64();
1388 1388 mutex_exit(&dcap->dca_lock);
1389 1389 ncs.ncs_dir_num_ents.value.ui64++;
1390 1390 return (DOK);
1391 1391 } else {
1392 1392 mutex_exit(&dcap->dca_lock);
1393 1393 kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
1394 1394 return (DNOCACHE);
1395 1395 }
1396 1396 }
1397 1397
1398 1398 /*
1399 1399 * Add free space to a partial or complete directory cache.
1400 1400 */
1401 1401 dcret_t
1402 1402 dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle)
1403 1403 {
1404 1404 dircache_t *dcp;
1405 1405 dcfree_t *dfp, **hp;
1406 1406 uint_t capacity;
1407 1407
1408 1408 /*
1409 1409 * We kmem_alloc outside the lock to avoid possible deadlock if
1410 1410 * dnlc_dir_reclaim() is called as a result of memory shortage.
1411 1411 */
1412 1412 dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1413 1413 if (dfp == NULL) {
1414 1414 #ifdef DEBUG
1415 1415 /*
1416 1416 * The kmem allocator generates random failures for
1417 1417 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
1418 1418 * So try again before we blow away a perfectly good cache.
1419 1419 * This random error only occurs in debug mode
1420 1420 */
1421 1421 dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1422 1422 if (dfp != NULL)
1423 1423 goto ok;
1424 1424 #endif
1425 1425 ncs.ncs_dir_add_nm.value.ui64++;
1426 1426 /*
1427 1427 * Free a directory cache. This may be the one we are
1428 1428 * called with.
1429 1429 */
1430 1430 dnlc_dir_reclaim(NULL);
1431 1431 dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
1432 1432 if (dfp == NULL) {
1433 1433 /*
1434 1434 * still no memory, better delete this cache
1435 1435 */
1436 1436 mutex_enter(&dcap->dca_lock);
1437 1437 dcp = (dircache_t *)dcap->dca_dircache;
1438 1438 if (VALID_DIR_CACHE(dcp)) {
1439 1439 dnlc_dir_abort(dcp);
1440 1440 dcap->dca_dircache = DC_RET_LOW_MEM;
1441 1441 }
1442 1442 mutex_exit(&dcap->dca_lock);
1443 1443 ncs.ncs_dir_addabort.value.ui64++;
1444 1444 return (DNOCACHE);
1445 1445 }
1446 1446 /*
1447 1447 * fall through as if the 1st kmem_alloc had worked
1448 1448 */
1449 1449 }
1450 1450
1451 1451 #ifdef DEBUG
1452 1452 ok:
1453 1453 #endif
1454 1454 mutex_enter(&dcap->dca_lock);
1455 1455 dcp = (dircache_t *)dcap->dca_dircache;
1456 1456 if (VALID_DIR_CACHE(dcp)) {
1457 1457 if ((dcp->dc_num_entries + dcp->dc_num_free) >
1458 1458 dnlc_dir_max_size) {
1459 1459 mutex_exit(&dcap->dca_lock);
1460 1460 dnlc_dir_purge(dcap);
1461 1461 kmem_cache_free(dnlc_dir_space_cache, dfp);
1462 1462 ncs.ncs_dir_add_max.value.ui64++;
1463 1463 return (DTOOBIG);
1464 1464 }
1465 1465 dcp->dc_num_free++;
1466 1466 capacity = (dcp->dc_fhash_mask + 1) << dnlc_dir_hash_size_shift;
1467 1467 if (dcp->dc_num_free >=
1468 1468 (capacity << dnlc_dir_hash_resize_shift)) {
1469 1469 dnlc_dir_adjust_fhash(dcp);
1470 1470 }
1471 1471 /*
1472 1472 * Initialise and chain a new entry
1473 1473 */
1474 1474 dfp->df_handle = handle;
1475 1475 dfp->df_len = len;
1476 1476 dcp->dc_actime = ddi_get_lbolt64();
1477 1477 hp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1478 1478 dfp->df_next = *hp;
1479 1479 *hp = dfp;
1480 1480 mutex_exit(&dcap->dca_lock);
1481 1481 ncs.ncs_dir_num_ents.value.ui64++;
1482 1482 return (DOK);
1483 1483 } else {
1484 1484 mutex_exit(&dcap->dca_lock);
1485 1485 kmem_cache_free(dnlc_dir_space_cache, dfp);
1486 1486 return (DNOCACHE);
1487 1487 }
1488 1488 }
1489 1489
1490 1490 /*
1491 1491 * Mark a directory cache as complete.
1492 1492 */
1493 1493 void
1494 1494 dnlc_dir_complete(dcanchor_t *dcap)
1495 1495 {
1496 1496 dircache_t *dcp;
1497 1497
1498 1498 mutex_enter(&dcap->dca_lock);
1499 1499 dcp = (dircache_t *)dcap->dca_dircache;
1500 1500 if (VALID_DIR_CACHE(dcp)) {
1501 1501 dcp->dc_complete = B_TRUE;
1502 1502 }
1503 1503 mutex_exit(&dcap->dca_lock);
1504 1504 }
1505 1505
1506 1506 /*
1507 1507 * Internal routine to delete a partial or full directory cache.
1508 1508 * No additional locking needed.
1509 1509 */
1510 1510 static void
1511 1511 dnlc_dir_abort(dircache_t *dcp)
1512 1512 {
1513 1513 dcentry_t *dep, *nhp;
1514 1514 dcfree_t *fep, *fhp;
1515 1515 uint_t nhtsize = dcp->dc_nhash_mask + 1; /* name hash table size */
1516 1516 uint_t fhtsize = dcp->dc_fhash_mask + 1; /* free hash table size */
1517 1517 uint_t i;
1518 1518
1519 1519 /*
1520 1520 * Free up the cached name entries and hash table
1521 1521 */
1522 1522 for (i = 0; i < nhtsize; i++) { /* for each hash bucket */
1523 1523 nhp = dcp->dc_namehash[i];
1524 1524 while (nhp != NULL) { /* for each chained entry */
1525 1525 dep = nhp->de_next;
1526 1526 kmem_free(nhp, sizeof (dcentry_t) - 1 +
1527 1527 nhp->de_namelen);
1528 1528 nhp = dep;
1529 1529 }
1530 1530 }
1531 1531 kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * nhtsize);
1532 1532
1533 1533 /*
1534 1534 * Free up the free space entries and hash table
1535 1535 */
1536 1536 for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1537 1537 fhp = dcp->dc_freehash[i];
1538 1538 while (fhp != NULL) { /* for each chained entry */
1539 1539 fep = fhp->df_next;
1540 1540 kmem_cache_free(dnlc_dir_space_cache, fhp);
1541 1541 fhp = fep;
1542 1542 }
1543 1543 }
1544 1544 kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * fhtsize);
1545 1545
1546 1546 /*
1547 1547 * Finally free the directory cache structure itself
1548 1548 */
1549 1549 ncs.ncs_dir_num_ents.value.ui64 -= (dcp->dc_num_entries +
1550 1550 dcp->dc_num_free);
1551 1551 kmem_free(dcp, sizeof (dircache_t));
1552 1552 ncs.ncs_cur_dirs.value.ui64--;
1553 1553 }
1554 1554
1555 1555 /*
1556 1556 * Remove a partial or complete directory cache
1557 1557 */
1558 1558 void
1559 1559 dnlc_dir_purge(dcanchor_t *dcap)
1560 1560 {
1561 1561 dircache_t *dcp;
1562 1562
1563 1563 mutex_enter(&dc_head.dch_lock);
1564 1564 mutex_enter(&dcap->dca_lock);
1565 1565 dcp = (dircache_t *)dcap->dca_dircache;
1566 1566 if (!VALID_DIR_CACHE(dcp)) {
1567 1567 mutex_exit(&dcap->dca_lock);
1568 1568 mutex_exit(&dc_head.dch_lock);
1569 1569 return;
1570 1570 }
1571 1571 dcap->dca_dircache = NULL;
1572 1572 /*
1573 1573 * Unchain from global list
1574 1574 */
1575 1575 dcp->dc_prev->dc_next = dcp->dc_next;
1576 1576 dcp->dc_next->dc_prev = dcp->dc_prev;
1577 1577 mutex_exit(&dcap->dca_lock);
1578 1578 mutex_exit(&dc_head.dch_lock);
1579 1579 dnlc_dir_abort(dcp);
1580 1580 }
1581 1581
1582 1582 /*
1583 1583 * Remove an entry from a complete or partial directory cache.
1584 1584 * Return the handle through handlep if handlep is non-null.
1585 1585 */
1586 1586 dcret_t
1587 1587 dnlc_dir_rem_entry(dcanchor_t *dcap, const char *name, uint64_t *handlep)
1588 1588 {
1589 1589 dircache_t *dcp;
1590 1590 dcentry_t **prevpp, *te;
1591 1591 uint_t capacity;
1592 1592 int hash;
1593 1593 int ret;
1594 1594 uchar_t namlen;
1595 1595
1596 1596 if (!dnlc_dir_enable) {
1597 1597 return (DNOCACHE);
1598 1598 }
1599 1599
1600 1600 mutex_enter(&dcap->dca_lock);
1601 1601 dcp = (dircache_t *)dcap->dca_dircache;
1602 1602 if (VALID_DIR_CACHE(dcp)) {
1603 1603 dcp->dc_actime = ddi_get_lbolt64();
1604 1604 if (dcp->dc_nhash_mask > 0) { /* ie not minimum */
1605 1605 capacity = (dcp->dc_nhash_mask + 1) <<
1606 1606 dnlc_dir_hash_size_shift;
1607 1607 if (dcp->dc_num_entries <=
1608 1608 (capacity >> dnlc_dir_hash_resize_shift)) {
1609 1609 dnlc_dir_adjust_nhash(dcp);
1610 1610 }
1611 1611 }
1612 1612 DNLC_DIR_HASH(name, hash, namlen);
1613 1613 prevpp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1614 1614 while (*prevpp != NULL) {
1615 1615 if (((*prevpp)->de_hash == hash) &&
1616 1616 (namlen == (*prevpp)->de_namelen) &&
1617 1617 bcmp((*prevpp)->de_name, name, namlen) == 0) {
1618 1618 if (handlep != NULL) {
1619 1619 *handlep = (*prevpp)->de_handle;
1620 1620 }
1621 1621 te = *prevpp;
1622 1622 *prevpp = (*prevpp)->de_next;
1623 1623 kmem_free(te, sizeof (dcentry_t) - 1 +
1624 1624 te->de_namelen);
1625 1625
1626 1626 /*
1627 1627 * If the total number of entries
1628 1628 * falls below half the minimum number
1629 1629 * of entries then free this cache.
1630 1630 */
1631 1631 if (--dcp->dc_num_entries <
1632 1632 (dnlc_dir_min_size >> 1)) {
1633 1633 mutex_exit(&dcap->dca_lock);
1634 1634 dnlc_dir_purge(dcap);
1635 1635 } else {
1636 1636 mutex_exit(&dcap->dca_lock);
1637 1637 }
1638 1638 ncs.ncs_dir_num_ents.value.ui64--;
1639 1639 return (DFOUND);
1640 1640 }
1641 1641 prevpp = &((*prevpp)->de_next);
1642 1642 }
1643 1643 if (dcp->dc_complete) {
1644 1644 ncs.ncs_dir_reme_fai.value.ui64++;
1645 1645 ret = DNOENT;
1646 1646 } else {
1647 1647 ret = DNOCACHE;
1648 1648 }
1649 1649 mutex_exit(&dcap->dca_lock);
1650 1650 return (ret);
1651 1651 } else {
1652 1652 mutex_exit(&dcap->dca_lock);
1653 1653 return (DNOCACHE);
1654 1654 }
1655 1655 }
1656 1656
1657 1657
1658 1658 /*
1659 1659 * Remove free space of at least the given length from a complete
1660 1660 * or partial directory cache.
1661 1661 */
1662 1662 dcret_t
1663 1663 dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len, uint64_t *handlep)
1664 1664 {
1665 1665 dircache_t *dcp;
1666 1666 dcfree_t **prevpp, *tfp;
1667 1667 uint_t fhtsize; /* free hash table size */
1668 1668 uint_t i;
1669 1669 uint_t capacity;
1670 1670 int ret;
1671 1671
1672 1672 if (!dnlc_dir_enable) {
1673 1673 return (DNOCACHE);
1674 1674 }
1675 1675
1676 1676 mutex_enter(&dcap->dca_lock);
1677 1677 dcp = (dircache_t *)dcap->dca_dircache;
1678 1678 if (VALID_DIR_CACHE(dcp)) {
1679 1679 dcp->dc_actime = ddi_get_lbolt64();
1680 1680 if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1681 1681 capacity = (dcp->dc_fhash_mask + 1) <<
1682 1682 dnlc_dir_hash_size_shift;
1683 1683 if (dcp->dc_num_free <=
1684 1684 (capacity >> dnlc_dir_hash_resize_shift)) {
1685 1685 dnlc_dir_adjust_fhash(dcp);
1686 1686 }
1687 1687 }
1688 1688 /*
1689 1689 * Search for an entry of the appropriate size
1690 1690 * on a first fit basis.
1691 1691 */
1692 1692 fhtsize = dcp->dc_fhash_mask + 1;
1693 1693 for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
1694 1694 prevpp = &(dcp->dc_freehash[i]);
1695 1695 while (*prevpp != NULL) {
1696 1696 if ((*prevpp)->df_len >= len) {
1697 1697 *handlep = (*prevpp)->df_handle;
1698 1698 tfp = *prevpp;
1699 1699 *prevpp = (*prevpp)->df_next;
1700 1700 dcp->dc_num_free--;
1701 1701 mutex_exit(&dcap->dca_lock);
1702 1702 kmem_cache_free(dnlc_dir_space_cache,
1703 1703 tfp);
1704 1704 ncs.ncs_dir_num_ents.value.ui64--;
1705 1705 return (DFOUND);
1706 1706 }
1707 1707 prevpp = &((*prevpp)->df_next);
1708 1708 }
1709 1709 }
1710 1710 if (dcp->dc_complete) {
1711 1711 ret = DNOENT;
1712 1712 } else {
1713 1713 ret = DNOCACHE;
1714 1714 }
1715 1715 mutex_exit(&dcap->dca_lock);
1716 1716 return (ret);
1717 1717 } else {
1718 1718 mutex_exit(&dcap->dca_lock);
1719 1719 return (DNOCACHE);
1720 1720 }
1721 1721 }
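The first-fit search above is meant to be consumed roughly as below: before extending a directory, a filesystem asks the cache for a free slot of at least the record length it needs. A sketch reusing the hypothetical fs_dir_t from the earlier example; fs_extend_dir() and fs_scan_for_space() are likewise hypothetical.

#include <sys/types.h>
#include <sys/dnlc.h>

extern int fs_extend_dir(fs_dir_t *, uint_t reclen, uint64_t *offp);
extern int fs_scan_for_space(fs_dir_t *, uint_t reclen, uint64_t *offp);

static int
fs_alloc_dir_space(fs_dir_t *dp, uint_t reclen, uint64_t *offp)
{
	switch (dnlc_dir_rem_space_by_len(&dp->d_dcanchor, reclen, offp)) {
	case DFOUND:
		/* *offp identifies a free slot of at least reclen bytes */
		return (0);
	case DNOENT:
		/* complete cache: no slot is big enough, so grow the dir */
		return (fs_extend_dir(dp, reclen, offp));
	case DNOCACHE:
	default:
		/* no cache; fall back to scanning the directory itself */
		return (fs_scan_for_space(dp, reclen, offp));
	}
}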
1722 1722
1723 1723 /*
1724 1724 * Remove free space with the given handle from a complete or partial
1725 1725 * directory cache.
1726 1726 */
1727 1727 dcret_t
1728 1728 dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle)
1729 1729 {
1730 1730 dircache_t *dcp;
1731 1731 dcfree_t **prevpp, *tfp;
1732 1732 uint_t capacity;
1733 1733 int ret;
1734 1734
1735 1735 if (!dnlc_dir_enable) {
1736 1736 return (DNOCACHE);
1737 1737 }
1738 1738
1739 1739 mutex_enter(&dcap->dca_lock);
1740 1740 dcp = (dircache_t *)dcap->dca_dircache;
1741 1741 if (VALID_DIR_CACHE(dcp)) {
1742 1742 dcp->dc_actime = ddi_get_lbolt64();
1743 1743 if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
1744 1744 capacity = (dcp->dc_fhash_mask + 1) <<
1745 1745 dnlc_dir_hash_size_shift;
1746 1746 if (dcp->dc_num_free <=
1747 1747 (capacity >> dnlc_dir_hash_resize_shift)) {
1748 1748 dnlc_dir_adjust_fhash(dcp);
1749 1749 }
1750 1750 }
1751 1751
1752 1752 /*
1753 1753 * search for the exact entry
1754 1754 */
1755 1755 prevpp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
1756 1756 while (*prevpp != NULL) {
1757 1757 if ((*prevpp)->df_handle == handle) {
1758 1758 tfp = *prevpp;
1759 1759 *prevpp = (*prevpp)->df_next;
1760 1760 dcp->dc_num_free--;
1761 1761 mutex_exit(&dcap->dca_lock);
1762 1762 kmem_cache_free(dnlc_dir_space_cache, tfp);
1763 1763 ncs.ncs_dir_num_ents.value.ui64--;
1764 1764 return (DFOUND);
1765 1765 }
1766 1766 prevpp = &((*prevpp)->df_next);
1767 1767 }
1768 1768 if (dcp->dc_complete) {
1769 1769 ncs.ncs_dir_rems_fai.value.ui64++;
1770 1770 ret = DNOENT;
1771 1771 } else {
1772 1772 ret = DNOCACHE;
1773 1773 }
1774 1774 mutex_exit(&dcap->dca_lock);
1775 1775 return (ret);
1776 1776 } else {
1777 1777 mutex_exit(&dcap->dca_lock);
1778 1778 return (DNOCACHE);
1779 1779 }
1780 1780 }
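By contrast, the by-handle variant suits the case where the caller already knows exactly which free slot it is about to reuse and only needs the cache brought back in sync. A minimal sketch under the same hypothetical fs_dir_t:

#include <sys/dnlc.h>

static void
fs_consume_free_slot(fs_dir_t *dp, uint64_t handle)
{
	/*
	 * DNOENT and DNOCACHE are harmless here; the slot simply was
	 * not cached, so there is nothing to remove.
	 */
	(void) dnlc_dir_rem_space_by_handle(&dp->d_dcanchor, handle);
}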
1781 1781
1782 1782 /*
1783 1783 * Update the handle of a directory cache entry.
1784 1784 */
1785 1785 dcret_t
1786 1786 dnlc_dir_update(dcanchor_t *dcap, const char *name, uint64_t handle)
1787 1787 {
1788 1788 dircache_t *dcp;
1789 1789 dcentry_t *dep;
1790 1790 int hash;
1791 1791 int ret;
1792 1792 uchar_t namlen;
1793 1793
1794 1794 if (!dnlc_dir_enable) {
1795 1795 return (DNOCACHE);
1796 1796 }
1797 1797
1798 1798 mutex_enter(&dcap->dca_lock);
1799 1799 dcp = (dircache_t *)dcap->dca_dircache;
1800 1800 if (VALID_DIR_CACHE(dcp)) {
1801 1801 dcp->dc_actime = ddi_get_lbolt64();
1802 1802 DNLC_DIR_HASH(name, hash, namlen);
1803 1803 dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
1804 1804 while (dep != NULL) {
1805 1805 if ((dep->de_hash == hash) &&
1806 1806 (namlen == dep->de_namelen) &&
1807 1807 bcmp(dep->de_name, name, namlen) == 0) {
1808 1808 dep->de_handle = handle;
1809 1809 mutex_exit(&dcap->dca_lock);
1810 1810 return (DFOUND);
1811 1811 }
1812 1812 dep = dep->de_next;
1813 1813 }
1814 1814 if (dcp->dc_complete) {
1815 1815 ncs.ncs_dir_upd_fail.value.ui64++;
1816 1816 ret = DNOENT;
1817 1817 } else {
1818 1818 ret = DNOCACHE;
1819 1819 }
1820 1820 mutex_exit(&dcap->dca_lock);
1821 1821 return (ret);
1822 1822 } else {
1823 1823 mutex_exit(&dcap->dca_lock);
1824 1824 return (DNOCACHE);
1825 1825 }
1826 1826 }
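A plausible caller of dnlc_dir_update(): when a filesystem moves a directory record (for example while compacting a directory block), the cached handle for that name must be refreshed. The sketch below reuses the hypothetical fs_dir_t; treating DNOENT on a complete cache as an inconsistency and purging is one possible policy, not something this file mandates.

#include <sys/dnlc.h>

static void
fs_dirent_moved(fs_dir_t *dp, const char *name, uint64_t newoff)
{
	if (dnlc_dir_update(&dp->d_dcanchor, name, newoff) == DNOENT) {
		/*
		 * A complete cache claims the name does not exist even
		 * though the filesystem just moved it: the cache is
		 * stale, so throw it away and let it be rebuilt.
		 */
		dnlc_dir_purge(&dp->d_dcanchor);
	}
}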
1827 1827
1828 1828 void
1829 1829 dnlc_dir_fini(dcanchor_t *dcap)
1830 1830 {
1831 1831 dircache_t *dcp;
1832 1832
1833 1833 mutex_enter(&dc_head.dch_lock);
1834 1834 mutex_enter(&dcap->dca_lock);
1835 1835 dcp = (dircache_t *)dcap->dca_dircache;
1836 1836 if (VALID_DIR_CACHE(dcp)) {
1837 1837 /*
1838 1838 * Unchain from global list
1839 1839 */
1840 1840 ncs.ncs_dir_finipurg.value.ui64++;
1841 1841 dcp->dc_prev->dc_next = dcp->dc_next;
1842 1842 dcp->dc_next->dc_prev = dcp->dc_prev;
1843 1843 } else {
1844 1844 dcp = NULL;
1845 1845 }
1846 1846 dcap->dca_dircache = NULL;
1847 1847 mutex_exit(&dcap->dca_lock);
1848 1848 mutex_exit(&dc_head.dch_lock);
1849 1849 mutex_destroy(&dcap->dca_lock);
1850 1850 if (dcp) {
1851 1851 dnlc_dir_abort(dcp);
1852 1852 }
1853 1853 }
1854 1854
1855 1855 /*
1856 1856 * Reclaim callback for dnlc directory caching.
1857 1857 * Invoked by the kernel memory allocator when memory gets tight.
1858 1858 * This is a pretty serious condition and can easily lead to system
1859 1859 * hangs if not enough space is returned.
1860 1860 *
1861 1861 * Deciding which directory (or directories) to purge is tricky.
1862 1862 * Purging everything is overkill, but purging just the oldest used
1863 1863 * was found to lead to hangs. The largest cached directories use the
1864 1864 * most memory, but take the most effort to rebuild, whereas the smaller
1865 1865 * ones have little value and give back little space. So what to do?
1866 1866 *
1867 1867 * The current policy is to continue purging the oldest used directories
1868 1868 * until at least dnlc_dir_min_reclaim directory entries have been purged.
1869 1869 */
1870 1870 /*ARGSUSED*/
1871 1871 static void
1872 1872 dnlc_dir_reclaim(void *unused)
1873 1873 {
1874 1874 dircache_t *dcp, *oldest;
1875 1875 uint_t dirent_cnt = 0;
1876 1876
1877 1877 mutex_enter(&dc_head.dch_lock);
1878 1878 while (dirent_cnt < dnlc_dir_min_reclaim) {
1879 1879 dcp = dc_head.dch_next;
1880 1880 oldest = NULL;
1881 1881 while (dcp != (dircache_t *)&dc_head) {
1882 1882 if (oldest == NULL) {
1883 1883 oldest = dcp;
1884 1884 } else {
1885 1885 if (dcp->dc_actime < oldest->dc_actime) {
1886 1886 oldest = dcp;
1887 1887 }
1888 1888 }
1889 1889 dcp = dcp->dc_next;
1890 1890 }
1891 1891 if (oldest == NULL) {
1892 1892 /* nothing to delete */
1893 1893 mutex_exit(&dc_head.dch_lock);
1894 1894 return;
1895 1895 }
1896 1896 /*
1897 1897 * remove from directory chain and purge
1898 1898 */
1899 1899 oldest->dc_prev->dc_next = oldest->dc_next;
1900 1900 oldest->dc_next->dc_prev = oldest->dc_prev;
1901 1901 mutex_enter(&oldest->dc_anchor->dca_lock);
1902 1902 /*
1903 1903 * If this was the last entry then it must be too large.
1904 1904 * Mark it as such by saving a special dircache_t
1905 1905 * pointer (DC_RET_LOW_MEM) in the anchor. The error DNOMEM
1906 1906 * will be presented to the caller of dnlc_dir_start().
1907 1907 */
1908 1908 if (oldest->dc_next == oldest->dc_prev) {
1909 1909 oldest->dc_anchor->dca_dircache = DC_RET_LOW_MEM;
1910 1910 ncs.ncs_dir_rec_last.value.ui64++;
1911 1911 } else {
1912 1912 oldest->dc_anchor->dca_dircache = NULL;
1913 1913 ncs.ncs_dir_recl_any.value.ui64++;
1914 1914 }
1915 1915 mutex_exit(&oldest->dc_anchor->dca_lock);
1916 1916 dirent_cnt += oldest->dc_num_entries;
1917 1917 dnlc_dir_abort(oldest);
1918 1918 }
1919 1919 mutex_exit(&dc_head.dch_lock);
1920 1920 }
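dnlc_dir_reclaim() only runs because the kernel memory allocator is told about it when a kmem cache is created. The sketch below shows the general registration pattern using the standard kmem_cache_create() reclaim argument; the dnlc code registers dnlc_dir_reclaim() in much the same way when it creates its free-space cache, but that call sits above this hunk, so take the exact form here as illustrative.

#include <sys/kmem.h>

static kmem_cache_t *example_cache;

void
example_cache_init(void)
{
	/*
	 * The sixth argument is the reclaim callback; kmem invokes it
	 * (with the private argument, here NULL) when memory is tight.
	 * dnlc_dir_reclaim() is the static callback defined above.
	 */
	example_cache = kmem_cache_create("example_cache",
	    sizeof (uint64_t), 0,
	    NULL, NULL,			/* no constructor/destructor */
	    dnlc_dir_reclaim, NULL,	/* reclaim callback + private arg */
	    NULL, 0);			/* default arena, no flags */
}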
1921 1921
1922 1922 /*
1923 1923 * Dynamically grow or shrink the size of the name hash table
1924 1924 */
1925 1925 static void
1926 1926 dnlc_dir_adjust_nhash(dircache_t *dcp)
1927 1927 {
1928 1928 dcentry_t **newhash, *dep, **nhp, *tep;
1929 1929 uint_t newsize;
1930 1930 uint_t oldsize;
1931 1931 uint_t newsizemask;
1932 1932 int i;
1933 1933
1934 1934 /*
1935 1935 * Allocate new hash table
1936 1936 */
1937 1937 newsize = dcp->dc_num_entries >> dnlc_dir_hash_size_shift;
1938 1938 newhash = kmem_zalloc(sizeof (dcentry_t *) * newsize, KM_NOSLEEP);
1939 1939 if (newhash == NULL) {
1940 1940 /*
1941 1941 * The system is short on memory; just return.
1942 1942 * Note, the old hash table is still usable.
1943 1943 * This return is unlikely to occur repeatedly, because
1944 1944 * either some other directory caches will be reclaimed
1945 1945 * due to memory shortage, thus freeing memory, or this
1946 1946 * directory cache will be reclaimed.
1947 1947 */
1948 1948 return;
1949 1949 }
1950 1950 oldsize = dcp->dc_nhash_mask + 1;
1951 1951 dcp->dc_nhash_mask = newsizemask = newsize - 1;
1952 1952
1953 1953 /*
1954 1954 * Move entries from the old table to the new
1955 1955 */
1956 1956 for (i = 0; i < oldsize; i++) { /* for each hash bucket */
1957 1957 dep = dcp->dc_namehash[i];
1958 1958 while (dep != NULL) { /* for each chained entry */
1959 1959 tep = dep;
1960 1960 dep = dep->de_next;
1961 1961 nhp = &newhash[tep->de_hash & newsizemask];
1962 1962 tep->de_next = *nhp;
1963 1963 *nhp = tep;
1964 1964 }
1965 1965 }
1966 1966
1967 1967 /*
1968 1968 * delete old hash table and set new one in place
1969 1969 */
1970 1970 kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * oldsize);
1971 1971 dcp->dc_namehash = newhash;
1972 1972 }
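To make the resize arithmetic concrete, here is a stand-alone restatement of the shrink test the callers above apply before invoking dnlc_dir_adjust_nhash(). The shift values are taken as parameters because the tunables' defaults are defined elsewhere in this file; the numbers in the comment are purely illustrative.

#include <sys/types.h>

/*
 * With size_shift == 3 and resize_shift == 1 (illustrative values only):
 * a 64-bucket table (mask 63) has a "capacity" of 64 << 3 == 512 entries,
 * i.e. 8 per bucket, and is shrunk once it holds 512 >> 1 == 256 or
 * fewer; the replacement table then gets dc_num_entries >> 3 buckets.
 */
static boolean_t
nhash_should_shrink(uint_t nhash_mask, uint_t num_entries,
    uint_t size_shift, uint_t resize_shift)
{
	uint_t capacity;

	if (nhash_mask == 0)
		return (B_FALSE);	/* already at the minimum size */
	capacity = (nhash_mask + 1) << size_shift;
	return (num_entries <= (capacity >> resize_shift) ?
	    B_TRUE : B_FALSE);
}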
1973 1973
1974 1974 /*
1975 1975 * Dynamically grow or shrink the size of the free space hash table
1976 1976 */
1977 1977 static void
1978 1978 dnlc_dir_adjust_fhash(dircache_t *dcp)
1979 1979 {
1980 1980 dcfree_t **newhash, *dfp, **nhp, *tfp;
1981 1981 uint_t newsize;
1982 1982 uint_t oldsize;
1983 1983 int i;
1984 1984
1985 1985 /*
1986 1986 * Allocate new hash table
1987 1987 */
1988 1988 newsize = dcp->dc_num_free >> dnlc_dir_hash_size_shift;
1989 1989 newhash = kmem_zalloc(sizeof (dcfree_t *) * newsize, KM_NOSLEEP);
1990 1990 if (newhash == NULL) {
1991 1991 /*
1992 1992 * The system is short on memory; just return.
1993 1993 * Note, the old hash table is still usable.
1994 1994 * This return is unlikely to occur repeatedly, because
1995 1995 * either some other directory caches will be reclaimed
1996 1996 * due to memory shortage, thus freeing memory, or this
1997 1997 * directory cache will be reclaimed.
1998 1998 */
1999 1999 return;
2000 2000 }
2001 2001 oldsize = dcp->dc_fhash_mask + 1;
2002 2002 dcp->dc_fhash_mask = newsize - 1;
2003 2003
2004 2004 /*
2005 2005 * Move entries from the old table to the new
2006 2006 */
2007 2007 for (i = 0; i < oldsize; i++) { /* for each hash bucket */
2008 2008 dfp = dcp->dc_freehash[i];
2009 2009 while (dfp != NULL) { /* for each chained entry */
2010 2010 tfp = dfp;
2011 2011 dfp = dfp->df_next;
2012 2012 nhp = &newhash[DDFHASH(tfp->df_handle, dcp)];
2013 2013 tfp->df_next = *nhp;
2014 2014 *nhp = tfp;
2015 2015 }
2016 2016 }
2017 2017
2018 2018 /*
2019 2019 * delete old hash table and set new one in place
2020 2020 */
2021 2021 kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * oldsize);
2022 2022 dcp->dc_freehash = newhash;
2023 2023 }