3882 remove xmod & friends
--- old/usr/src/uts/common/os/flock.c
+++ new/usr/src/uts/common/os/flock.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
... 10 lines elided ...
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 -/* ONC_PLUS EXTRACT START */
22 21
23 22 /*
24 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
25 24 * Use is subject to license terms.
26 25 */
27 26
28 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
29 28 /* All Rights Reserved */
30 29
31 30 #pragma ident "%Z%%M% %I% %E% SMI"
32 31
33 32 #include <sys/flock_impl.h>
34 33 #include <sys/vfs.h>
35 34 #include <sys/t_lock.h> /* for <sys/callb.h> */
36 35 #include <sys/callb.h>
37 36 #include <sys/clconf.h>
38 37 #include <sys/cladm.h>
39 38 #include <sys/nbmlock.h>
40 39 #include <sys/cred.h>
41 40 #include <sys/policy.h>
42 41
43 42 /*
44 43 * The following four variables are for statistics purposes and they are
45 44 * not protected by locks. They may not be accurate but will at least be
46 45 * close to the actual value.
47 46 */
48 47
49 48 int flk_lock_allocs;
50 49 int flk_lock_frees;
51 50 int edge_allocs;
52 51 int edge_frees;
53 52 int flk_proc_vertex_allocs;
54 53 int flk_proc_edge_allocs;
55 54 int flk_proc_vertex_frees;
56 55 int flk_proc_edge_frees;
57 56
58 57 static kmutex_t flock_lock;
59 58
60 59 #ifdef DEBUG
61 60 int check_debug = 0;
62 61 #define CHECK_ACTIVE_LOCKS(gp) if (check_debug) \
63 62 check_active_locks(gp);
64 63 #define CHECK_SLEEPING_LOCKS(gp) if (check_debug) \
65 64 check_sleeping_locks(gp);
66 65 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) \
67 66 if (check_debug) \
68 67 check_owner_locks(gp, pid, sysid, vp);
69 68 #define CHECK_LOCK_TRANSITION(old_state, new_state) \
70 69 { \
71 70 if (check_lock_transition(old_state, new_state)) { \
72 71 cmn_err(CE_PANIC, "Illegal lock transition \
73 72 from %d to %d", old_state, new_state); \
74 73 } \
75 74 }
76 75 #else
77 76
78 77 #define CHECK_ACTIVE_LOCKS(gp)
79 78 #define CHECK_SLEEPING_LOCKS(gp)
80 79 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp)
81 80 #define CHECK_LOCK_TRANSITION(old_state, new_state)
82 81
83 82 #endif /* DEBUG */
84 83
85 84 struct kmem_cache *flk_edge_cache;
86 85
87 86 graph_t *lock_graph[HASH_SIZE];
88 87 proc_graph_t pgraph;
89 88
90 89 /*
91 90 * Clustering.
92 91 *
93 92 * NLM REGISTRY TYPE IMPLEMENTATION
94 93 *
95 94 * Assumptions:
96 95 * 1. Nodes in a cluster are numbered starting at 1; always non-negative
97 96 * integers; maximum node id is returned by clconf_maximum_nodeid().
98 97 * 2. We use this node id to identify the node an NLM server runs on.
99 98 */
100 99
101 100 /*
102 101 * The NLM registry object keeps track of NLM servers, via their
103 102 * nlmids (the node ids of the cluster nodes they run on),
104 103 * that have requested locks at the LLM with which this registry is
105 104 * associated.
106 105 *
107 106 * Representation of abstraction:
108 107 * rep = record[ states: array[nlm_state],
109 108 * lock: mutex]
110 109 *
111 110 * Representation invariants:
112 111 * 1. index i of rep.states is between 0 and n - 1 where n is the number
113 112 * of elements in the array, which happens to be the maximum number
114 113 * of nodes in the cluster configuration + 1.
115 114 * 2. map nlmid to index i of rep.states
116 115 * 0 -> 0
117 116 * 1 -> 1
118 117 * 2 -> 2
119 118 * n-1 -> clconf_maximum_nodeid()
120 119 * 3. This 1-1 mapping is quite convenient and it avoids errors resulting
121 120 * from forgetting to subtract 1 from the index.
122 121 * 4. The reason we keep the 0th index is the following. A legitimate
123 122 * cluster configuration includes making a UFS file system NFS
124 123 * exportable. The code is structured so that if you're in a cluster
125 124 * you do one thing; otherwise, you do something else. The problem
126 125 * is what to do if you think you're in a cluster with PXFS loaded,
127 126 * but you're using UFS not PXFS? The upper two bytes of the sysid
128 127 * encode the node id of the node where NLM server runs; these bytes
129 128 * are zero for UFS. Since the nodeid is used to index into the
130 129 * registry, we can record the NLM server state information at index
131 130 * 0 using the same mechanism used for PXFS file locks!
132 131 */
133 132 static flk_nlm_status_t *nlm_reg_status = NULL; /* state array 0..N-1 */
134 133 static kmutex_t nlm_reg_lock; /* lock to protect array */
135 134 static uint_t nlm_status_size; /* size of state array */
136 135
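
[Annotation, not part of the diff] The 1-to-1 nlmid-to-index mapping described above means no offset arithmetic is needed at lookup time. A minimal sketch of how the registry is consulted, assuming the FLK_REGISTRY_* macros reduce to simple array accesses:

    /*
     * Sketch (assumed forms): look up the NLM server state for a lock.
     * GETNLMID() extracts the node id from the upper bytes of the sysid;
     * it is 0 for UFS, which is why the registry keeps index 0.
     */
    flk_nlm_status_t state;
    int nlmid;

    nlmid = GETNLMID(lock->l_flock.l_sysid);
    ASSERT(nlmid >= 0 && nlmid <= clconf_maximum_nodeid());
    mutex_enter(&nlm_reg_lock);
    state = nlm_reg_status[nlmid];	/* identity mapping: no "- 1" needed */
    mutex_exit(&nlm_reg_lock);
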
137 136 /*
138 137 * Although we need a global lock dependency graph (and associated data
139 138 * structures), we also need a per-zone notion of whether the lock manager is
140 139 * running, and so whether to allow lock manager requests or not.
141 140 *
142 141 * Thus, on a per-zone basis we maintain a ``global'' variable
143 142 * (flk_lockmgr_status), protected by flock_lock, and set when the lock
144 143 * manager is determined to be changing state (starting or stopping).
145 144 *
146 145 * Each graph/zone pair also has a copy of this variable, which is protected by
147 146 * the graph's mutex.
148 147 *
149 148 * The per-graph copies are used to synchronize lock requests with shutdown
150 149 * requests. The global copy is used to initialize the per-graph field when a
151 150 * new graph is created.
152 151 */
153 152 struct flock_globals {
154 153 flk_lockmgr_status_t flk_lockmgr_status;
155 154 flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
156 155 };
157 156
158 157 zone_key_t flock_zone_key;
159 158
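
[Annotation, not part of the diff] For context, a sketch of how flock_zone_key is presumably wired to the constructor/destructor pair defined later in this file; the actual zone_key_create() call site is not in this diff, so treat this as an assumption:

    /* Assumed registration at lock-manager initialization (call site elsewhere). */
    zone_key_create(&flock_zone_key, flk_zone_init, NULL, flk_zone_fini);

    /* Per-zone state is then fetched the way flk_get_globals() does below. */
    fg = zone_getspecific(flock_zone_key, curproc->p_zone);
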
160 159 static void create_flock(lock_descriptor_t *, flock64_t *);
161 160 static lock_descriptor_t *flk_get_lock(void);
162 161 static void flk_free_lock(lock_descriptor_t *lock);
163 162 static void flk_get_first_blocking_lock(lock_descriptor_t *request);
164 163 static int flk_process_request(lock_descriptor_t *);
165 164 static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
166 165 static edge_t *flk_get_edge(void);
167 166 static int flk_wait_execute_request(lock_descriptor_t *);
168 167 static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
169 168 static void flk_insert_active_lock(lock_descriptor_t *);
170 169 static void flk_delete_active_lock(lock_descriptor_t *, int);
171 170 static void flk_insert_sleeping_lock(lock_descriptor_t *);
172 171 static void flk_graph_uncolor(graph_t *);
173 172 static void flk_wakeup(lock_descriptor_t *, int);
174 173 static void flk_free_edge(edge_t *);
175 174 static void flk_recompute_dependencies(lock_descriptor_t *,
176 175 lock_descriptor_t **, int, int);
177 176 static int flk_find_barriers(lock_descriptor_t *);
178 177 static void flk_update_barriers(lock_descriptor_t *);
179 178 static int flk_color_reachables(lock_descriptor_t *);
180 179 static int flk_canceled(lock_descriptor_t *);
181 180 static void flk_delete_locks_by_sysid(lock_descriptor_t *);
182 181 static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
183 182 static void wait_for_lock(lock_descriptor_t *);
184 183 static void unlock_lockmgr_granted(struct flock_globals *);
185 184 static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);
186 185
187 186 /* Clustering hooks */
188 187 static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t);
189 188 static void cl_flk_wakeup_sleeping_nlm_locks(int);
190 189 static void cl_flk_unlock_nlm_granted(int);
191 190
192 191 #ifdef DEBUG
193 192 static int check_lock_transition(int, int);
194 193 static void check_sleeping_locks(graph_t *);
195 194 static void check_active_locks(graph_t *);
196 195 static int no_path(lock_descriptor_t *, lock_descriptor_t *);
197 196 static void path(lock_descriptor_t *, lock_descriptor_t *);
198 197 static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
199 198 static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
200 199 static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
201 200 #endif
202 201
203 202 /* proc_graph function definitions */
204 203 static int flk_check_deadlock(lock_descriptor_t *);
205 204 static void flk_proc_graph_uncolor(void);
206 205 static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
207 206 static proc_edge_t *flk_get_proc_edge(void);
208 207 static void flk_proc_release(proc_vertex_t *);
209 208 static void flk_free_proc_edge(proc_edge_t *);
210 209 static void flk_update_proc_graph(edge_t *, int);
211 210
212 211 /* Non-blocking mandatory locking */
213 212 static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t,
214 213 u_offset_t);
215 214
216 215 static struct flock_globals *
217 216 flk_get_globals(void)
218 217 {
219 218 /*
220 219 * The KLM module had better be loaded if we're attempting to handle
221 220 * lockmgr requests.
222 221 */
223 222 ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED);
224 223 return (zone_getspecific(flock_zone_key, curproc->p_zone));
225 224 }
226 225
227 226 static flk_lockmgr_status_t
228 227 flk_get_lockmgr_status(void)
229 228 {
230 229 struct flock_globals *fg;
231 230
232 231 ASSERT(MUTEX_HELD(&flock_lock));
233 232
234 233 if (flock_zone_key == ZONE_KEY_UNINITIALIZED) {
235 234 /*
236 235 * KLM module not loaded; lock manager definitely not running.
237 236 */
238 237 return (FLK_LOCKMGR_DOWN);
239 238 }
240 239 fg = flk_get_globals();
241 240 return (fg->flk_lockmgr_status);
242 241 }
243 242
244 243 /*
245 244 * Routine called from fs_frlock in fs/fs_subr.c
246 245 */
247 246
248 247 int
249 248 reclock(vnode_t *vp,
250 249 flock64_t *lckdat,
251 250 int cmd,
252 251 int flag,
253 252 u_offset_t offset,
254 253 flk_callback_t *flk_cbp)
255 254 {
256 255 lock_descriptor_t stack_lock_request;
257 256 lock_descriptor_t *lock_request;
258 257 int error = 0;
259 258 graph_t *gp;
260 259 int nlmid;
261 260
262 261 /*
263 262 * Check access permissions
264 263 */
265 264 if ((cmd & SETFLCK) &&
266 265 ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
267 266 (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
268 267 return (EBADF);
269 268
270 269 /*
271 270 * for query and unlock we use the stack_lock_request
272 271 */
273 272
274 273 if ((lckdat->l_type == F_UNLCK) ||
275 274 !((cmd & INOFLCK) || (cmd & SETFLCK))) {
276 275 lock_request = &stack_lock_request;
277 276 (void) bzero((caddr_t)lock_request,
278 277 sizeof (lock_descriptor_t));
279 278
280 279 /*
281 280 * the following is added to make the assertions in
282 281 * flk_execute_request() pass
283 282 */
284 283
285 284 lock_request->l_edge.edge_in_next = &lock_request->l_edge;
286 285 lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
287 286 lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
288 287 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
289 288 lock_request->l_status = FLK_INITIAL_STATE;
290 289 } else {
291 290 lock_request = flk_get_lock();
292 291 }
293 292 lock_request->l_state = 0;
294 293 lock_request->l_vnode = vp;
295 294 lock_request->l_zoneid = getzoneid();
296 295
297 296 /*
298 297 * Convert the request range into the canonical start and end
299 298 * values. The NLM protocol supports locking over the entire
300 299 * 32-bit range, so there's no range checking for remote requests,
301 300 * but we still need to verify that local requests obey the rules.
302 301 */
303 302 /* Clustering */
304 303 if ((cmd & (RCMDLCK | PCMDLCK)) != 0) {
305 304 ASSERT(lckdat->l_whence == 0);
306 305 lock_request->l_start = lckdat->l_start;
307 306 lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :
308 307 lckdat->l_start + (lckdat->l_len - 1);
309 308 } else {
310 309 /* check the validity of the lock range */
311 310 error = flk_convert_lock_data(vp, lckdat,
312 311 &lock_request->l_start, &lock_request->l_end,
313 312 offset);
314 313 if (error) {
315 314 goto done;
316 315 }
317 316 error = flk_check_lock_data(lock_request->l_start,
318 317 lock_request->l_end, MAXEND);
319 318 if (error) {
320 319 goto done;
321 320 }
322 321 }
323 322
324 323 ASSERT(lock_request->l_end >= lock_request->l_start);
325 324
326 325 lock_request->l_type = lckdat->l_type;
327 326 if (cmd & INOFLCK)
328 327 lock_request->l_state |= IO_LOCK;
329 328 if (cmd & SLPFLCK)
330 329 lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
331 330 if (cmd & RCMDLCK)
332 331 lock_request->l_state |= LOCKMGR_LOCK;
333 332 if (cmd & NBMLCK)
334 333 lock_request->l_state |= NBMAND_LOCK;
335 334 /*
336 335 * Clustering: set flag for PXFS locks
337 336 * We do not _only_ check for the PCMDLCK flag because PXFS locks could
338 337 * also be of type 'RCMDLCK'.
339 338 * We do not _only_ check the GETPXFSID() macro because local PXFS
340 339 * clients use a pxfsid of zero to permit deadlock detection in the LLM.
341 340 */
342 341
343 342 if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) {
344 343 lock_request->l_state |= PXFS_LOCK;
345 344 }
346 345 if (!((cmd & SETFLCK) || (cmd & INOFLCK))) {
347 346 if (lock_request->l_type == F_RDLCK ||
348 347 lock_request->l_type == F_WRLCK)
349 348 lock_request->l_state |= QUERY_LOCK;
350 349 }
351 350 lock_request->l_flock = (*lckdat);
352 351 lock_request->l_callbacks = flk_cbp;
353 352
354 353 /*
355 354 * We are ready for processing the request
356 355 */
357 356 if (IS_LOCKMGR(lock_request)) {
358 357 /*
359 358 * If the lock request is an NLM server request ....
360 359 */
361 360 if (nlm_status_size == 0) { /* not booted as cluster */
362 361 mutex_enter(&flock_lock);
363 362 /*
364 363 * Bail out if this is a lock manager request and the
365 364 * lock manager is not supposed to be running.
366 365 */
367 366 if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
368 367 mutex_exit(&flock_lock);
369 368 error = ENOLCK;
370 369 goto done;
371 370 }
372 371 mutex_exit(&flock_lock);
373 372 } else { /* booted as a cluster */
374 373 nlmid = GETNLMID(lock_request->l_flock.l_sysid);
375 374 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
376 375
377 376 mutex_enter(&nlm_reg_lock);
378 377 /*
379 378 * If the NLM registry does not know about this
380 379 * NLM server making the request, add its nlmid
381 380 * to the registry.
382 381 */
383 382 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status,
384 383 nlmid)) {
385 384 FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
386 385 } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status,
387 386 nlmid)) {
388 387 /*
389 388 * If the NLM server is already known (has made
390 389 * previous lock requests) and its state is
391 390 * not NLM_UP (means that NLM server is
392 391 * shutting down), then bail out with an
393 392 * error to deny the lock request.
394 393 */
395 394 mutex_exit(&nlm_reg_lock);
396 395 error = ENOLCK;
397 396 goto done;
398 397 }
399 398 mutex_exit(&nlm_reg_lock);
400 399 }
401 400 }
402 401
403 402 /* Now get the lock graph for a particular vnode */
404 403 gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);
405 404
406 405 /*
407 406 * We drop the rwlock here; otherwise this might end up causing a
408 407 * deadlock if this IO_LOCK sleeps. (bugid # 1183392).
409 408 */
410 409
411 410 if (IS_IO_LOCK(lock_request)) {
412 411 VOP_RWUNLOCK(vp,
413 412 (lock_request->l_type == F_RDLCK) ?
414 413 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
415 414 }
416 415 mutex_enter(&gp->gp_mutex);
417 416
418 417 lock_request->l_state |= REFERENCED_LOCK;
419 418 lock_request->l_graph = gp;
420 419
421 420 switch (lock_request->l_type) {
422 421 case F_RDLCK:
423 422 case F_WRLCK:
424 423 if (IS_QUERY_LOCK(lock_request)) {
425 424 flk_get_first_blocking_lock(lock_request);
426 425 (*lckdat) = lock_request->l_flock;
427 426 break;
428 427 }
429 428
430 429 /* process the request now */
431 430
432 431 error = flk_process_request(lock_request);
433 432 break;
434 433
435 434 case F_UNLCK:
436 435 /* unlock request will not block so execute it immediately */
437 436
438 437 if (IS_LOCKMGR(lock_request) &&
439 438 flk_canceled(lock_request)) {
440 439 error = 0;
441 440 } else {
442 441 error = flk_execute_request(lock_request);
443 442 }
444 443 break;
445 444
446 445 case F_UNLKSYS:
447 446 /*
448 447 * Recovery mechanism to release lock manager locks when
449 448 * the NFS client crashes and restarts. The NFS server will clear
450 449 * old locks and grant new locks.
451 450 */
452 451
453 452 if (lock_request->l_flock.l_sysid == 0) {
454 453 mutex_exit(&gp->gp_mutex);
455 454 return (EINVAL);
456 455 }
457 456 if (secpolicy_nfs(CRED()) != 0) {
458 457 mutex_exit(&gp->gp_mutex);
459 458 return (EPERM);
460 459 }
461 460 flk_delete_locks_by_sysid(lock_request);
462 461 lock_request->l_state &= ~REFERENCED_LOCK;
463 462 flk_set_state(lock_request, FLK_DEAD_STATE);
464 463 flk_free_lock(lock_request);
465 464 mutex_exit(&gp->gp_mutex);
466 465 return (0);
467 466
468 467 default:
469 468 error = EINVAL;
470 469 break;
471 470 }
472 471
473 472 /* Clustering: For blocked PXFS locks, return */
474 473 if (error == PXFS_LOCK_BLOCKED) {
475 474 lock_request->l_state &= ~REFERENCED_LOCK;
476 475 mutex_exit(&gp->gp_mutex);
477 476 return (error);
478 477 }
479 478
480 479 /*
481 480 * Now that we have seen the status of locks in the system for
482 481 * this vnode we acquire the rwlock if it is an IO_LOCK.
483 482 */
484 483
485 484 if (IS_IO_LOCK(lock_request)) {
486 485 (void) VOP_RWLOCK(vp,
487 486 (lock_request->l_type == F_RDLCK) ?
488 487 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
489 488 if (!error) {
490 489 lckdat->l_type = F_UNLCK;
491 490
492 491 /*
493 492 * This wakeup is needed; otherwise,
494 493 * if the IO_LOCK has slept, its dependents
495 494 * will not be woken up at all. (bugid # 1185482).
496 495 */
497 496
498 497 flk_wakeup(lock_request, 1);
499 498 flk_set_state(lock_request, FLK_DEAD_STATE);
500 499 flk_free_lock(lock_request);
501 500 }
502 501 /*
503 502 * Otherwise, if an error occurred, either flk_process_request()
504 503 * returned EDEADLK, in which case there will be no
505 504 * dependents for this lock, or flk_wait_execute_
506 505 * request() returned EINTR, in which case flk_cancel_sleeping_lock()
507 506 * has already been done. The same is true for EBADF.
508 507 */
509 508 }
510 509
511 510 if (lock_request == &stack_lock_request) {
512 511 flk_set_state(lock_request, FLK_DEAD_STATE);
513 512 } else {
514 513 lock_request->l_state &= ~REFERENCED_LOCK;
515 514 if ((error != 0) || IS_DELETED(lock_request)) {
516 515 flk_set_state(lock_request, FLK_DEAD_STATE);
517 516 flk_free_lock(lock_request);
518 517 }
519 518 }
520 519
521 520 mutex_exit(&gp->gp_mutex);
522 521 return (error);
523 522
524 523 done:
525 524 flk_set_state(lock_request, FLK_DEAD_STATE);
526 525 if (lock_request != &stack_lock_request)
527 526 flk_free_lock(lock_request);
528 527 return (error);
529 528 }
530 529
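
[Annotation, not part of the diff] The canonical start/end conversion used in reclock() follows the usual inclusive-end convention, as the clustering branch shows directly. A sketch of the assumed effect of flk_convert_lock_data(), which is defined outside this file:

    /*
     * Sketch of the assumed conversion (flk_convert_lock_data() is not
     * part of this diff).  'base' is derived from l_whence: 0 for
     * SEEK_SET, the file offset for SEEK_CUR, the file size for SEEK_END.
     */
    start = base + lckdat->l_start;
    end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :	/* zero len: to EOF */
        start + (lckdat->l_len - 1);		/* inclusive end offset */
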
531 530 /*
532 531 * Invoke the callbacks in the given list. If before sleeping, invoke in
533 532 * list order. If after sleeping, invoke in reverse order.
534 533 *
535 534 * CPR (suspend/resume) support: if one of the callbacks returns a
536 535 * callb_cpr_t, return it. This will be used to make the thread CPR-safe
537 536 * while it is sleeping. There should be at most one callb_cpr_t for the
538 537 * thread.
539 538 * XXX This is unnecessarily complicated. The CPR information should just
540 539 * get passed in directly through VOP_FRLOCK and reclock, rather than
541 540 * sneaking it in via a callback.
542 541 */
543 542
544 543 callb_cpr_t *
545 544 flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when)
546 545 {
547 546 callb_cpr_t *cpr_callbackp = NULL;
548 547 callb_cpr_t *one_result;
549 548 flk_callback_t *cb;
550 549
551 550 if (cblist == NULL)
552 551 return (NULL);
553 552
554 553 if (when == FLK_BEFORE_SLEEP) {
555 554 cb = cblist;
556 555 do {
557 556 one_result = (*cb->cb_callback)(when, cb->cb_data);
558 557 if (one_result != NULL) {
559 558 ASSERT(cpr_callbackp == NULL);
560 559 cpr_callbackp = one_result;
561 560 }
562 561 cb = cb->cb_next;
563 562 } while (cb != cblist);
564 563 } else {
565 564 cb = cblist->cb_prev;
566 565 do {
567 566 one_result = (*cb->cb_callback)(when, cb->cb_data);
568 567 if (one_result != NULL) {
569 568 cpr_callbackp = one_result;
570 569 }
571 570 cb = cb->cb_prev;
572 571 } while (cb != cblist->cb_prev);
573 572 }
574 573
575 574 return (cpr_callbackp);
576 575 }
577 576
578 577 /*
579 578 * Initialize a flk_callback_t to hold the given callback.
580 579 */
581 580
582 581 void
583 582 flk_init_callback(flk_callback_t *flk_cb,
584 583 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
585 584 {
586 585 flk_cb->cb_next = flk_cb;
587 586 flk_cb->cb_prev = flk_cb;
588 587 flk_cb->cb_callback = cb_fcn;
589 588 flk_cb->cb_data = cbdata;
590 589 }
591 590
592 591 /*
593 592 * Initialize an flk_callback_t and then link it into the head of an
594 593 * existing list (which may be NULL).
595 594 */
596 595
597 596 void
598 597 flk_add_callback(flk_callback_t *newcb,
599 598 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *),
600 599 void *cbdata, flk_callback_t *cblist)
601 600 {
... 570 lines elided ...
602 601 flk_init_callback(newcb, cb_fcn, cbdata);
603 602
604 603 if (cblist == NULL)
605 604 return;
606 605
607 606 newcb->cb_prev = cblist->cb_prev;
608 607 newcb->cb_next = cblist;
609 608 cblist->cb_prev->cb_next = newcb;
610 609 cblist->cb_prev = newcb;
611 610 }
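
[Annotation, not part of the diff] A usage sketch for the callback list these two routines build; the callback body and the my_cpr_info and bf variables are hypothetical, but the reclock() signature matches the one above:

    /* Hypothetical caller wiring a CPR callback around a blocking request. */
    static callb_cpr_t *
    my_frlock_cb(flk_cb_when_t when, void *data)
    {
    	if (when == FLK_BEFORE_SLEEP)
    		return ((callb_cpr_t *)data);	/* makes the sleeper CPR-safe */
    	return (NULL);				/* nothing to undo after sleeping */
    }

    flk_callback_t flk_cb;

    flk_init_callback(&flk_cb, my_frlock_cb, &my_cpr_info);
    error = reclock(vp, &bf, SETFLCK | SLPFLCK, flag, offset, &flk_cb);
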
612 -/* ONC_PLUS EXTRACT END */
613 611
614 612 /*
615 613 * Initialize the flk_edge_cache data structure and create the
616 614 * nlm_reg_status array.
617 615 */
618 616
619 617 void
620 618 flk_init(void)
621 619 {
622 620 uint_t i;
623 621
624 622 flk_edge_cache = kmem_cache_create("flk_edges",
625 623 sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0);
626 624 if (flk_edge_cache == NULL) {
627 625 cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n");
628 626 }
629 627 /*
630 628 * Create the NLM registry object.
631 629 */
632 630
633 631 if (cluster_bootflags & CLUSTER_BOOTED) {
634 632 /*
635 633 * This routine tells you the maximum node id that will be used
636 634 * in the cluster. This number will be the size of the nlm
637 635 * registry status array. We add 1 because we will be using
638 636 * all entries indexed from 0 to maxnodeid; e.g., from 0
639 637 * to 64, for a total of 65 entries.
640 638 */
641 639 nlm_status_size = clconf_maximum_nodeid() + 1;
642 640 } else {
643 641 nlm_status_size = 0;
644 642 }
645 643
646 644 if (nlm_status_size != 0) { /* booted as a cluster */
647 645 nlm_reg_status = (flk_nlm_status_t *)
648 646 kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size,
649 647 KM_SLEEP);
650 648
651 649 /* initialize all NLM states in array to NLM_UNKNOWN */
652 650 for (i = 0; i < nlm_status_size; i++) {
653 651 nlm_reg_status[i] = FLK_NLM_UNKNOWN;
654 652 }
655 653 }
656 654 }
657 655
658 656 /*
659 657 * Zone constructor/destructor callbacks to be executed when a zone is
660 658 * created/destroyed.
661 659 */
662 660 /* ARGSUSED */
663 661 void *
664 662 flk_zone_init(zoneid_t zoneid)
665 663 {
666 664 struct flock_globals *fg;
667 665 uint_t i;
668 666
669 667 fg = kmem_alloc(sizeof (*fg), KM_SLEEP);
670 668 fg->flk_lockmgr_status = FLK_LOCKMGR_UP;
671 669 for (i = 0; i < HASH_SIZE; i++)
672 670 fg->lockmgr_status[i] = FLK_LOCKMGR_UP;
673 671 return (fg);
674 672 }
675 673
676 674 /* ARGSUSED */
677 675 void
678 676 flk_zone_fini(zoneid_t zoneid, void *data)
679 677 {
680 678 struct flock_globals *fg = data;
681 679
682 680 kmem_free(fg, sizeof (*fg));
683 681 }
684 682
685 683 /*
686 684 * Get a lock_descriptor structure with initialization of edge lists.
687 685 */
688 686
689 687 static lock_descriptor_t *
690 688 flk_get_lock(void)
691 689 {
692 690 lock_descriptor_t *l;
693 691
694 692 l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP);
695 693
696 694 cv_init(&l->l_cv, NULL, CV_DRIVER, NULL);
697 695 l->l_edge.edge_in_next = &l->l_edge;
698 696 l->l_edge.edge_in_prev = &l->l_edge;
699 697 l->l_edge.edge_adj_next = &l->l_edge;
700 698 l->l_edge.edge_adj_prev = &l->l_edge;
701 699 l->pvertex = -1;
702 700 l->l_status = FLK_INITIAL_STATE;
703 701 flk_lock_allocs++;
704 702 return (l);
705 703 }
706 704
707 705 /*
708 706 * Free a lock_descriptor structure. If some thread still holds a
709 707 * reference to it (as in reclock()), just set the DELETED_LOCK flag.
710 708 */
711 709
712 710 void
713 711 flk_free_lock(lock_descriptor_t *lock)
714 712 {
715 713 ASSERT(IS_DEAD(lock));
716 714 if (IS_REFERENCED(lock)) {
717 715 lock->l_state |= DELETED_LOCK;
718 716 return;
719 717 }
720 718 flk_lock_frees++;
721 719 kmem_free((void *)lock, sizeof (lock_descriptor_t));
722 720 }
723 721
724 722 void
725 723 flk_set_state(lock_descriptor_t *lock, int new_state)
726 724 {
727 725 /*
728 726 * Locks in the sleeping list may be woken up in a number of ways,
729 727 * and more than once. If a sleeping lock is signaled awake more
730 728 * than once, then it may or may not change state depending on its
731 729 * current state.
732 730 * Also note that NLM locks that are sleeping could be moved to an
733 731 * interrupted state more than once if the unlock request is
734 732 * retransmitted by the NLM client - the second time around, this is
735 733 * just a nop.
736 734 * The ordering of being signaled awake is:
737 735 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE.
738 736 * The checks below implement this ordering.
739 737 */
740 738 if (IS_INTERRUPTED(lock)) {
741 739 if ((new_state == FLK_CANCELLED_STATE) ||
742 740 (new_state == FLK_GRANTED_STATE) ||
743 741 (new_state == FLK_INTERRUPTED_STATE)) {
744 742 return;
745 743 }
746 744 }
747 745 if (IS_CANCELLED(lock)) {
748 746 if ((new_state == FLK_GRANTED_STATE) ||
749 747 (new_state == FLK_CANCELLED_STATE)) {
750 748 return;
751 749 }
752 750 }
753 751 CHECK_LOCK_TRANSITION(lock->l_status, new_state);
754 752 if (IS_PXFS(lock)) {
755 753 cl_flk_state_transition_notify(lock, lock->l_status, new_state);
756 754 }
757 755 lock->l_status = new_state;
758 756 }
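
[Annotation, not part of the diff] One consequence of the signal ordering above, spelled out with a hypothetical sequence:

    /*
     * Illustrative sequence: a sleeping lock is interrupted by a signal,
     * then a racing wakeup tries to grant it:
     *
     *	flk_set_state(l, FLK_INTERRUPTED_STATE);
     *	flk_set_state(l, FLK_GRANTED_STATE);	(silently ignored)
     *
     * The lock stays INTERRUPTED, so the IS_INTERRUPTED() check in
     * flk_wait_execute_request() wins and the caller sees EINTR.
     */
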
759 757
760 758 /*
761 759 * Routine that checks whether there are any blocking locks in the system.
762 760 *
763 761 * The policy followed is: if a write lock is sleeping, we don't allow read
764 762 * locks to jump ahead of it, even though there may not be any active
765 763 * locks corresponding to the read locks' region.
766 764 *
767 765 * The flk_add_edge() function adds an edge between l1 and l2 iff there
768 766 * is no path between l1 and l2. This is done to have a "minimum
769 767 * storage representation" of the dependency graph.
770 768 *
771 769 * Another property of the graph: since only the new request throws
772 770 * edges to the existing locks in the graph, the graph is always topologically
773 771 * ordered.
774 772 */
775 773
776 774 static int
777 775 flk_process_request(lock_descriptor_t *request)
778 776 {
779 777 graph_t *gp = request->l_graph;
780 778 lock_descriptor_t *lock;
781 779 int request_blocked_by_active = 0;
782 780 int request_blocked_by_granted = 0;
783 781 int request_blocked_by_sleeping = 0;
784 782 vnode_t *vp = request->l_vnode;
785 783 int error = 0;
786 784 int request_will_wait = 0;
787 785 int found_covering_lock = 0;
788 786 lock_descriptor_t *covered_by = NULL;
789 787
790 788 ASSERT(MUTEX_HELD(&gp->gp_mutex));
791 789 request_will_wait = IS_WILLING_TO_SLEEP(request);
792 790
793 791 /*
794 792 * check active locks
795 793 */
796 794
797 795 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
798 796
799 797
800 798 if (lock) {
801 799 do {
802 800 if (BLOCKS(lock, request)) {
803 801 if (!request_will_wait)
804 802 return (EAGAIN);
805 803 request_blocked_by_active = 1;
806 804 break;
807 805 }
808 806 /*
809 807 * Grant the lock if the same owner already holds an active
810 808 * lock that covers the request.
811 809 */
812 810
813 811 if (SAME_OWNER(lock, request) &&
814 812 COVERS(lock, request) &&
815 813 (request->l_type == F_RDLCK))
816 814 return (flk_execute_request(request));
817 815 lock = lock->l_next;
818 816 } while (lock->l_vnode == vp);
819 817 }
820 818
821 819 if (!request_blocked_by_active) {
822 820 lock_descriptor_t *lk[1];
823 821 lock_descriptor_t *first_glock = NULL;
824 822 /*
825 823 * Shall we grant this?! NO!!
826 824 * What about those locks that were just granted and still
827 825 * in sleep queue. Those threads are woken up and so locks
828 826 * are almost active.
829 827 */
830 828 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
831 829 if (lock) {
832 830 do {
833 831 if (BLOCKS(lock, request)) {
834 832 if (IS_GRANTED(lock)) {
835 833 request_blocked_by_granted = 1;
836 834 } else {
837 835 request_blocked_by_sleeping = 1;
838 836 }
839 837 }
840 838
841 839 lock = lock->l_next;
842 840 } while ((lock->l_vnode == vp));
843 841 first_glock = lock->l_prev;
844 842 ASSERT(first_glock->l_vnode == vp);
845 843 }
846 844
847 845 if (request_blocked_by_granted)
848 846 goto block;
849 847
850 848 if (!request_blocked_by_sleeping) {
851 849 /*
852 850 * If the request isn't going to be blocked by a
853 851 * sleeping request, we know that it isn't going to
854 852 * be blocked; we can just execute the request --
855 853 * without performing costly deadlock detection.
856 854 */
857 855 ASSERT(!request_blocked_by_active);
858 856 return (flk_execute_request(request));
859 857 } else if (request->l_type == F_RDLCK) {
860 858 /*
861 859 * If we have a sleeping writer in the requested
862 860 * lock's range, block.
863 861 */
864 862 goto block;
865 863 }
866 864
867 865 lk[0] = request;
868 866 request->l_state |= RECOMPUTE_LOCK;
869 867 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
870 868 if (lock) {
871 869 do {
872 870 flk_recompute_dependencies(lock, lk, 1, 0);
873 871 lock = lock->l_next;
874 872 } while (lock->l_vnode == vp);
875 873 }
876 874 lock = first_glock;
877 875 if (lock) {
878 876 do {
879 877 if (IS_GRANTED(lock)) {
880 878 flk_recompute_dependencies(lock, lk, 1, 0);
881 879 }
882 880 lock = lock->l_prev;
883 881 } while ((lock->l_vnode == vp));
884 882 }
885 883 request->l_state &= ~RECOMPUTE_LOCK;
886 884 if (!NO_DEPENDENTS(request) && flk_check_deadlock(request))
887 885 return (EDEADLK);
888 886 return (flk_execute_request(request));
889 887 }
890 888
891 889 block:
892 890 if (request_will_wait)
893 891 flk_graph_uncolor(gp);
894 892
895 893 /* check sleeping locks */
896 894
897 895 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
898 896
899 897 /*
900 898 * If we find a sleeping write lock that is a superset of the
901 899 * region wanted by request, we can be assured that by adding an
902 900 * edge to this write lock we have paths to all locks in the
903 901 * graph that block the request, except in one case, which is why
904 902 * there is another check for SAME_OWNER in the loop below. The
905 903 * exception is when the process that owns the sleeping write lock
906 904 * 'l1' has other locks l2, l3, l4 in the system that arrived
907 905 * before l1. l1 has no path to these locks since they are from the
908 906 * same process. We break when we find a second covering sleeping
909 907 * lock l5 owned by a process different from the one owning l1,
910 908 * because none of l2, l3, l4, etc. can have arrived before l5; if
911 909 * one had, l1 would already have produced a deadlock.
912 910 */
913 911
914 912 if (lock) {
915 913 do {
916 914 if (BLOCKS(lock, request)) {
917 915 if (!request_will_wait)
918 916 return (EAGAIN);
919 917 if (COVERS(lock, request) &&
920 918 lock->l_type == F_WRLCK) {
921 919 if (found_covering_lock &&
922 920 !SAME_OWNER(lock, covered_by)) {
923 921 found_covering_lock++;
924 922 break;
925 923 }
926 924 found_covering_lock = 1;
927 925 covered_by = lock;
928 926 }
929 927 if (found_covering_lock &&
930 928 !SAME_OWNER(lock, covered_by)) {
931 929 lock = lock->l_next;
932 930 continue;
933 931 }
934 932 if ((error = flk_add_edge(request, lock,
935 933 !found_covering_lock, 0)))
936 934 return (error);
937 935 }
938 936 lock = lock->l_next;
939 937 } while (lock->l_vnode == vp);
940 938 }
941 939
942 940 /*
943 941 * found_covering_lock == 2 iff at this point 'request' has paths
944 942 * to all locks that block 'request'. found_covering_lock == 1 iff at this
945 943 * point 'request' has paths to all locks that block 'request' whose owners
946 944 * are not the same as the one that covers 'request' (covered_by above), and
947 945 * there may be locks whose owner is the same as covered_by in the active list.
948 946 */
949 947
950 948 if (request_blocked_by_active && found_covering_lock != 2) {
951 949 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
952 950 ASSERT(lock != NULL);
953 951 do {
954 952 if (BLOCKS(lock, request)) {
955 953 if (found_covering_lock &&
956 954 !SAME_OWNER(lock, covered_by)) {
957 955 lock = lock->l_next;
958 956 continue;
959 957 }
960 958 if ((error = flk_add_edge(request, lock,
961 959 CHECK_CYCLE, 0)))
962 960 return (error);
963 961 }
964 962 lock = lock->l_next;
965 963 } while (lock->l_vnode == vp);
966 964 }
967 965
968 966 if (NOT_BLOCKED(request)) {
969 967 /*
970 968 * request not dependent on any other locks
971 969 * so execute this request
972 970 */
973 971 return (flk_execute_request(request));
974 972 } else {
975 973 /*
976 974 * check for deadlock
... 354 lines elided ...
977 975 */
978 976 if (flk_check_deadlock(request))
979 977 return (EDEADLK);
980 978 /*
981 979 * this thread has to sleep
982 980 */
983 981 return (flk_wait_execute_request(request));
984 982 }
985 983 }
986 984
987 -/* ONC_PLUS EXTRACT START */
988 985 /*
989 986 * The actual execution of the request in the simple case is only to
990 987 * insert the 'request' in the list of active locks if it is not an
991 988 * UNLOCK.
992 989 * We have to consider the existing active locks' relation to
993 990 * this 'request' if they are owned by same process. flk_relation() does
994 991 * this job and sees to that the dependency graph information is maintained
995 992 * properly.
996 993 */
997 994
998 995 int
999 996 flk_execute_request(lock_descriptor_t *request)
1000 997 {
1001 998 graph_t *gp = request->l_graph;
1002 999 vnode_t *vp = request->l_vnode;
1003 1000 lock_descriptor_t *lock, *lock1;
1004 1001 int done_searching = 0;
1005 1002
1006 1003 CHECK_SLEEPING_LOCKS(gp);
1007 1004 CHECK_ACTIVE_LOCKS(gp);
1008 1005
1009 1006 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1010 1007
1011 1008 flk_set_state(request, FLK_START_STATE);
1012 1009
1013 1010 ASSERT(NOT_BLOCKED(request));
1014 1011
1015 1012 /* IO_LOCK requests are only to check status */
1016 1013
1017 1014 if (IS_IO_LOCK(request))
1018 1015 return (0);
1019 1016
1020 1017 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1021 1018
1022 1019 if (lock == NULL && request->l_type == F_UNLCK)
1023 1020 return (0);
1024 1021 if (lock == NULL) {
1025 1022 flk_insert_active_lock(request);
1026 1023 return (0);
1027 1024 }
1028 1025
1029 1026 do {
1030 1027 lock1 = lock->l_next;
1031 1028 if (SAME_OWNER(request, lock)) {
1032 1029 done_searching = flk_relation(lock, request);
1033 1030 }
1034 1031 lock = lock1;
1035 1032 } while (lock->l_vnode == vp && !done_searching);
... 38 lines elided ...
1036 1033
1037 1034 /*
1038 1035 * insert in active queue
1039 1036 */
1040 1037
1041 1038 if (request->l_type != F_UNLCK)
1042 1039 flk_insert_active_lock(request);
1043 1040
1044 1041 return (0);
1045 1042 }
1046 -/* ONC_PLUS EXTRACT END */
1047 1043
1048 1044 /*
1049 1045 * 'request' is blocked by someone, so we put it into the sleep queue.
1050 1046 */
1051 1047 static int
1052 1048 flk_wait_execute_request(lock_descriptor_t *request)
1053 1049 {
1054 1050 graph_t *gp = request->l_graph;
1055 1051 callb_cpr_t *cprp; /* CPR info from callback */
1056 1052 struct flock_globals *fg;
1057 1053 int index;
1058 1054
1059 1055 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1060 1056 ASSERT(IS_WILLING_TO_SLEEP(request));
1061 1057
1062 1058 flk_insert_sleeping_lock(request);
1063 1059
1064 1060 if (IS_LOCKMGR(request)) {
1065 1061 index = HASH_INDEX(request->l_vnode);
1066 1062 fg = flk_get_globals();
1067 1063
1068 1064 if (nlm_status_size == 0) { /* not booted as a cluster */
1069 1065 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
1070 1066 flk_cancel_sleeping_lock(request, 1);
1071 1067 return (ENOLCK);
1072 1068 }
1073 1069 } else { /* booted as a cluster */
1074 1070 /*
1075 1071 * If the request is an NLM server lock request,
1076 1072 * and the NLM state of the lock request is not
1077 1073 * NLM_UP (because the NLM server is shutting
1078 1074 * down), then cancel the sleeping lock and
1079 1075 * return error ENOLCK that will encourage the
1080 1076 * client to retransmit.
1081 1077 */
1082 1078 if (!IS_NLM_UP(request)) {
1083 1079 flk_cancel_sleeping_lock(request, 1);
1084 1080 return (ENOLCK);
1085 1081 }
1086 1082 }
1087 1083 }
1088 1084
1089 1085 /* Clustering: For blocking PXFS locks, return */
1090 1086 if (IS_PXFS(request)) {
1091 1087 /*
1092 1088 * PXFS locks sleep on the client side.
1093 1089 * The callback argument is used to wake up the sleeper
1094 1090 * when the lock is granted.
1095 1091 * We return -1 (rather than an errno value) to indicate
1096 1092 * the client side should sleep
1097 1093 */
1098 1094 return (PXFS_LOCK_BLOCKED);
1099 1095 }
1100 1096
1101 1097 if (request->l_callbacks != NULL) {
1102 1098 /*
1103 1099 * To make sure the shutdown code works correctly, either
1104 1100 * the callback must happen after putting the lock on the
1105 1101 * sleep list, or we must check the shutdown status after
1106 1102 * returning from the callback (and before sleeping). At
1107 1103 * least for now, we'll use the first option. If a
1108 1104 * shutdown or signal or whatever happened while the graph
1109 1105 * mutex was dropped, that will be detected by
1110 1106 * wait_for_lock().
1111 1107 */
1112 1108 mutex_exit(&gp->gp_mutex);
1113 1109
1114 1110 cprp = flk_invoke_callbacks(request->l_callbacks,
1115 1111 FLK_BEFORE_SLEEP);
1116 1112
1117 1113 mutex_enter(&gp->gp_mutex);
1118 1114
1119 1115 if (cprp == NULL) {
1120 1116 wait_for_lock(request);
1121 1117 } else {
1122 1118 mutex_enter(cprp->cc_lockp);
1123 1119 CALLB_CPR_SAFE_BEGIN(cprp);
1124 1120 mutex_exit(cprp->cc_lockp);
1125 1121 wait_for_lock(request);
1126 1122 mutex_enter(cprp->cc_lockp);
1127 1123 CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp);
1128 1124 mutex_exit(cprp->cc_lockp);
1129 1125 }
1130 1126
1131 1127 mutex_exit(&gp->gp_mutex);
1132 1128 (void) flk_invoke_callbacks(request->l_callbacks,
1133 1129 FLK_AFTER_SLEEP);
1134 1130 mutex_enter(&gp->gp_mutex);
1135 1131 } else {
1136 1132 wait_for_lock(request);
1137 1133 }
1138 1134
1139 1135 if (IS_LOCKMGR(request)) {
1140 1136 /*
1141 1137 * If the lock manager is shutting down, return an
1142 1138 * error that will encourage the client to retransmit.
1143 1139 */
1144 1140 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP &&
1145 1141 !IS_GRANTED(request)) {
1146 1142 flk_cancel_sleeping_lock(request, 1);
1147 1143 return (ENOLCK);
1148 1144 }
1149 1145 }
1150 1146
1151 1147 if (IS_INTERRUPTED(request)) {
1152 1148 /* we got a signal, or act like we did */
1153 1149 flk_cancel_sleeping_lock(request, 1);
1154 1150 return (EINTR);
1155 1151 }
1156 1152
1157 1153 /* Cancelled if some other thread has closed the file */
1158 1154
1159 1155 if (IS_CANCELLED(request)) {
1160 1156 flk_cancel_sleeping_lock(request, 1);
1161 1157 return (EBADF);
1162 1158 }
1163 1159
1164 1160 request->l_state &= ~GRANTED_LOCK;
1165 1161 REMOVE_SLEEP_QUEUE(request);
1166 1162 return (flk_execute_request(request));
1167 1163 }
1168 1164
1169 1165 /*
1170 1166 * This routine adds an edge between from and to because from depends
1171 1167 * on to. If asked to check for deadlock, it checks whether any locks
1172 1168 * reachable from "from_lock" are owned by the same process
1173 1169 * as "from_lock".
1174 1170 * NOTE: It is the caller's responsibility to make sure that the color
1175 1171 * of the graph is consistent between the calls to flk_add_edge as done
1176 1172 * in flk_process_request. This routine does not color and check for
1177 1173 * deadlock explicitly.
1178 1174 */
1179 1175
1180 1176 static int
1181 1177 flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock,
1182 1178 int check_cycle, int update_graph)
1183 1179 {
1184 1180 edge_t *edge;
1185 1181 edge_t *ep;
1186 1182 lock_descriptor_t *vertex;
1187 1183 lock_descriptor_t *vertex_stack;
1188 1184
1189 1185 STACK_INIT(vertex_stack);
1190 1186
1191 1187 /*
1192 1188 * if the to vertex is already colored, just return without
1193 1189 * adding an edge, as it is already reachable from the
1194 1190 * from vertex.
1195 1191 */
1196 1192
1197 1193 if (COLORED(to_lock))
1198 1194 return (0);
1199 1195
1200 1196 edge = flk_get_edge();
1201 1197
1202 1198 /*
1203 1199 * set the from and to vertex
1204 1200 */
1205 1201
1206 1202 edge->from_vertex = from_lock;
1207 1203 edge->to_vertex = to_lock;
1208 1204
1209 1205 /*
1210 1206 * put in the adjacency list of the from vertex
1211 1207 */
1212 1208
1213 1209 from_lock->l_edge.edge_adj_next->edge_adj_prev = edge;
1214 1210 edge->edge_adj_next = from_lock->l_edge.edge_adj_next;
1215 1211 edge->edge_adj_prev = &from_lock->l_edge;
1216 1212 from_lock->l_edge.edge_adj_next = edge;
1217 1213
1218 1214 /*
1219 1215 * put in the in-list of the to vertex
1220 1216 */
1221 1217
1222 1218 to_lock->l_edge.edge_in_next->edge_in_prev = edge;
1223 1219 edge->edge_in_next = to_lock->l_edge.edge_in_next;
1224 1220 to_lock->l_edge.edge_in_next = edge;
1225 1221 edge->edge_in_prev = &to_lock->l_edge;
1226 1222
1227 1223
1228 1224 if (update_graph) {
1229 1225 flk_update_proc_graph(edge, 0);
1230 1226 return (0);
1231 1227 }
1232 1228 if (!check_cycle) {
1233 1229 return (0);
1234 1230 }
1235 1231
1236 1232 STACK_PUSH(vertex_stack, from_lock, l_stack);
1237 1233
1238 1234 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1239 1235
1240 1236 STACK_POP(vertex_stack, l_stack);
1241 1237
1242 1238 for (ep = FIRST_ADJ(vertex);
1243 1239 ep != HEAD(vertex);
1244 1240 ep = NEXT_ADJ(ep)) {
1245 1241 if (COLORED(ep->to_vertex))
1246 1242 continue;
1247 1243 COLOR(ep->to_vertex);
1248 1244 if (SAME_OWNER(ep->to_vertex, from_lock))
1249 1245 goto dead_lock;
1250 1246 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1251 1247 }
1252 1248 }
1253 1249 return (0);
1254 1250
1255 1251 dead_lock:
1256 1252
1257 1253 /*
1258 1254 * remove all edges
1259 1255 */
1260 1256
1261 1257 ep = FIRST_ADJ(from_lock);
1262 1258
1263 1259 while (ep != HEAD(from_lock)) {
1264 1260 IN_LIST_REMOVE(ep);
1265 1261 from_lock->l_sedge = NEXT_ADJ(ep);
1266 1262 ADJ_LIST_REMOVE(ep);
1267 1263 flk_free_edge(ep);
1268 1264 ep = from_lock->l_sedge;
1269 1265 }
1270 1266 return (EDEADLK);
1271 1267 }
1272 1268
1273 1269 /*
1274 1270 * Get an edge structure for representing the dependency between two locks.
1275 1271 */
1276 1272
1277 1273 static edge_t *
1278 1274 flk_get_edge()
1279 1275 {
1280 1276 edge_t *ep;
1281 1277
1282 1278 ASSERT(flk_edge_cache != NULL);
1283 1279
1284 1280 ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP);
1285 1281 edge_allocs++;
1286 1282 return (ep);
1287 1283 }
1288 1284
1289 1285 /*
1290 1286 * Free the edge structure.
1291 1287 */
1292 1288
1293 1289 static void
1294 1290 flk_free_edge(edge_t *ep)
1295 1291 {
1296 1292 edge_frees++;
1297 1293 kmem_cache_free(flk_edge_cache, (void *)ep);
1298 1294 }
1299 1295
1300 1296 /*
1301 1297 * Check the relationship of request with lock, perform the
1302 1298 * recomputation of dependencies, break the lock if required, and return
1303 1299 * 1 if request cannot have any further relationship with the next
1304 1300 * active locks.
1305 1301 * 'lock' and 'request' are compared, and in case of overlap we
1306 1302 * delete 'lock' and form new locks to represent the non-overlapping
1307 1303 * portion of the original 'lock'. This function has side effects:
1308 1304 * 'lock' will be freed, and new locks will be added to the active list.
1309 1305 */
1310 1306
1311 1307 static int
1312 1308 flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
1313 1309 {
1314 1310 int lock_effect;
1315 1311 lock_descriptor_t *lock1, *lock2;
1316 1312 lock_descriptor_t *topology[3];
1317 1313 int nvertex = 0;
1318 1314 int i;
1319 1315 edge_t *ep;
1320 1316 graph_t *gp = (lock->l_graph);
1321 1317
1322 1318
1323 1319 CHECK_SLEEPING_LOCKS(gp);
1324 1320 CHECK_ACTIVE_LOCKS(gp);
1325 1321
1326 1322 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1327 1323
1328 1324 topology[0] = topology[1] = topology[2] = NULL;
1329 1325
1330 1326 if (request->l_type == F_UNLCK)
1331 1327 lock_effect = FLK_UNLOCK;
1332 1328 else if (request->l_type == F_RDLCK &&
1333 1329 lock->l_type == F_WRLCK)
1334 1330 lock_effect = FLK_DOWNGRADE;
1335 1331 else if (request->l_type == F_WRLCK &&
1336 1332 lock->l_type == F_RDLCK)
1337 1333 lock_effect = FLK_UPGRADE;
1338 1334 else
1339 1335 lock_effect = FLK_STAY_SAME;
1340 1336
1341 1337 if (lock->l_end < request->l_start) {
1342 1338 if (lock->l_end == request->l_start - 1 &&
1343 1339 lock_effect == FLK_STAY_SAME) {
1344 1340 topology[0] = request;
1345 1341 request->l_start = lock->l_start;
1346 1342 nvertex = 1;
1347 1343 goto recompute;
1348 1344 } else {
1349 1345 return (0);
1350 1346 }
1351 1347 }
1352 1348
1353 1349 if (lock->l_start > request->l_end) {
1354 1350 if (request->l_end == lock->l_start - 1 &&
1355 1351 lock_effect == FLK_STAY_SAME) {
1356 1352 topology[0] = request;
1357 1353 request->l_end = lock->l_end;
1358 1354 nvertex = 1;
1359 1355 goto recompute;
1360 1356 } else {
1361 1357 return (1);
1362 1358 }
1363 1359 }
1364 1360
1365 1361 if (request->l_end < lock->l_end) {
1366 1362 if (request->l_start > lock->l_start) {
1367 1363 if (lock_effect == FLK_STAY_SAME) {
1368 1364 request->l_start = lock->l_start;
1369 1365 request->l_end = lock->l_end;
1370 1366 topology[0] = request;
1371 1367 nvertex = 1;
1372 1368 } else {
1373 1369 lock1 = flk_get_lock();
1374 1370 lock2 = flk_get_lock();
1375 1371 COPY(lock1, lock);
1376 1372 COPY(lock2, lock);
1377 1373 lock1->l_start = lock->l_start;
1378 1374 lock1->l_end = request->l_start - 1;
1379 1375 lock2->l_start = request->l_end + 1;
1380 1376 lock2->l_end = lock->l_end;
1381 1377 topology[0] = lock1;
1382 1378 topology[1] = lock2;
1383 1379 topology[2] = request;
1384 1380 nvertex = 3;
1385 1381 }
1386 1382 } else if (request->l_start < lock->l_start) {
1387 1383 if (lock_effect == FLK_STAY_SAME) {
1388 1384 request->l_end = lock->l_end;
1389 1385 topology[0] = request;
1390 1386 nvertex = 1;
1391 1387 } else {
1392 1388 lock1 = flk_get_lock();
1393 1389 COPY(lock1, lock);
1394 1390 lock1->l_start = request->l_end + 1;
1395 1391 topology[0] = lock1;
1396 1392 topology[1] = request;
1397 1393 nvertex = 2;
1398 1394 }
1399 1395 } else {
1400 1396 if (lock_effect == FLK_STAY_SAME) {
1401 1397 request->l_start = lock->l_start;
1402 1398 request->l_end = lock->l_end;
1403 1399 topology[0] = request;
1404 1400 nvertex = 1;
1405 1401 } else {
1406 1402 lock1 = flk_get_lock();
1407 1403 COPY(lock1, lock);
1408 1404 lock1->l_start = request->l_end + 1;
1409 1405 topology[0] = lock1;
1410 1406 topology[1] = request;
1411 1407 nvertex = 2;
1412 1408 }
1413 1409 }
1414 1410 } else if (request->l_end > lock->l_end) {
1415 1411 if (request->l_start > lock->l_start) {
1416 1412 if (lock_effect == FLK_STAY_SAME) {
1417 1413 request->l_start = lock->l_start;
1418 1414 topology[0] = request;
1419 1415 nvertex = 1;
1420 1416 } else {
1421 1417 lock1 = flk_get_lock();
1422 1418 COPY(lock1, lock);
1423 1419 lock1->l_end = request->l_start - 1;
1424 1420 topology[0] = lock1;
1425 1421 topology[1] = request;
1426 1422 nvertex = 2;
1427 1423 }
1428 1424 } else if (request->l_start < lock->l_start) {
1429 1425 topology[0] = request;
1430 1426 nvertex = 1;
1431 1427 } else {
1432 1428 topology[0] = request;
1433 1429 nvertex = 1;
1434 1430 }
1435 1431 } else {
1436 1432 if (request->l_start > lock->l_start) {
1437 1433 if (lock_effect == FLK_STAY_SAME) {
1438 1434 request->l_start = lock->l_start;
1439 1435 topology[0] = request;
1440 1436 nvertex = 1;
1441 1437 } else {
1442 1438 lock1 = flk_get_lock();
1443 1439 COPY(lock1, lock);
1444 1440 lock1->l_end = request->l_start - 1;
1445 1441 topology[0] = lock1;
1446 1442 topology[1] = request;
1447 1443 nvertex = 2;
1448 1444 }
1449 1445 } else if (request->l_start < lock->l_start) {
1450 1446 topology[0] = request;
1451 1447 nvertex = 1;
1452 1448 } else {
1453 1449 if (lock_effect != FLK_UNLOCK) {
1454 1450 topology[0] = request;
1455 1451 nvertex = 1;
1456 1452 } else {
1457 1453 flk_delete_active_lock(lock, 0);
1458 1454 flk_wakeup(lock, 1);
1459 1455 flk_free_lock(lock);
1460 1456 CHECK_SLEEPING_LOCKS(gp);
1461 1457 CHECK_ACTIVE_LOCKS(gp);
1462 1458 return (1);
1463 1459 }
1464 1460 }
1465 1461 }
1466 1462
1467 1463 recompute:
1468 1464
1469 1465 /*
1470 1466 * For unlock we don't send the 'request' for recomputing
1471 1467 * dependencies because no lock will add an edge to it.
1472 1468 */
1473 1469
1474 1470 if (lock_effect == FLK_UNLOCK) {
1475 1471 topology[nvertex-1] = NULL;
1476 1472 nvertex--;
1477 1473 }
1478 1474 for (i = 0; i < nvertex; i++) {
1479 1475 topology[i]->l_state |= RECOMPUTE_LOCK;
1480 1476 topology[i]->l_color = NO_COLOR;
1481 1477 }
1482 1478
1483 1479 ASSERT(FIRST_ADJ(lock) == HEAD(lock));
1484 1480
1485 1481 /*
1486 1482 * we remove, from all other vertices' adjacency lists, the edges
1487 1483 * leading to this vertex 'lock'.
1488 1484 */
1489 1485
1490 1486 ep = FIRST_IN(lock);
1491 1487 while (ep != HEAD(lock)) {
1492 1488 ADJ_LIST_REMOVE(ep);
1493 1489 ep = NEXT_IN(ep);
1494 1490 }
1495 1491
1496 1492 flk_delete_active_lock(lock, 0);
1497 1493
1498 1494 /* We are ready for recomputing the dependencies now */
1499 1495
1500 1496 flk_recompute_dependencies(lock, topology, nvertex, 1);
1501 1497
1502 1498 for (i = 0; i < nvertex; i++) {
1503 1499 topology[i]->l_state &= ~RECOMPUTE_LOCK;
1504 1500 topology[i]->l_color = NO_COLOR;
1505 1501 }
1506 1502
1507 1503
1508 1504 if (lock_effect == FLK_UNLOCK) {
1509 1505 nvertex++;
1510 1506 }
1511 1507 for (i = 0; i < nvertex - 1; i++) {
1512 1508 flk_insert_active_lock(topology[i]);
1513 1509 }
1514 1510
1515 1511
1516 1512 if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
1517 1513 flk_wakeup(lock, 0);
1518 1514 } else {
1519 1515 ep = FIRST_IN(lock);
1520 1516 while (ep != HEAD(lock)) {
1521 1517 lock->l_sedge = NEXT_IN(ep);
1522 1518 IN_LIST_REMOVE(ep);
1523 1519 flk_update_proc_graph(ep, 1);
1524 1520 flk_free_edge(ep);
1525 1521 ep = lock->l_sedge;
1526 1522 }
1527 1523 }
1528 1524 flk_free_lock(lock);
1529 1525
1530 1526 CHECK_SLEEPING_LOCKS(gp);
1531 1527 CHECK_ACTIVE_LOCKS(gp);
1532 1528 return (0);
1533 1529 }
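
[Annotation, not part of the diff] A worked example of the overlap splitting above, with hypothetical values:

    /*
     * Illustrative: an active F_WRLCK on [0,99] meets an F_UNLCK request
     * for [40,59].  lock_effect == FLK_UNLOCK and the request is strictly
     * inside the lock, so the code builds:
     *
     *	lock1 = [0,39], lock2 = [60,99], topology = {lock1, lock2, request}
     *
     * At 'recompute:' the request is dropped from topology (nvertex--),
     * dependencies are recomputed, and only lock1 and lock2 are inserted
     * into the active list; flk_wakeup() then wakes the old lock's
     * dependents, so waiters on the freed range [40,59] can proceed.
     */
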
1534 1530
1535 1531 /*
1536 1532 * Insert a lock into the active queue.
1537 1533 */
1538 1534
1539 1535 static void
1540 1536 flk_insert_active_lock(lock_descriptor_t *new_lock)
1541 1537 {
1542 1538 graph_t *gp = new_lock->l_graph;
1543 1539 vnode_t *vp = new_lock->l_vnode;
1544 1540 lock_descriptor_t *first_lock, *lock;
1545 1541
1546 1542 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1547 1543
1548 1544 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1549 1545 first_lock = lock;
1550 1546
1551 1547 if (first_lock != NULL) {
1552 1548 for (; (lock->l_vnode == vp &&
1553 1549 lock->l_start < new_lock->l_start); lock = lock->l_next)
1554 1550 ;
1555 1551 } else {
1556 1552 lock = ACTIVE_HEAD(gp);
1557 1553 }
1558 1554
1559 1555 lock->l_prev->l_next = new_lock;
1560 1556 new_lock->l_next = lock;
1561 1557 new_lock->l_prev = lock->l_prev;
1562 1558 lock->l_prev = new_lock;
1563 1559
1564 1560 if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) {
1565 1561 vp->v_filocks = (struct filock *)new_lock;
1566 1562 }
1567 1563 flk_set_state(new_lock, FLK_ACTIVE_STATE);
1568 1564 new_lock->l_state |= ACTIVE_LOCK;
1569 1565
1570 1566 CHECK_ACTIVE_LOCKS(gp);
1571 1567 CHECK_SLEEPING_LOCKS(gp);
1572 1568 }
1573 1569
1574 1570 /*
1575 1571 * Delete the active lock: performs one of two functions depending on
1576 1572 * the value of the second parameter. One is to remove it from the
1577 1573 * active lists only; the other is to both remove and free the lock.
1578 1574 */
1579 1575
1580 1576 static void
1581 1577 flk_delete_active_lock(lock_descriptor_t *lock, int free_lock)
1582 1578 {
1583 1579 vnode_t *vp = lock->l_vnode;
1584 1580 graph_t *gp = lock->l_graph;
1585 1581
1586 1582 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1587 1583 if (free_lock)
1588 1584 ASSERT(NO_DEPENDENTS(lock));
1589 1585 ASSERT(NOT_BLOCKED(lock));
1590 1586 ASSERT(IS_ACTIVE(lock));
1591 1587
1592 1588 ASSERT((vp->v_filocks != NULL));
1593 1589
1594 1590 if (vp->v_filocks == (struct filock *)lock) {
1595 1591 vp->v_filocks = (struct filock *)
1596 1592 ((lock->l_next->l_vnode == vp) ? lock->l_next :
1597 1593 NULL);
1598 1594 }
1599 1595 lock->l_next->l_prev = lock->l_prev;
1600 1596 lock->l_prev->l_next = lock->l_next;
1601 1597 lock->l_next = lock->l_prev = NULL;
1602 1598 flk_set_state(lock, FLK_DEAD_STATE);
1603 1599 lock->l_state &= ~ACTIVE_LOCK;
1604 1600
1605 1601 if (free_lock)
1606 1602 flk_free_lock(lock);
1607 1603 CHECK_ACTIVE_LOCKS(gp);
1608 1604 CHECK_SLEEPING_LOCKS(gp);
1609 1605 }
1610 1606
1611 1607 /*
1612 1608 * Insert into the sleep queue.
1613 1609 */
1614 1610
1615 1611 static void
1616 1612 flk_insert_sleeping_lock(lock_descriptor_t *request)
1617 1613 {
1618 1614 graph_t *gp = request->l_graph;
1619 1615 vnode_t *vp = request->l_vnode;
1620 1616 lock_descriptor_t *lock;
1621 1617
1622 1618 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1623 1619 ASSERT(IS_INITIAL(request));
1624 1620
1625 1621 for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks &&
1626 1622 lock->l_vnode < vp); lock = lock->l_next)
1627 1623 ;
1628 1624
1629 1625 lock->l_prev->l_next = request;
1630 1626 request->l_prev = lock->l_prev;
1631 1627 lock->l_prev = request;
1632 1628 request->l_next = lock;
1633 1629 flk_set_state(request, FLK_SLEEPING_STATE);
1634 1630 request->l_state |= SLEEPING_LOCK;
1635 1631 }
1636 1632
1637 1633 /*
1638 1634 * Cancelling a sleeping lock implies removing a vertex from the
1639 1635 * dependency graph and therefore we should recompute the dependencies
1640 1636 * of all vertices that have a path to this vertex, w.r.t. all
1641 1637 * vertices reachable from this vertex.
1642 1638 */
1643 1639
1644 1640 void
1645 1641 flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue)
1646 1642 {
1647 1643 graph_t *gp = request->l_graph;
1648 1644 vnode_t *vp = request->l_vnode;
1649 1645 lock_descriptor_t **topology = NULL;
1650 1646 edge_t *ep;
1651 1647 lock_descriptor_t *vertex, *lock;
1652 1648 int nvertex = 0;
1653 1649 int i;
1654 1650 lock_descriptor_t *vertex_stack;
1655 1651
1656 1652 STACK_INIT(vertex_stack);
1657 1653
1658 1654 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1659 1655 /*
1660 1656 * count the number of vertex pointers that have to be allocated:
1661 1657 * all vertices that are reachable from request.
1662 1658 */
1663 1659
1664 1660 STACK_PUSH(vertex_stack, request, l_stack);
1665 1661
1666 1662 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1667 1663 STACK_POP(vertex_stack, l_stack);
1668 1664 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
1669 1665 ep = NEXT_ADJ(ep)) {
1670 1666 if (IS_RECOMPUTE(ep->to_vertex))
1671 1667 continue;
1672 1668 ep->to_vertex->l_state |= RECOMPUTE_LOCK;
1673 1669 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1674 1670 nvertex++;
1675 1671 }
1676 1672 }
1677 1673
1678 1674 /*
1679 1675 * allocate memory for holding the vertex pointers
1680 1676 */
1681 1677
1682 1678 if (nvertex) {
1683 1679 topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *),
1684 1680 KM_SLEEP);
1685 1681 }
1686 1682
1687 1683 /*
1688 1684 * one more pass to actually store the vertices in the
1689 1685 * allocated array.
1690 1686 * We first check sleeping locks and then active locks
1691 1687 * so that the topology array will be in topological
1692 1688 * order.
1693 1689 */
1694 1690
1695 1691 nvertex = 0;
1696 1692 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
1697 1693
1698 1694 if (lock) {
1699 1695 do {
1700 1696 if (IS_RECOMPUTE(lock)) {
1701 1697 lock->l_index = nvertex;
1702 1698 topology[nvertex++] = lock;
1703 1699 }
1704 1700 lock->l_color = NO_COLOR;
1705 1701 lock = lock->l_next;
1706 1702 } while (lock->l_vnode == vp);
1707 1703 }
1708 1704
1709 1705 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1710 1706
1711 1707 if (lock) {
1712 1708 do {
1713 1709 if (IS_RECOMPUTE(lock)) {
1714 1710 lock->l_index = nvertex;
1715 1711 topology[nvertex++] = lock;
1716 1712 }
1717 1713 lock->l_color = NO_COLOR;
1718 1714 lock = lock->l_next;
1719 1715 } while (lock->l_vnode == vp);
1720 1716 }
1721 1717
1722 1718 /*
1723 1719 * Remove the in- and out-edges of request.
1724 1720 * They are freed after updating the proc graph below.
1725 1721 */
1726 1722
1727 1723 for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) {
1728 1724 ADJ_LIST_REMOVE(ep);
1729 1725 }
1730 1726
1731 1727
1732 1728 if (remove_from_queue)
1733 1729 REMOVE_SLEEP_QUEUE(request);
1734 1730
1735 1731 /* we are ready to recompute */
1736 1732
1737 1733 flk_recompute_dependencies(request, topology, nvertex, 1);
1738 1734
1739 1735 ep = FIRST_ADJ(request);
1740 1736 while (ep != HEAD(request)) {
1741 1737 IN_LIST_REMOVE(ep);
1742 1738 request->l_sedge = NEXT_ADJ(ep);
1743 1739 ADJ_LIST_REMOVE(ep);
1744 1740 flk_update_proc_graph(ep, 1);
1745 1741 flk_free_edge(ep);
1746 1742 ep = request->l_sedge;
1747 1743 }
1748 1744
1749 1745
1750 1746 /*
1751 1747 * unset the RECOMPUTE flag in those vertices
1752 1748 */
1753 1749
1754 1750 for (i = 0; i < nvertex; i++) {
1755 1751 topology[i]->l_state &= ~RECOMPUTE_LOCK;
1756 1752 }
1757 1753
1758 1754 /*
1759 1755 * free the topology
1760 1756 */
1761 1757 if (nvertex)
1762 1758 kmem_free((void *)topology,
1763 1759 (nvertex * sizeof (lock_descriptor_t *)));
1764 1760 /*
1765 1761 * Some locks may be unblocked now.
1766 1762 */
1767 1763
1768 1764 flk_wakeup(request, 0);
1769 1765
1770 1766 /*
1771 1767 * we expect to have a correctly recomputed graph now.
1772 1768 */
1773 1769 flk_set_state(request, FLK_DEAD_STATE);
1774 1770 flk_free_lock(request);
1775 1771 CHECK_SLEEPING_LOCKS(gp);
1776 1772 CHECK_ACTIVE_LOCKS(gp);
1777 1773
1778 1774 }
1779 1775
1780 1776 /*
1781 1777 * Uncoloring the graph simply increments the graph's mark value;
1782 1778 * only when wraparound takes place do we explicitly color all
1783 1779 * vertices in the graph.
1784 1780 */
1785 1781
1786 1782 static void
1787 1783 flk_graph_uncolor(graph_t *gp)
1788 1784 {
1789 1785 lock_descriptor_t *lock;
1790 1786
1791 1787 if (gp->mark == UINT_MAX) {
1792 1788 gp->mark = 1;
1793 1789 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
1794 1790 lock = lock->l_next)
1795 1791 lock->l_color = 0;
1796 1792
1797 1793 for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp);
1798 1794 lock = lock->l_next)
1799 1795 lock->l_color = 0;
1800 1796 } else {
1801 1797 gp->mark++;
1802 1798 }
1803 1799 }
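
A self-contained sketch of the mark-based trick used here: a vertex counts as colored only when its color equals the graph's current mark, so uncoloring is a single increment except on wraparound. The fixed-size vertex array below is an assumption for illustration, not the kernel layout.

	#include <stdio.h>
	#include <limits.h>

	#define	NVERT	4

	struct graph {
		unsigned int mark;
		unsigned int color[NVERT];
	};

	#define	COLORED(g, v)	((g)->color[(v)] == (g)->mark)
	#define	COLOR(g, v)	((g)->color[(v)] = (g)->mark)

	/* O(1) in the common case; O(n) only when the mark wraps. */
	static void
	graph_uncolor(struct graph *g)
	{
		int i;

		if (g->mark == UINT_MAX) {
			g->mark = 1;
			for (i = 0; i < NVERT; i++)
				g->color[i] = 0;
		} else {
			g->mark++;
		}
	}

	int
	main(void)
	{
		struct graph g = { 1, { 0 } };

		COLOR(&g, 2);
		printf("%d\n", COLORED(&g, 2));	/* 1 */
		graph_uncolor(&g);	/* every vertex uncolored at once */
		printf("%d\n", COLORED(&g, 2));	/* 0 */
		return (0);
	}
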
1804 1800
1805 1801 /*
1806 1802 * Wake up locks that are blocked on the given lock.
1807 1803 */
1808 1804
1809 1805 static void
1810 1806 flk_wakeup(lock_descriptor_t *lock, int adj_list_remove)
1811 1807 {
1812 1808 edge_t *ep;
1813 1809 graph_t *gp = lock->l_graph;
1814 1810 lock_descriptor_t *lck;
1815 1811
1816 1812 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1817 1813 if (NO_DEPENDENTS(lock))
1818 1814 return;
1819 1815 ep = FIRST_IN(lock);
1820 1816 do {
1821 1817 /*
1822 1818 * delete the edge from the adjacency list
1823 1819 * of the from vertex; if there are no more
1824 1820 * adjacent edges for that vertex, wake its process.
1825 1821 */
1826 1822 lck = ep->from_vertex;
1827 1823 if (adj_list_remove)
1828 1824 ADJ_LIST_REMOVE(ep);
1829 1825 flk_update_proc_graph(ep, 1);
1830 1826 if (NOT_BLOCKED(lck)) {
1831 1827 GRANT_WAKEUP(lck);
1832 1828 }
1833 1829 lock->l_sedge = NEXT_IN(ep);
1834 1830 IN_LIST_REMOVE(ep);
1835 1831 flk_free_edge(ep);
1836 1832 ep = lock->l_sedge;
1837 1833 } while (ep != HEAD(lock));
1838 1834 ASSERT(NO_DEPENDENTS(lock));
1839 1835 }
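
The wakeup pass above grants each waiter whose last remaining dependency edge was just removed. A toy sketch of that bookkeeping, with a plain per-waiter counter standing in for the adjacency list (the names and data are hypothetical):

	#include <stdio.h>

	#define	NWAITERS	3

	/* number of locks each waiter is still blocked behind */
	static int blocked_on[NWAITERS] = { 1, 2, 1 };

	/* Drop one dependency edge from every waiter of the freed lock. */
	static void
	wakeup_waiters(const int waiters[], int n)
	{
		int i;

		for (i = 0; i < n; i++) {
			int w = waiters[i];

			if (--blocked_on[w] == 0)
				printf("grant wakeup to waiter %d\n", w);
		}
	}

	int
	main(void)
	{
		int waiters[] = { 0, 1, 2 };	/* all wait on this lock */

		wakeup_waiters(waiters, 3);	/* waiters 0 and 2 wake */
		return (0);
	}
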
1840 1836
1841 1837 /*
1842 1838 * Each dependent of request is checked for dependency against the
1843 1839 * locks in topology (called topology because the array is, and should
1844 1840 * be, in topological order for this algorithm; if it is not, the
1845 1841 * inner loop below might add more edges than necessary. Topological
1846 1842 * ordering of the vertices satisfies the property that all edges go
1847 1843 * from left to right, i.e., topology[i] can have an edge to
1848 1844 * topology[j] iff i < j). If lock l1 in the dependent set of request
1849 1845 * is dependent on (blocked by) lock l2 in topology but does not have
1850 1846 * a path to it, we add an edge in the inner loop below.
1851 1847 *
1852 1848 * We don't want to add an edge between l1 and l2 if there already
1853 1849 * exists a path from l1 to l2, so care has to be taken for those
1854 1850 * vertices that have two paths to 'request'. These vertices are
1855 1851 * referred to here as barrier locks.
1856 1852 *
1857 1853 * The barriers (those vertices that originally had two or more paths
1858 1854 * to request) have to be found because otherwise we may end up adding
1859 1855 * edges unnecessarily to vertices in topology: a barrier vertex can
1860 1856 * have an edge to a vertex in topology as well as a path to it.
1861 1857 */
1862 1858
1863 1859 static void
1864 1860 flk_recompute_dependencies(lock_descriptor_t *request,
1865 1861 lock_descriptor_t **topology,
1866 1862 int nvertex, int update_graph)
1867 1863 {
1868 1864 lock_descriptor_t *vertex, *lock;
1869 1865 graph_t *gp = request->l_graph;
1870 1866 int i, count;
1871 1867 int barrier_found = 0;
1872 1868 edge_t *ep;
1873 1869 lock_descriptor_t *vertex_stack;
1874 1870
1875 1871 STACK_INIT(vertex_stack);
1876 1872
1877 1873 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1878 1874 if (nvertex == 0)
1879 1875 return;
1880 1876 flk_graph_uncolor(request->l_graph);
1881 1877 barrier_found = flk_find_barriers(request);
1882 1878 request->l_state |= RECOMPUTE_DONE;
1883 1879
1884 1880 STACK_PUSH(vertex_stack, request, l_stack);
1885 1881 request->l_sedge = FIRST_IN(request);
1886 1882
1887 1883
1888 1884 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1889 1885 if (vertex->l_state & RECOMPUTE_DONE) {
1890 1886 count = 0;
1891 1887 goto next_in_edge;
1892 1888 }
1893 1889 if (IS_BARRIER(vertex)) {
1894 1890 /* decrement the barrier count */
1895 1891 if (vertex->l_index) {
1896 1892 vertex->l_index--;
1897 1893 /* this vertex will be pushed again anyway */
1898 1894 STACK_POP(vertex_stack, l_stack);
1899 1895 if (vertex->l_index == 0) {
1900 1896 /*
1901 1897 * barrier is over; we can recompute
1902 1898 * dependencies for this lock on the
1903 1899 * next stack pop
1904 1900 */
1905 1901 vertex->l_state &= ~BARRIER_LOCK;
1906 1902 }
1907 1903 continue;
1908 1904 }
1909 1905 }
1910 1906 vertex->l_state |= RECOMPUTE_DONE;
1911 1907 flk_graph_uncolor(gp);
1912 1908 count = flk_color_reachables(vertex);
1913 1909 for (i = 0; i < nvertex; i++) {
1914 1910 lock = topology[i];
1915 1911 if (COLORED(lock))
1916 1912 continue;
1917 1913 if (BLOCKS(lock, vertex)) {
1918 1914 (void) flk_add_edge(vertex, lock,
1919 1915 NO_CHECK_CYCLE, update_graph);
1920 1916 COLOR(lock);
1921 1917 count++;
1922 1918 count += flk_color_reachables(lock);
1923 1919 }
1924 1920
1925 1921 }
1926 1922
1927 1923 next_in_edge:
1928 1924 if (count == nvertex ||
1929 1925 vertex->l_sedge == HEAD(vertex)) {
1930 1926 /* prune the tree below this */
1931 1927 STACK_POP(vertex_stack, l_stack);
1932 1928 vertex->l_state &= ~RECOMPUTE_DONE;
1933 1929 /* update the barrier locks below this! */
1934 1930 if (vertex->l_sedge != HEAD(vertex) && barrier_found) {
1935 1931 flk_graph_uncolor(gp);
1936 1932 flk_update_barriers(vertex);
1937 1933 }
1938 1934 continue;
1939 1935 }
1940 1936
1941 1937 ep = vertex->l_sedge;
1942 1938 lock = ep->from_vertex;
1943 1939 STACK_PUSH(vertex_stack, lock, l_stack);
1944 1940 lock->l_sedge = FIRST_IN(lock);
1945 1941 vertex->l_sedge = NEXT_IN(ep);
1946 1942 }
1947 1943
1948 1944 }
1949 1945
1950 1946 /*
1951 1947 * Color all vertices reachable from vertex that belong to the topology
1952 1948 * (those with RECOMPUTE_LOCK set in their state) and are not yet colored.
1953 1949 *
1954 1950 * Note: we need to use a different stack_link l_stack1 because this is
1955 1951 * called from flk_recompute_dependencies(), which already uses a stack with
1956 1952 * l_stack as stack_link.
1957 1953 */
1958 1954
1959 1955 static int
1960 1956 flk_color_reachables(lock_descriptor_t *vertex)
1961 1957 {
1962 1958 lock_descriptor_t *ver, *lock;
1963 1959 int count;
1964 1960 edge_t *ep;
1965 1961 lock_descriptor_t *vertex_stack;
1966 1962
1967 1963 STACK_INIT(vertex_stack);
1968 1964
1969 1965 STACK_PUSH(vertex_stack, vertex, l_stack1);
1970 1966 count = 0;
1971 1967 while ((ver = STACK_TOP(vertex_stack)) != NULL) {
1972 1968
1973 1969 STACK_POP(vertex_stack, l_stack1);
1974 1970 for (ep = FIRST_ADJ(ver); ep != HEAD(ver);
1975 1971 ep = NEXT_ADJ(ep)) {
1976 1972 lock = ep->to_vertex;
1977 1973 if (COLORED(lock))
1978 1974 continue;
1979 1975 COLOR(lock);
1980 1976 if (IS_RECOMPUTE(lock))
1981 1977 count++;
1982 1978 STACK_PUSH(vertex_stack, lock, l_stack1);
1983 1979 }
1984 1980
1985 1981 }
1986 1982 return (count);
1987 1983 }
1988 1984
1989 1985 /*
1990 1986 * Called from flk_recompute_dependencies(), this routine decrements
1991 1987 * the barrier count of barrier vertices that are reachable from lock.
1992 1988 */
1993 1989
1994 1990 static void
1995 1991 flk_update_barriers(lock_descriptor_t *lock)
1996 1992 {
1997 1993 lock_descriptor_t *vertex, *lck;
1998 1994 edge_t *ep;
1999 1995 lock_descriptor_t *vertex_stack;
2000 1996
2001 1997 STACK_INIT(vertex_stack);
2002 1998
2003 1999 STACK_PUSH(vertex_stack, lock, l_stack1);
2004 2000
2005 2001 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2006 2002 STACK_POP(vertex_stack, l_stack1);
2007 2003 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2008 2004 ep = NEXT_IN(ep)) {
2009 2005 lck = ep->from_vertex;
2010 2006 if (COLORED(lck)) {
2011 2007 if (IS_BARRIER(lck)) {
2012 2008 ASSERT(lck->l_index > 0);
2013 2009 lck->l_index--;
2014 2010 if (lck->l_index == 0)
2015 2011 lck->l_state &= ~BARRIER_LOCK;
2016 2012 }
2017 2013 continue;
2018 2014 }
2019 2015 COLOR(lck);
2020 2016 if (IS_BARRIER(lck)) {
2021 2017 ASSERT(lck->l_index > 0);
2022 2018 lck->l_index--;
2023 2019 if (lck->l_index == 0)
2024 2020 lck->l_state &= ~BARRIER_LOCK;
2025 2021 }
2026 2022 STACK_PUSH(vertex_stack, lck, l_stack1);
2027 2023 }
2028 2024 }
2029 2025 }
2030 2026
2031 2027 /*
2032 2028 * Finds all vertices that are reachable from 'lock' more than once,
2033 2029 * marks them as barrier vertices, and increments their barrier count.
2034 2030 * The barrier count is one less than the total number of paths from
2035 2031 * lock to that vertex.
2036 2032 */
2037 2033
2038 2034 static int
2039 2035 flk_find_barriers(lock_descriptor_t *lock)
2040 2036 {
2041 2037 lock_descriptor_t *vertex, *lck;
2042 2038 int found = 0;
2043 2039 edge_t *ep;
2044 2040 lock_descriptor_t *vertex_stack;
2045 2041
2046 2042 STACK_INIT(vertex_stack);
2047 2043
2048 2044 STACK_PUSH(vertex_stack, lock, l_stack1);
2049 2045
2050 2046 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2051 2047 STACK_POP(vertex_stack, l_stack1);
2052 2048 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2053 2049 ep = NEXT_IN(ep)) {
2054 2050 lck = ep->from_vertex;
2055 2051 if (COLORED(lck)) {
2056 2052 /* this is a barrier */
2057 2053 lck->l_state |= BARRIER_LOCK;
2058 2054 /* index will have barrier count */
2059 2055 lck->l_index++;
2060 2056 if (!found)
2061 2057 found = 1;
2062 2058 continue;
2063 2059 }
2064 2060 COLOR(lck);
2065 2061 lck->l_index = 0;
2066 2062 STACK_PUSH(vertex_stack, lck, l_stack1);
2067 2063 }
2068 2064 }
2069 2065 return (found);
2070 2066 }
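
For a concrete picture of the barrier rule: on the diamond-shaped wait graph request <- a, request <- b, a <- c, b <- c, vertex c is reached along two paths, so it ends up a barrier with count 1 (paths minus one). Below is a compact recursive sketch of the same counting over in-edges, with the adjacency hard-coded for that example; the kernel version is iterative and colors via the graph mark rather than a flag array.

	#include <stdio.h>

	#define	NV	4

	/* in[v] lists the vertices with an edge into v (those waiting on v) */
	static const int in[NV][NV] = {
		{ 1, 2, -1 },	/* 0: request <- a, b */
		{ 3, -1 },	/* 1: a <- c */
		{ 3, -1 },	/* 2: b <- c */
		{ -1 },		/* 3: c has no in-edges */
	};
	static int colored[NV], barrier_count[NV];

	static void
	find_barriers(int v)
	{
		int i, w;

		for (i = 0; (w = in[v][i]) != -1; i++) {
			if (colored[w]) {
				barrier_count[w]++;	/* extra path found */
				continue;
			}
			colored[w] = 1;
			barrier_count[w] = 0;
			find_barriers(w);
		}
	}

	int
	main(void)
	{
		int v;

		colored[0] = 1;
		find_barriers(0);
		for (v = 0; v < NV; v++)
			printf("vertex %d: barrier count %d\n",
			    v, barrier_count[v]);
		/* vertex 3 ends with count 1: two paths reach it */
		return (0);
	}
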
2071 2067
2072 2068 /*
2073 2069 * Finds the first lock that is mainly responsible for blocking this
2074 2070 * request. If there is no such lock, request->l_flock.l_type is set to
2075 2071 * F_UNLCK. Otherwise, request->l_flock is filled in with the particulars
2076 2072 * of the blocking lock.
2077 2073 *
2078 2074 * Note: It is possible a request is blocked by a sleeping lock because
2079 2075 * of the fairness policy used in flk_process_request() to construct the
2080 2076 * dependencies. (see comments before flk_process_request()).
2081 2077 */
2082 2078
2083 2079 static void
2084 2080 flk_get_first_blocking_lock(lock_descriptor_t *request)
2085 2081 {
2086 2082 graph_t *gp = request->l_graph;
2087 2083 vnode_t *vp = request->l_vnode;
2088 2084 lock_descriptor_t *lock, *blocker;
2089 2085
2090 2086 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2091 2087 blocker = NULL;
2092 2088 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2093 2089
2094 2090 if (lock) {
2095 2091 do {
2096 2092 if (BLOCKS(lock, request)) {
2097 2093 blocker = lock;
2098 2094 break;
2099 2095 }
2100 2096 lock = lock->l_next;
2101 2097 } while (lock->l_vnode == vp);
2102 2098 }
2103 2099
2104 2100 if (blocker) {
2105 2101 report_blocker(blocker, request);
2106 2102 } else
2107 2103 request->l_flock.l_type = F_UNLCK;
2108 2104 }
2109 2105
2110 2106 /*
2111 2107 * Get the graph_t structure associated with a vnode.
2112 2108 * If 'initialize' is non-zero, and the graph_t structure for this vnode has
2113 2109 * not yet been initialized, then a new element is allocated and returned.
2114 2110 */
2115 2111 graph_t *
2116 2112 flk_get_lock_graph(vnode_t *vp, int initialize)
2117 2113 {
2118 2114 graph_t *gp;
2119 2115 graph_t *gp_alloc = NULL;
2120 2116 int index = HASH_INDEX(vp);
2121 2117
2122 2118 if (initialize == FLK_USE_GRAPH) {
2123 2119 mutex_enter(&flock_lock);
2124 2120 gp = lock_graph[index];
2125 2121 mutex_exit(&flock_lock);
2126 2122 return (gp);
2127 2123 }
2128 2124
2129 2125 ASSERT(initialize == FLK_INIT_GRAPH);
2130 2126
2131 2127 if (lock_graph[index] == NULL) {
2132 2128
2133 2129 gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP);
2134 2130
2135 2131 /* Initialize the graph */
2136 2132
2137 2133 gp_alloc->active_locks.l_next =
2138 2134 gp_alloc->active_locks.l_prev =
2139 2135 (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc);
2140 2136 gp_alloc->sleeping_locks.l_next =
2141 2137 gp_alloc->sleeping_locks.l_prev =
2142 2138 (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc);
2143 2139 gp_alloc->index = index;
2144 2140 mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL);
2145 2141 }
2146 2142
2147 2143 mutex_enter(&flock_lock);
2148 2144
2149 2145 gp = lock_graph[index];
2150 2146
2151 2147 /* Recheck the value within flock_lock */
2152 2148 if (gp == NULL) {
2153 2149 struct flock_globals *fg;
2154 2150
2155 2151 /* We must have previously allocated the graph_t structure */
2156 2152 ASSERT(gp_alloc != NULL);
2157 2153 lock_graph[index] = gp = gp_alloc;
2158 2154 /*
2159 2155 * The lockmgr status is only needed if KLM is loaded.
2160 2156 */
2161 2157 if (flock_zone_key != ZONE_KEY_UNINITIALIZED) {
2162 2158 fg = flk_get_globals();
2163 2159 fg->lockmgr_status[index] = fg->flk_lockmgr_status;
2164 2160 }
2165 2161 }
2166 2162
2167 2163 mutex_exit(&flock_lock);
2168 2164
2169 2165 if ((gp_alloc != NULL) && (gp != gp_alloc)) {
2170 2166 /* There was a race to allocate the graph_t and we lost */
2171 2167 mutex_destroy(&gp_alloc->gp_mutex);
2172 2168 kmem_free(gp_alloc, sizeof (graph_t));
2173 2169 }
2174 2170
2175 2171 return (gp);
2176 2172 }
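
flk_get_lock_graph() allocates the graph optimistically outside flock_lock, rechecks the slot under the lock, and frees its copy if another thread won the race. The same pattern in a hedged user-land sketch with pthreads; the table, struct, and names here are assumptions for illustration, not the kernel types.

	#include <pthread.h>
	#include <stdlib.h>

	#define	HASH_SZ	32

	struct graph {
		pthread_mutex_t g_lock;
		int g_index;
	};

	static struct graph *table[HASH_SZ];
	static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

	struct graph *
	get_graph(int index)
	{
		struct graph *g, *g_alloc = NULL;

		/*
		 * Optimistic allocation, done without holding table_lock.
		 * The unlocked peek is only a hint to skip the allocation;
		 * the locked recheck below is what actually decides.
		 */
		if (table[index] == NULL) {
			g_alloc = calloc(1, sizeof (*g_alloc));
			if (g_alloc == NULL)
				return (NULL);
			pthread_mutex_init(&g_alloc->g_lock, NULL);
			g_alloc->g_index = index;
		}

		pthread_mutex_lock(&table_lock);
		g = table[index];
		if (g == NULL) {		/* recheck under the lock */
			g = table[index] = g_alloc;
			g_alloc = NULL;
		}
		pthread_mutex_unlock(&table_lock);

		if (g_alloc != NULL) {	/* lost the race; discard ours */
			pthread_mutex_destroy(&g_alloc->g_lock);
			free(g_alloc);
		}
		return (g);
	}
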
2177 2173
2178 2174 /*
2179 2175 * PSARC case 1997/292
2180 2176 */
2181 2177 int
2182 2178 cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid)
2183 2179 {
2184 2180 lock_descriptor_t *lock;
2185 2181 int result = 0;
2186 2182 graph_t *gp;
2187 2183 int lock_nlmid;
2188 2184
2189 2185 /*
2190 2186 * Check to see if node is booted as a cluster. If not, return.
2191 2187 */
2192 2188 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2193 2189 return (0);
2194 2190 }
2195 2191
2196 2192 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2197 2193 if (gp == NULL) {
2198 2194 return (0);
2199 2195 }
2200 2196
2201 2197 mutex_enter(&gp->gp_mutex);
2202 2198
2203 2199 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2204 2200
2205 2201 if (lock) {
2206 2202 while (lock->l_vnode == vp) {
2207 2203 /* get NLM id from sysid */
2208 2204 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2209 2205
2210 2206 /*
2211 2207 * If NLM server request _and_ nlmid of lock matches
2212 2208 * nlmid of argument, then we've found a remote lock.
2213 2209 */
2214 2210 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2215 2211 result = 1;
2216 2212 goto done;
2217 2213 }
2218 2214 lock = lock->l_next;
2219 2215 }
2220 2216 }
2221 2217
2222 2218 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2223 2219
2224 2220 if (lock) {
2225 2221 while (lock->l_vnode == vp) {
2226 2222 /* get NLM id from sysid */
2227 2223 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2228 2224
2229 2225 /*
2230 2226 * If NLM server request _and_ nlmid of lock matches
2231 2227 * nlmid of argument, then we've found a remote lock.
2232 2228 */
2233 2229 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2234 2230 result = 1;
2235 2231 goto done;
2236 2232 }
2237 2233 lock = lock->l_next;
2238 2234 }
2239 2235 }
2240 2236
2241 2237 done:
2242 2238 mutex_exit(&gp->gp_mutex);
2243 2239 return (result);
2244 2240 }
2245 2241
2246 -/* ONC_PLUS EXTRACT START */
2247 2242 /*
2248 2243 * Determine whether there are any locks for the given vnode with a remote
2249 2244 * sysid. Returns zero if not, non-zero if there are.
2250 2245 *
2251 2246 * Note that the return value from this function is potentially invalid
2252 2247 * once it has been returned. The caller is responsible for providing its
2253 2248 * own synchronization mechanism to ensure that the return value is useful
2254 2249 * (e.g., see nfs_lockcompletion()).
2255 2250 */
2256 2251 int
2257 2252 flk_has_remote_locks(vnode_t *vp)
2258 2253 {
2259 2254 lock_descriptor_t *lock;
2260 2255 int result = 0;
2261 2256 graph_t *gp;
2262 2257
2263 2258 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2264 2259 if (gp == NULL) {
2265 2260 return (0);
2266 2261 }
2267 2262
2268 2263 mutex_enter(&gp->gp_mutex);
2269 2264
2270 2265 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2271 2266
2272 2267 if (lock) {
2273 2268 while (lock->l_vnode == vp) {
2274 2269 if (IS_REMOTE(lock)) {
2275 2270 result = 1;
2276 2271 goto done;
2277 2272 }
2278 2273 lock = lock->l_next;
2279 2274 }
2280 2275 }
2281 2276
2282 2277 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2283 2278
2284 2279 if (lock) {
2285 2280 while (lock->l_vnode == vp) {
2286 2281 if (IS_REMOTE(lock)) {
2287 2282 result = 1;
2288 2283 goto done;
2289 2284 }
2290 2285 lock = lock->l_next;
2291 2286 }
2292 2287 }
2293 2288
2294 2289 done:
2295 2290 mutex_exit(&gp->gp_mutex);
2296 2291 return (result);
2297 2292 }
2298 2293
2299 2294 /*
2300 2295 * Determine if there are any locks owned by the given sysid.
2301 2296 * Returns zero if not, non-zero if there are. Note that this return code
2302 2297 * could be derived from flk_get_{sleeping,active}_locks, but this routine
2303 2298 * avoids all the memory allocations of those routines.
2304 2299 *
2305 2300 * This routine has the same synchronization issues as
2306 2301 * flk_has_remote_locks.
2307 2302 */
2308 2303
2309 2304 int
2310 2305 flk_sysid_has_locks(int sysid, int lck_type)
2311 2306 {
2312 2307 int has_locks = 0;
2313 2308 lock_descriptor_t *lock;
2314 2309 graph_t *gp;
2315 2310 int i;
2316 2311
2317 2312 for (i = 0; i < HASH_SIZE && !has_locks; i++) {
2318 2313 mutex_enter(&flock_lock);
2319 2314 gp = lock_graph[i];
2320 2315 mutex_exit(&flock_lock);
2321 2316 if (gp == NULL) {
2322 2317 continue;
2323 2318 }
2324 2319
2325 2320 mutex_enter(&gp->gp_mutex);
2326 2321
2327 2322 if (lck_type & FLK_QUERY_ACTIVE) {
2328 2323 for (lock = ACTIVE_HEAD(gp)->l_next;
2329 2324 lock != ACTIVE_HEAD(gp) && !has_locks;
2330 2325 lock = lock->l_next) {
2331 2326 if (lock->l_flock.l_sysid == sysid)
2332 2327 has_locks = 1;
2333 2328 }
2334 2329 }
2335 2330
2336 2331 if (lck_type & FLK_QUERY_SLEEPING) {
2337 2332 for (lock = SLEEPING_HEAD(gp)->l_next;
2338 2333 lock != SLEEPING_HEAD(gp) && !has_locks;
2339 2334 lock = lock->l_next) {
2340 2335 if (lock->l_flock.l_sysid == sysid)
2341 2336 has_locks = 1;
2342 2337 }
2343 2338 }
2344 2339 mutex_exit(&gp->gp_mutex);
2345 2340 }
2346 2341
2347 2342 return (has_locks);
2348 2343 }
2349 2344
2350 2345
2351 2346 /*
2352 2347 * PSARC case 1997/292
2353 2348 *
2354 2349 * Requires: "sysid" is a pair [nlmid, sysid]. The lower half is a 16-bit
2355 2350 * quantity, the real sysid generated by the NLM server; the upper half
2356 2351 * identifies the node of the cluster where the NLM server ran.
2357 2352 * This routine is only called by an NLM server running in a cluster.
2358 2353 * Effects: Remove all locks held on behalf of the client identified
2359 2354 * by "sysid."
2360 2355 */
2361 2356 void
2362 2357 cl_flk_remove_locks_by_sysid(int sysid)
2363 2358 {
2364 2359 graph_t *gp;
2365 2360 int i;
2366 2361 lock_descriptor_t *lock, *nlock;
2367 2362
2368 2363 /*
2369 2364 * Check to see if node is booted as a cluster. If not, return.
2370 2365 */
2371 2366 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2372 2367 return;
2373 2368 }
2374 2369
2375 2370 ASSERT(sysid != 0);
2376 2371 for (i = 0; i < HASH_SIZE; i++) {
2377 2372 mutex_enter(&flock_lock);
2378 2373 gp = lock_graph[i];
2379 2374 mutex_exit(&flock_lock);
2380 2375
2381 2376 if (gp == NULL)
2382 2377 continue;
2383 2378
2384 2379 mutex_enter(&gp->gp_mutex); /* get mutex on lock graph */
2385 2380
2386 2381 /* signal sleeping requests so that they bail out */
2387 2382 lock = SLEEPING_HEAD(gp)->l_next;
2388 2383 while (lock != SLEEPING_HEAD(gp)) {
2389 2384 nlock = lock->l_next;
2390 2385 if (lock->l_flock.l_sysid == sysid) {
2391 2386 INTERRUPT_WAKEUP(lock);
2392 2387 }
2393 2388 lock = nlock;
2394 2389 }
2395 2390
2396 2391 /* delete active locks */
2397 2392 lock = ACTIVE_HEAD(gp)->l_next;
2398 2393 while (lock != ACTIVE_HEAD(gp)) {
2399 2394 nlock = lock->l_next;
2400 2395 if (lock->l_flock.l_sysid == sysid) {
2401 2396 flk_delete_active_lock(lock, 0);
2402 2397 flk_wakeup(lock, 1);
2403 2398 flk_free_lock(lock);
2404 2399 }
2405 2400 lock = nlock;
2406 2401 }
2407 2402 mutex_exit(&gp->gp_mutex); /* release mutex on lock graph */
2408 2403 }
2409 2404 }
2410 2405
2411 2406 /*
2412 2407 * Delete all locks in the system that belong to the sysid of the request.
2413 2408 */
2414 2409
2415 2410 static void
2416 2411 flk_delete_locks_by_sysid(lock_descriptor_t *request)
2417 2412 {
2418 2413 int sysid = request->l_flock.l_sysid;
2419 2414 lock_descriptor_t *lock, *nlock;
2420 2415 graph_t *gp;
2421 2416 int i;
2422 2417
2423 2418 ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex));
2424 2419 ASSERT(sysid != 0);
2425 2420
2426 2421 mutex_exit(&request->l_graph->gp_mutex);
2427 2422
2428 2423 for (i = 0; i < HASH_SIZE; i++) {
2429 2424 mutex_enter(&flock_lock);
2430 2425 gp = lock_graph[i];
2431 2426 mutex_exit(&flock_lock);
2432 2427
2433 2428 if (gp == NULL)
2434 2429 continue;
2435 2430
2436 2431 mutex_enter(&gp->gp_mutex);
2437 2432
2438 2433 /* signal sleeping requests so that they bail out */
2439 2434 lock = SLEEPING_HEAD(gp)->l_next;
2440 2435 while (lock != SLEEPING_HEAD(gp)) {
2441 2436 nlock = lock->l_next;
2442 2437 if (lock->l_flock.l_sysid == sysid) {
2443 2438 INTERRUPT_WAKEUP(lock);
2444 2439 }
2445 2440 lock = nlock;
2446 2441 }
2447 2442
2448 2443 /* delete active locks */
2449 2444 lock = ACTIVE_HEAD(gp)->l_next;
2450 2445 while (lock != ACTIVE_HEAD(gp)) {
2451 2446 nlock = lock->l_next;
2452 2447 if (lock->l_flock.l_sysid == sysid) {
2453 2448 flk_delete_active_lock(lock, 0);
2454 2449 flk_wakeup(lock, 1);
2455 2450 flk_free_lock(lock);
2456 2451 }
2457 2452 lock = nlock;
2458 2453 }
2459 2454 mutex_exit(&gp->gp_mutex);
2460 2455 }
2461 2456
2462 2457 mutex_enter(&request->l_graph->gp_mutex);
2463 2458 }
2464 2459
2465 2460 /*
2466 2461 * Clustering: Deletes PXFS locks
2467 2462 * Effects: Delete all locks on files in the given file system and with the
2468 2463 * given PXFS id.
2469 2464 */
2470 2465 void
2471 2466 cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid)
2472 2467 {
2473 2468 lock_descriptor_t *lock, *nlock;
2474 2469 graph_t *gp;
2475 2470 int i;
2476 2471
2477 2472 for (i = 0; i < HASH_SIZE; i++) {
2478 2473 mutex_enter(&flock_lock);
2479 2474 gp = lock_graph[i];
2480 2475 mutex_exit(&flock_lock);
2481 2476
2482 2477 if (gp == NULL)
2483 2478 continue;
2484 2479
2485 2480 mutex_enter(&gp->gp_mutex);
2486 2481
2487 2482 /* signal sleeping requests so that they bail out */
2488 2483 lock = SLEEPING_HEAD(gp)->l_next;
2489 2484 while (lock != SLEEPING_HEAD(gp)) {
2490 2485 nlock = lock->l_next;
2491 2486 if (lock->l_vnode->v_vfsp == vfsp) {
2492 2487 ASSERT(IS_PXFS(lock));
2493 2488 if (GETPXFSID(lock->l_flock.l_sysid) ==
2494 2489 pxfsid) {
2495 2490 flk_set_state(lock,
2496 2491 FLK_CANCELLED_STATE);
2497 2492 flk_cancel_sleeping_lock(lock, 1);
2498 2493 }
2499 2494 }
2500 2495 lock = nlock;
2501 2496 }
2502 2497
2503 2498 /* delete active locks */
2504 2499 lock = ACTIVE_HEAD(gp)->l_next;
2505 2500 while (lock != ACTIVE_HEAD(gp)) {
2506 2501 nlock = lock->l_next;
2507 2502 if (lock->l_vnode->v_vfsp == vfsp) {
2508 2503 ASSERT(IS_PXFS(lock));
2509 2504 if (GETPXFSID(lock->l_flock.l_sysid) ==
2510 2505 pxfsid) {
2511 2506 flk_delete_active_lock(lock, 0);
2512 2507 flk_wakeup(lock, 1);
2513 2508 flk_free_lock(lock);
2514 2509 }
2515 2510 }
2516 2511 lock = nlock;
2517 2512 }
2518 2513 mutex_exit(&gp->gp_mutex);
2519 2514 }
2520 2515 }
2521 2516
2522 2517 /*
2523 2518 * Search for a sleeping lock manager lock which matches exactly this lock
2524 2519 * request; if one is found, fake a signal to cancel it.
2525 2520 *
2526 2521 * Return 1 if a matching lock was found, 0 otherwise.
2527 2522 */
2528 2523
2529 2524 static int
2530 2525 flk_canceled(lock_descriptor_t *request)
2531 2526 {
2532 2527 lock_descriptor_t *lock, *nlock;
2533 2528 graph_t *gp = request->l_graph;
2534 2529 vnode_t *vp = request->l_vnode;
2535 2530
2536 2531 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2537 2532 ASSERT(IS_LOCKMGR(request));
2538 2533 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2539 2534
2540 2535 if (lock) {
2541 2536 while (lock->l_vnode == vp) {
2542 2537 nlock = lock->l_next;
2543 2538 if (SAME_OWNER(lock, request) &&
2544 2539 lock->l_start == request->l_start &&
2545 2540 lock->l_end == request->l_end) {
2546 2541 INTERRUPT_WAKEUP(lock);
2547 2542 return (1);
2548 2543 }
2549 2544 lock = nlock;
2550 2545 }
2551 2546 }
2552 2547 return (0);
2553 2548 }
2554 2549
2555 2550 /*
2556 2551 * Remove all the locks for the vnode belonging to the given pid and sysid.
2557 2552 */
2558 2553
2559 2554 void
2560 2555 cleanlocks(vnode_t *vp, pid_t pid, int sysid)
2561 2556 {
2562 2557 graph_t *gp;
2563 2558 lock_descriptor_t *lock, *nlock;
2564 2559 lock_descriptor_t *link_stack;
2565 2560
2566 2561 STACK_INIT(link_stack);
2567 2562
2568 2563 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2569 2564
2570 2565 if (gp == NULL)
2571 2566 return;
2572 2567 mutex_enter(&gp->gp_mutex);
2573 2568
2574 2569 CHECK_SLEEPING_LOCKS(gp);
2575 2570 CHECK_ACTIVE_LOCKS(gp);
2576 2571
2577 2572 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2578 2573
2579 2574 if (lock) {
2580 2575 do {
2581 2576 nlock = lock->l_next;
2582 2577 if ((lock->l_flock.l_pid == pid ||
2583 2578 pid == IGN_PID) &&
2584 2579 lock->l_flock.l_sysid == sysid) {
2585 2580 CANCEL_WAKEUP(lock);
2586 2581 }
2587 2582 lock = nlock;
2588 2583 } while (lock->l_vnode == vp);
2589 2584 }
2590 2585
2591 2586 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2592 2587
2593 2588 if (lock) {
2594 2589 do {
2595 2590 nlock = lock->l_next;
2596 2591 if ((lock->l_flock.l_pid == pid ||
2597 2592 pid == IGN_PID) &&
2598 2593 lock->l_flock.l_sysid == sysid) {
2599 2594 flk_delete_active_lock(lock, 0);
2600 2595 STACK_PUSH(link_stack, lock, l_stack);
2601 2596 }
2602 2597 lock = nlock;
2603 2598 } while (lock->l_vnode == vp);
2604 2599 }
2605 2600
2606 2601 while ((lock = STACK_TOP(link_stack)) != NULL) {
2607 2602 STACK_POP(link_stack, l_stack);
2608 2603 flk_wakeup(lock, 1);
2609 2604 flk_free_lock(lock);
2610 2605 }
2611 2606
2612 2607 CHECK_SLEEPING_LOCKS(gp);
2613 2608 CHECK_ACTIVE_LOCKS(gp);
2614 2609 CHECK_OWNER_LOCKS(gp, pid, sysid, vp);
2615 2610 mutex_exit(&gp->gp_mutex);
2616 2611 }
2617 -/* ONC_PLUS EXTRACT END */
2618 2612
2619 2613
2620 2614 /*
2621 2615 * Called from 'fs' read and write routines for files that have mandatory
2622 2616 * locking enabled.
2623 2617 */
2624 2618
2625 2619 int
2626 2620 chklock(
2627 2621 struct vnode *vp,
2628 2622 int iomode,
2629 2623 u_offset_t offset,
2630 2624 ssize_t len,
2631 2625 int fmode,
2632 2626 caller_context_t *ct)
2633 2627 {
2634 2628 register int i;
2635 2629 struct flock64 bf;
2636 2630 int error = 0;
2637 2631
2638 2632 bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
2639 2633 bf.l_whence = 0;
2640 2634 bf.l_start = offset;
2641 2635 bf.l_len = len;
2642 2636 if (ct == NULL) {
2643 2637 bf.l_pid = curproc->p_pid;
2644 2638 bf.l_sysid = 0;
2645 2639 } else {
2646 2640 bf.l_pid = ct->cc_pid;
2647 2641 bf.l_sysid = ct->cc_sysid;
2648 2642 }
2649 2643 i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK;
2650 2644 if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 ||
2651 2645 bf.l_type != F_UNLCK)
2652 2646 error = i ? i : EAGAIN;
2653 2647 return (error);
2654 2648 }
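
chklock() fills in an flock64 describing the intended I/O and asks reclock() whether anything conflicts, failing with EAGAIN if so. The closest user-land analogue of that probe is fcntl(F_GETLK), sketched below: fill in the range and the intended access, and F_UNLCK on return means no conflict. This illustrates the idea only; it is not the kernel path, and probe_range is a hypothetical helper.

	#include <sys/types.h>
	#include <fcntl.h>
	#include <errno.h>

	/*
	 * Return 0 if [offset, offset+len) is free for the given access,
	 * -1 (with errno set) otherwise.
	 */
	static int
	probe_range(int fd, int writing, off_t offset, off_t len)
	{
		struct flock fl;

		fl.l_type = writing ? F_WRLCK : F_RDLCK;
		fl.l_whence = SEEK_SET;
		fl.l_start = offset;
		fl.l_len = len;
		if (fcntl(fd, F_GETLK, &fl) == -1)
			return (-1);
		if (fl.l_type != F_UNLCK) {
			errno = EAGAIN;	/* someone else's lock blocks us */
			return (-1);
		}
		return (0);
	}
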
2655 2649
2656 -/* ONC_PLUS EXTRACT START */
2657 2650 /*
2658 2651 * convoff - converts the given data (start, whence) to the
2659 2652 * given whence.
2660 2653 */
2661 2654 int
2662 2655 convoff(vp, lckdat, whence, offset)
2663 2656 struct vnode *vp;
2664 2657 struct flock64 *lckdat;
2665 2658 int whence;
2666 2659 offset_t offset;
2667 2660 {
2668 2661 int error;
2669 2662 struct vattr vattr;
2670 2663
2671 2664 if ((lckdat->l_whence == 2) || (whence == 2)) {
2672 2665 vattr.va_mask = AT_SIZE;
2673 2666 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
2674 2667 return (error);
2675 2668 }
2676 2669
2677 2670 switch (lckdat->l_whence) {
2678 2671 case 1:
2679 2672 lckdat->l_start += offset;
2680 2673 break;
2681 2674 case 2:
2682 2675 lckdat->l_start += vattr.va_size;
2683 2676 /* FALLTHRU */
2684 2677 case 0:
2685 2678 break;
2686 2679 default:
2687 2680 return (EINVAL);
2688 2681 }
2689 2682
2690 2683 if (lckdat->l_start < 0)
2691 2684 return (EINVAL);
2692 2685
2693 2686 switch (whence) {
2694 2687 case 1:
2695 2688 lckdat->l_start -= offset;
2696 2689 break;
2697 2690 case 2:
2698 2691 lckdat->l_start -= vattr.va_size;
2699 2692 /* FALLTHRU */
2700 2693 case 0:
2701 2694 break;
2702 2695 default:
2703 2696 return (EINVAL);
2704 2697 }
2705 2698
2706 2699 lckdat->l_whence = (short)whence;
2707 2700 return (0);
2708 2701 }
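
convoff() works in two steps: first normalize l_start to an absolute file offset using the old whence, then re-bias it for the requested whence (0, 1, and 2 correspond to SEEK_SET, SEEK_CUR, and SEEK_END, as in the code above). A standalone sketch of the same arithmetic, with the current offset and file size passed in explicitly; convert_whence is a hypothetical name, and -1 doubles as the error value only because this is a sketch.

	#include <stdio.h>

	/* Convert start from 'from' whence to 'to' whence. */
	static long long
	convert_whence(long long start, int from, int to,
	    long long cur_offset, long long file_size)
	{
		/* step 1: make start absolute */
		switch (from) {
		case 0: break;
		case 1: start += cur_offset; break;
		case 2: start += file_size; break;
		default: return (-1);
		}
		if (start < 0)
			return (-1);

		/* step 2: re-bias for the requested whence */
		switch (to) {
		case 0: break;
		case 1: start -= cur_offset; break;
		case 2: start -= file_size; break;
		default: return (-1);
		}
		return (start);
	}

	int
	main(void)
	{
		/* 100 bytes past a current offset of 500, made absolute */
		printf("%lld\n",
		    convert_whence(100, 1, 0, 500, 4096));	/* 600 */
		return (0);
	}
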
2709 -/* ONC_PLUS EXTRACT END */
2710 2702
2711 2703
2712 2704 /* proc_graph function definitions */
2713 2705
2714 2706 /*
2715 2707 * This function checks for a deadlock due to the new 'lock'. If a
2716 2708 * deadlock is found, the edges of this lock are freed and 1 is returned.
2717 2709 */
2718 2710
2719 2711 static int
2720 2712 flk_check_deadlock(lock_descriptor_t *lock)
2721 2713 {
2722 2714 proc_vertex_t *start_vertex, *pvertex;
2723 2715 proc_vertex_t *dvertex;
2724 2716 proc_edge_t *pep, *ppep;
2725 2717 edge_t *ep, *nep;
2726 2718 proc_vertex_t *process_stack;
2727 2719
2728 2720 STACK_INIT(process_stack);
2729 2721
2730 2722 mutex_enter(&flock_lock);
2731 2723 start_vertex = flk_get_proc_vertex(lock);
2732 2724 ASSERT(start_vertex != NULL);
2733 2725
2734 2726 /* construct the edges from this process to other processes */
2735 2727
2736 2728 ep = FIRST_ADJ(lock);
2737 2729 while (ep != HEAD(lock)) {
2738 2730 proc_vertex_t *adj_proc;
2739 2731
2740 2732 adj_proc = flk_get_proc_vertex(ep->to_vertex);
2741 2733 for (pep = start_vertex->edge; pep != NULL; pep = pep->next) {
2742 2734 if (pep->to_proc == adj_proc) {
2743 2735 ASSERT(pep->refcount);
2744 2736 pep->refcount++;
2745 2737 break;
2746 2738 }
2747 2739 }
2748 2740 if (pep == NULL) {
2749 2741 pep = flk_get_proc_edge();
2750 2742 pep->to_proc = adj_proc;
2751 2743 pep->refcount = 1;
2752 2744 adj_proc->incount++;
2753 2745 pep->next = start_vertex->edge;
2754 2746 start_vertex->edge = pep;
2755 2747 }
2756 2748 ep = NEXT_ADJ(ep);
2757 2749 }
2758 2750
2759 2751 ep = FIRST_IN(lock);
2760 2752
2761 2753 while (ep != HEAD(lock)) {
2762 2754 proc_vertex_t *in_proc;
2763 2755
2764 2756 in_proc = flk_get_proc_vertex(ep->from_vertex);
2765 2757
2766 2758 for (pep = in_proc->edge; pep != NULL; pep = pep->next) {
2767 2759 if (pep->to_proc == start_vertex) {
2768 2760 ASSERT(pep->refcount);
2769 2761 pep->refcount++;
2770 2762 break;
2771 2763 }
2772 2764 }
2773 2765 if (pep == NULL) {
2774 2766 pep = flk_get_proc_edge();
2775 2767 pep->to_proc = start_vertex;
2776 2768 pep->refcount = 1;
2777 2769 start_vertex->incount++;
2778 2770 pep->next = in_proc->edge;
2779 2771 in_proc->edge = pep;
2780 2772 }
2781 2773 ep = NEXT_IN(ep);
2782 2774 }
2783 2775
2784 2776 if (start_vertex->incount == 0) {
2785 2777 mutex_exit(&flock_lock);
2786 2778 return (0);
2787 2779 }
2788 2780
2789 2781 flk_proc_graph_uncolor();
2790 2782
2791 2783 start_vertex->p_sedge = start_vertex->edge;
2792 2784
2793 2785 STACK_PUSH(process_stack, start_vertex, p_stack);
2794 2786
2795 2787 while ((pvertex = STACK_TOP(process_stack)) != NULL) {
2796 2788 for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) {
2797 2789 dvertex = pep->to_proc;
2798 2790 if (!PROC_ARRIVED(dvertex)) {
2799 2791 STACK_PUSH(process_stack, dvertex, p_stack);
2800 2792 dvertex->p_sedge = dvertex->edge;
2801 2793 PROC_ARRIVE(pvertex);
2802 2794 pvertex->p_sedge = pep->next;
2803 2795 break;
2804 2796 }
2805 2797 if (!PROC_DEPARTED(dvertex))
2806 2798 goto deadlock;
2807 2799 }
2808 2800 if (pep == NULL) {
2809 2801 PROC_DEPART(pvertex);
2810 2802 STACK_POP(process_stack, p_stack);
2811 2803 }
2812 2804 }
2813 2805 mutex_exit(&flock_lock);
2814 2806 return (0);
2815 2807
2816 2808 deadlock:
2817 2809
2818 2810 /* we remove all lock edges and proc edges */
2819 2811
2820 2812 ep = FIRST_ADJ(lock);
2821 2813 while (ep != HEAD(lock)) {
2822 2814 proc_vertex_t *adj_proc;
2823 2815 adj_proc = flk_get_proc_vertex(ep->to_vertex);
2824 2816 nep = NEXT_ADJ(ep);
2825 2817 IN_LIST_REMOVE(ep);
2826 2818 ADJ_LIST_REMOVE(ep);
2827 2819 flk_free_edge(ep);
2828 2820 ppep = start_vertex->edge;
2829 2821 for (pep = start_vertex->edge; pep != NULL; ppep = pep,
2830 2822 pep = ppep->next) {
2831 2823 if (pep->to_proc == adj_proc) {
2832 2824 pep->refcount--;
2833 2825 if (pep->refcount == 0) {
2834 2826 if (pep == ppep) {
2835 2827 start_vertex->edge = pep->next;
2836 2828 } else {
2837 2829 ppep->next = pep->next;
2838 2830 }
2839 2831 adj_proc->incount--;
2840 2832 flk_proc_release(adj_proc);
2841 2833 flk_free_proc_edge(pep);
2842 2834 }
2843 2835 break;
2844 2836 }
2845 2837 }
2846 2838 ep = nep;
2847 2839 }
2848 2840 ep = FIRST_IN(lock);
2849 2841 while (ep != HEAD(lock)) {
2850 2842 proc_vertex_t *in_proc;
2851 2843 in_proc = flk_get_proc_vertex(ep->from_vertex);
2852 2844 nep = NEXT_IN(ep);
2853 2845 IN_LIST_REMOVE(ep);
2854 2846 ADJ_LIST_REMOVE(ep);
2855 2847 flk_free_edge(ep);
2856 2848 ppep = in_proc->edge;
2857 2849 for (pep = in_proc->edge; pep != NULL; ppep = pep,
2858 2850 pep = ppep->next) {
2859 2851 if (pep->to_proc == start_vertex) {
2860 2852 pep->refcount--;
2861 2853 if (pep->refcount == 0) {
2862 2854 if (pep == ppep) {
2863 2855 in_proc->edge = pep->next;
2864 2856 } else {
2865 2857 ppep->next = pep->next;
2866 2858 }
2867 2859 start_vertex->incount--;
2868 2860 flk_proc_release(in_proc);
2869 2861 flk_free_proc_edge(pep);
2870 2862 }
2871 2863 break;
2872 2864 }
2873 2865 }
2874 2866 ep = nep;
2875 2867 }
2876 2868 flk_proc_release(start_vertex);
2877 2869 mutex_exit(&flock_lock);
2878 2870 return (1);
2879 2871 }
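
The deadlock check above is a depth-first search over the process graph: reaching a vertex that has arrived but not yet departed means a back edge, i.e., a wait cycle. A compact recursive sketch of the same three-state DFS on a hard-coded cycle; the kernel version is iterative and encodes arrive/depart with mark timestamps rather than an enum.

	#include <stdio.h>

	#define	NP	3

	enum { WHITE, GRAY, BLACK };	/* unvisited / arrived / departed */

	/* adj[p] lists the processes that p waits on, terminated by -1 */
	static const int adj[NP][NP + 1] = {
		{ 1, -1 },	/* 0 waits on 1 */
		{ 2, -1 },	/* 1 waits on 2 */
		{ 0, -1 },	/* 2 waits on 0: closes the cycle */
	};
	static int state[NP];

	static int
	has_cycle(int p)
	{
		int i, q;

		state[p] = GRAY;
		for (i = 0; (q = adj[p][i]) != -1; i++) {
			if (state[q] == GRAY)
				return (1);	/* back edge: deadlock */
			if (state[q] == WHITE && has_cycle(q))
				return (1);
		}
		state[p] = BLACK;
		return (0);
	}

	int
	main(void)
	{
		printf("deadlock: %d\n", has_cycle(0));	/* 1 */
		return (0);
	}
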
2880 2872
2881 2873 /*
2882 2874 * Get a proc vertex. If the lock's pvertex value indexes a matching
2883 2875 * proc vertex in the list we return that; otherwise we allocate one.
2884 2876 * If necessary, we also grow the list of vertices.
2885 2877 */
2886 2878
2887 2879 static proc_vertex_t *
2888 2880 flk_get_proc_vertex(lock_descriptor_t *lock)
2889 2881 {
2890 2882 int i;
2891 2883 proc_vertex_t *pv;
2892 2884 proc_vertex_t **palloc;
2893 2885
2894 2886 ASSERT(MUTEX_HELD(&flock_lock));
2895 2887 if (lock->pvertex != -1) {
2896 2888 ASSERT(lock->pvertex >= 0);
2897 2889 pv = pgraph.proc[lock->pvertex];
2898 2890 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
2899 2891 return (pv);
2900 2892 }
2901 2893 }
2902 2894 for (i = 0; i < pgraph.gcount; i++) {
2903 2895 pv = pgraph.proc[i];
2904 2896 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
2905 2897 lock->pvertex = pv->index = i;
2906 2898 return (pv);
2907 2899 }
2908 2900 }
2909 2901 pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP);
2910 2902 pv->pid = lock->l_flock.l_pid;
2911 2903 pv->sysid = lock->l_flock.l_sysid;
2912 2904 flk_proc_vertex_allocs++;
2913 2905 if (pgraph.free != 0) {
2914 2906 for (i = 0; i < pgraph.gcount; i++) {
2915 2907 if (pgraph.proc[i] == NULL) {
2916 2908 pgraph.proc[i] = pv;
2917 2909 lock->pvertex = pv->index = i;
2918 2910 pgraph.free--;
2919 2911 return (pv);
2920 2912 }
2921 2913 }
2922 2914 }
2923 2915 palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) *
2924 2916 sizeof (proc_vertex_t *), KM_SLEEP);
2925 2917
2926 2918 if (pgraph.proc) {
2927 2919 bcopy(pgraph.proc, palloc,
2928 2920 pgraph.gcount * sizeof (proc_vertex_t *));
2929 2921
2930 2922 kmem_free(pgraph.proc,
2931 2923 pgraph.gcount * sizeof (proc_vertex_t *));
2932 2924 }
2933 2925 pgraph.proc = palloc;
2934 2926 pgraph.free += (PROC_CHUNK - 1);
2935 2927 pv->index = lock->pvertex = pgraph.gcount;
2936 2928 pgraph.gcount += PROC_CHUNK;
2937 2929 pgraph.proc[pv->index] = pv;
2938 2930 return (pv);
2939 2931 }
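
The vertex table grows by PROC_CHUNK pointers at a time and tracks how many slots are free, so released indexes are reused before the table is grown again. A user-land sketch of that slot allocator follows; CHUNK, slots, and the function names are assumptions for illustration.

	#include <stdlib.h>
	#include <string.h>

	#define	CHUNK	4

	static void **slots;
	static int nslots, nfree;

	/* Return the index of a free slot, reusing holes before growing. */
	static int
	slot_alloc(void *item)
	{
		void **grown;
		int i;

		if (nfree > 0) {
			for (i = 0; i < nslots; i++) {
				if (slots[i] == NULL) {
					slots[i] = item;
					nfree--;
					return (i);
				}
			}
		}
		grown = calloc(nslots + CHUNK, sizeof (void *));
		if (grown == NULL)
			return (-1);
		if (slots != NULL) {
			memcpy(grown, slots, nslots * sizeof (void *));
			free(slots);
		}
		slots = grown;
		i = nslots;
		nslots += CHUNK;
		nfree += CHUNK - 1;	/* one new slot is used right away */
		slots[i] = item;
		return (i);
	}

	static void
	slot_free(int i)
	{
		slots[i] = NULL;	/* hole available for reuse */
		nfree++;
	}
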
2940 2932
2941 2933 /*
2942 2934 * Allocate a proc edge.
2943 2935 */
2944 2936
2945 2937 static proc_edge_t *
2946 2938 flk_get_proc_edge()
2947 2939 {
2948 2940 proc_edge_t *pep;
2949 2941
2950 2942 pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
2951 2943 flk_proc_edge_allocs++;
2952 2944 return (pep);
2953 2945 }
2954 2946
2955 2947 /*
2956 2948 * Free the proc edge. Called whenever its reference count goes to zero.
2957 2949 */
2958 2950
2959 2951 static void
2960 2952 flk_free_proc_edge(proc_edge_t *pep)
2961 2953 {
2962 2954 ASSERT(pep->refcount == 0);
2963 2955 kmem_free((void *)pep, sizeof (proc_edge_t));
2964 2956 flk_proc_edge_frees++;
2965 2957 }
2966 2958
2967 2959 /*
2968 2960 * Explicit coloring of the graph is done only when the mark value wraps.
2969 2961 */
2970 2962
2971 2963 static void
2972 2964 flk_proc_graph_uncolor()
2973 2965 {
2974 2966 int i;
2975 2967
2976 2968 if (pgraph.mark == UINT_MAX) {
2977 2969 for (i = 0; i < pgraph.gcount; i++)
2978 2970 if (pgraph.proc[i] != NULL) {
2979 2971 pgraph.proc[i]->atime = 0;
2980 2972 pgraph.proc[i]->dtime = 0;
2981 2973 }
2982 2974 pgraph.mark = 1;
2983 2975 } else {
2984 2976 pgraph.mark++;
2985 2977 }
2986 2978 }
2987 2979
2988 2980 /*
2989 2981 * Release the proc vertex iff it has neither in-edges nor out-edges.
2990 2982 */
2991 2983
2992 2984 static void
2993 2985 flk_proc_release(proc_vertex_t *proc)
2994 2986 {
2995 2987 ASSERT(MUTEX_HELD(&flock_lock));
2996 2988 if (proc->edge == NULL && proc->incount == 0) {
2997 2989 pgraph.proc[proc->index] = NULL;
2998 2990 pgraph.free++;
2999 2991 kmem_free(proc, sizeof (proc_vertex_t));
3000 2992 flk_proc_vertex_frees++;
3001 2993 }
3002 2994 }
3003 2995
3004 2996 /*
3005 2997 * Updates the process graph to reflect a change in a lock graph.
3006 2998 * Note: We should call this function only after we have a correctly
3007 2999 * recomputed lock graph. Otherwise we might miss a deadlock detection.
3008 3000 * E.g., in flk_relation() we call this function after
3009 3001 * flk_recompute_dependencies(); otherwise, if a process tries to lock
3010 3002 * a vnode hashed into another graph, it might sleep forever.
3011 3003 */
3012 3004
3013 3005 static void
3014 3006 flk_update_proc_graph(edge_t *ep, int delete)
3015 3007 {
3016 3008 proc_vertex_t *toproc, *fromproc;
3017 3009 proc_edge_t *pep, *prevpep;
3018 3010
3019 3011 mutex_enter(&flock_lock);
3020 3012 toproc = flk_get_proc_vertex(ep->to_vertex);
3021 3013 fromproc = flk_get_proc_vertex(ep->from_vertex);
3022 3014
3023 3015 if (!delete)
3024 3016 goto add;
3025 3017 pep = prevpep = fromproc->edge;
3026 3018
3027 3019 ASSERT(pep != NULL);
3028 3020 while (pep != NULL) {
3029 3021 if (pep->to_proc == toproc) {
3030 3022 ASSERT(pep->refcount > 0);
3031 3023 pep->refcount--;
3032 3024 if (pep->refcount == 0) {
3033 3025 if (pep == prevpep) {
3034 3026 fromproc->edge = pep->next;
3035 3027 } else {
3036 3028 prevpep->next = pep->next;
3037 3029 }
3038 3030 toproc->incount--;
3039 3031 flk_proc_release(toproc);
3040 3032 flk_free_proc_edge(pep);
3041 3033 }
3042 3034 break;
3043 3035 }
3044 3036 prevpep = pep;
3045 3037 pep = pep->next;
3046 3038 }
3047 3039 flk_proc_release(fromproc);
3048 3040 mutex_exit(&flock_lock);
3049 3041 return;
3050 3042 add:
3051 3043
3052 3044 pep = fromproc->edge;
3053 3045
3054 3046 while (pep != NULL) {
3055 3047 if (pep->to_proc == toproc) {
3056 3048 ASSERT(pep->refcount > 0);
3057 3049 pep->refcount++;
3058 3050 break;
3059 3051 }
3060 3052 pep = pep->next;
3061 3053 }
3062 3054 if (pep == NULL) {
3063 3055 pep = flk_get_proc_edge();
3064 3056 pep->to_proc = toproc;
3065 3057 pep->refcount = 1;
3066 3058 toproc->incount++;
3067 3059 pep->next = fromproc->edge;
3068 3060 fromproc->edge = pep;
3069 3061 }
3070 3062 mutex_exit(&flock_lock);
3071 3063 }
3072 3064
3073 -/* ONC_PLUS EXTRACT START */
3074 3065 /*
3075 3066 * Set the control status for lock manager requests.
3076 3067 *
3077 3068 */
3078 3069
3079 3070 /*
3080 3071 * PSARC case 1997/292
3081 3072 *
3082 3073 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid().
3083 3074 * Effects: Set the state of the NLM server identified by "nlmid"
3084 3075 * in the NLM registry to state "nlm_state."
3085 3076 * Raises exception no_such_nlm if "nlmid" doesn't identify a known
3086 3077 * NLM server to this LLM.
3087 3078 * Note that when this routine is called with NLM_SHUTTING_DOWN there
3088 3079 * may be lock requests that have gotten started but not finished. In
3089 3080 * particular, there may be blocking requests that are in the callback code
3090 3081 * before sleeping (so they're not holding the lock for the graph). If
3091 3082 * such a thread reacquires the graph's lock (to go to sleep) after
3092 3083 * NLM state in the NLM registry is set to a non-up value,
3093 3084 * it will notice the status and bail out. If the request gets
3094 3085 * granted before the thread can check the NLM registry, let it
3095 3086 * continue normally. It will get flushed when we are called with NLM_DOWN.
3096 3087 *
3097 3088 * Modifies: nlm_reg_obj (global)
3098 3089 * Arguments:
3099 3090 * nlmid (IN): id uniquely identifying an NLM server
3100 3091 * nlm_state (IN): NLM server state to change "nlmid" to
3101 3092 */
3102 3093 void
3103 3094 cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state)
3104 3095 {
3105 3096 /*
3106 3097 * Check to see if node is booted as a cluster. If not, return.
3107 3098 */
3108 3099 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
3109 3100 return;
3110 3101 }
3111 3102
3112 3103 /*
3113 3104 * Check for development/debugging. It is possible to boot a node
3114 3105 * in non-cluster mode, and then run a special script, currently
3115 3106 * available only to developers, to bring up the node as part of a
3116 3107 * cluster. The problem is that running such a script does not
3117 3108 * result in the routine flk_init() being called and hence global array
3118 3109 * nlm_reg_status is NULL. The NLM thinks it's in cluster mode,
3119 3110 * but the LLM needs to do an additional check to see if the global
3120 3111 * array has been created or not. If nlm_reg_status is NULL, then
3121 3112 * return, else continue.
3122 3113 */
3123 3114 if (nlm_reg_status == NULL) {
3124 3115 return;
3125 3116 }
3126 3117
3127 3118 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
3128 3119 mutex_enter(&nlm_reg_lock);
3129 3120
3130 3121 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) {
3131 3122 /*
3132 3123 * If the NLM server "nlmid" is unknown in the NLM registry,
3133 3124 * add it to the registry in the nlm shutting down state.
3134 3125 */
3135 3126 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3136 3127 FLK_NLM_SHUTTING_DOWN);
3137 3128 } else {
3138 3129 /*
3139 3130 * Change the state of the NLM server identified by "nlmid"
3140 3131 * in the NLM registry to the argument "nlm_state."
3141 3132 */
3142 3133 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3143 3134 nlm_state);
3144 3135 }
3145 3136
3146 3137 /*
3147 3138 * The reason we must register the NLM server that is shutting down
3148 3139 * with an LLM that doesn't already know about it (never sent a lock
3149 3140 * request) is to handle correctly a race between shutdown and a new
3150 3141 * lock request. Suppose that a shutdown request from the NLM server
3151 3142 * invokes this routine at the LLM, and a thread is spawned to
3152 3143 * service the request. Now suppose a new lock request is in
3153 3144 * progress and has already passed the first line of defense in
3154 3145 * reclock(), which denies new lock requests from NLM servers
3155 3146 * that are not in the NLM_UP state. After the current routine
3156 3147 * is invoked for both phases of shutdown, the routine will return,
3157 3148 * having done nothing, and the lock request will proceed and
3158 3149 * probably be granted. The problem is that the shutdown was ignored
3159 3150 * by the lock request because there was no record of that NLM server
3160 3151 * shutting down. We will be in the peculiar position of thinking
3161 3152 * that we've shutdown the NLM server and all locks at all LLMs have
3162 3153 * been discarded, but in fact there's still one lock held.
3163 3154 * The solution is to record the existence of NLM server and change
3164 3155 * its state immediately to NLM_SHUTTING_DOWN. The lock request in
3165 3156 * progress may proceed because the next phase NLM_DOWN will catch
3166 3157 * this lock and discard it.
3167 3158 */
3168 3159 mutex_exit(&nlm_reg_lock);
3169 3160
3170 3161 switch (nlm_state) {
3171 3162 case FLK_NLM_UP:
3172 3163 /*
3173 3164 * Change the NLM state of all locks still held on behalf of
3174 3165 * the NLM server identified by "nlmid" to NLM_UP.
3175 3166 */
3176 3167 cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP);
3177 3168 break;
3178 3169
3179 3170 case FLK_NLM_SHUTTING_DOWN:
3180 3171 /*
3181 3172 * Wake up all sleeping locks for the NLM server identified
3182 3173 * by "nlmid." Note that eventually all woken threads will
3183 3174 * have their lock requests cancelled and descriptors
3184 3175 * removed from the sleeping lock list. Note that the NLM
3185 3176 * server state associated with each lock descriptor is
3186 3177 * changed to FLK_NLM_SHUTTING_DOWN.
3187 3178 */
3188 3179 cl_flk_wakeup_sleeping_nlm_locks(nlmid);
3189 3180 break;
3190 3181
3191 3182 case FLK_NLM_DOWN:
3192 3183 /*
3193 3184 * Discard all active, granted locks for this NLM server
3194 3185 * identified by "nlmid."
3195 3186 */
3196 3187 cl_flk_unlock_nlm_granted(nlmid);
3197 3188 break;
3198 3189
3199 3190 default:
3200 3191 panic("cl_set_nlm_status: bad status (%d)", nlm_state);
3201 3192 }
3202 3193 }
3203 3194
3204 3195 /*
3205 3196 * Set the control status for lock manager requests.
3206 3197 *
3207 3198 * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there
3208 3199 * may be lock requests that have gotten started but not finished. In
3209 3200 * particular, there may be blocking requests that are in the callback code
3210 3201 * before sleeping (so they're not holding the lock for the graph). If
3211 3202 * such a thread reacquires the graph's lock (to go to sleep) after
3212 3203 * flk_lockmgr_status is set to a non-up value, it will notice the status
3213 3204 * and bail out. If the request gets granted before the thread can check
3214 3205 * flk_lockmgr_status, let it continue normally. It will get flushed when
3215 3206 * we are called with FLK_LOCKMGR_DOWN.
3216 3207 */
3217 3208
3218 3209 void
3219 3210 flk_set_lockmgr_status(flk_lockmgr_status_t status)
3220 3211 {
3221 3212 int i;
3222 3213 graph_t *gp;
3223 3214 struct flock_globals *fg;
3224 3215
3225 3216 fg = flk_get_globals();
3226 3217 ASSERT(fg != NULL);
3227 3218
3228 3219 mutex_enter(&flock_lock);
3229 3220 fg->flk_lockmgr_status = status;
3230 3221 mutex_exit(&flock_lock);
3231 3222
3232 3223 /*
3233 3224 * If the lock manager is coming back up, all that's needed is to
3234 3225 * propagate this information to the graphs. If the lock manager
3235 3226 * is going down, additional action is required, and each graph's
3236 3227 * copy of the state is updated atomically with this other action.
3237 3228 */
3238 3229 switch (status) {
3239 3230 case FLK_LOCKMGR_UP:
3240 3231 for (i = 0; i < HASH_SIZE; i++) {
3241 3232 mutex_enter(&flock_lock);
3242 3233 gp = lock_graph[i];
3243 3234 mutex_exit(&flock_lock);
3244 3235 if (gp == NULL)
3245 3236 continue;
3246 3237 mutex_enter(&gp->gp_mutex);
3247 3238 fg->lockmgr_status[i] = status;
3248 3239 mutex_exit(&gp->gp_mutex);
3249 3240 }
3250 3241 break;
3251 3242 case FLK_WAKEUP_SLEEPERS:
3252 3243 wakeup_sleeping_lockmgr_locks(fg);
3253 3244 break;
3254 3245 case FLK_LOCKMGR_DOWN:
3255 3246 unlock_lockmgr_granted(fg);
3256 3247 break;
3257 3248 default:
3258 3249 panic("flk_set_lockmgr_status: bad status (%d)", status);
3259 3250 break;
3260 3251 }
3261 3252 }
3262 3253
3263 3254 /*
3264 3255 * This routine returns all the locks that are active or sleeping and are
3265 3256 * associated with a particular set of identifiers. If lock_state != 0, then
3266 3257 * only locks that match the lock_state are returned. If lock_state == 0, then
3267 3258 * all locks are returned. If pid == NOPID, the pid is ignored. If
3268 3259 * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the
3269 3260 * vnode pointer is ignored.
3270 3261 *
3271 3262 * A list containing the vnode pointer and an flock structure
3272 3263 * describing the lock is returned. Each element in the list is
3273 3264 * dynamically allocated and must be freed by the caller. The
3274 3265 * last item in the list is denoted by a NULL value in the ll_next
3275 3266 * field.
3276 3267 *
3277 3268 * The vnode pointers returned are held. The caller is responsible
3278 3269 * for releasing these. Note that the returned list is only a snapshot of
3279 3270 * the current lock information, and that it is a snapshot of a moving
3280 3271 * target (only one graph is locked at a time).
3281 3272 */
3282 3273
3283 3274 locklist_t *
3284 3275 get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid,
3285 3276 pid_t pid, const vnode_t *vp, zoneid_t zoneid)
3286 3277 {
3287 3278 lock_descriptor_t *lock;
3288 3279 lock_descriptor_t *graph_head;
3289 3280 locklist_t listhead;
3290 3281 locklist_t *llheadp;
3291 3282 locklist_t *llp;
3292 3283 locklist_t *lltp;
3293 3284 graph_t *gp;
3294 3285 int i;
3295 3286 int first_index; /* graph index */
3296 3287 int num_indexes; /* graph index */
3297 3288
3298 3289 ASSERT((list_type == FLK_ACTIVE_STATE) ||
3299 3290 (list_type == FLK_SLEEPING_STATE));
3300 3291
3301 3292 /*
3302 3293 * Get a pointer to something to use as a list head while building
3303 3294 * the rest of the list.
3304 3295 */
3305 3296 llheadp = &listhead;
3306 3297 lltp = llheadp;
3307 3298 llheadp->ll_next = (locklist_t *)NULL;
3308 3299
3309 3300 /* Figure out which graphs we want to look at. */
3310 3301 if (vp == NULL) {
3311 3302 first_index = 0;
3312 3303 num_indexes = HASH_SIZE;
3313 3304 } else {
3314 3305 first_index = HASH_INDEX(vp);
3315 3306 num_indexes = 1;
3316 3307 }
3317 3308
3318 3309 for (i = first_index; i < first_index + num_indexes; i++) {
3319 3310 mutex_enter(&flock_lock);
3320 3311 gp = lock_graph[i];
3321 3312 mutex_exit(&flock_lock);
3322 3313 if (gp == NULL) {
3323 3314 continue;
3324 3315 }
3325 3316
3326 3317 mutex_enter(&gp->gp_mutex);
3327 3318 graph_head = (list_type == FLK_ACTIVE_STATE) ?
3328 3319 ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp);
3329 3320 for (lock = graph_head->l_next;
3330 3321 lock != graph_head;
3331 3322 lock = lock->l_next) {
3332 3323 if (use_sysid && lock->l_flock.l_sysid != sysid)
3333 3324 continue;
3334 3325 if (pid != NOPID && lock->l_flock.l_pid != pid)
3335 3326 continue;
3336 3327 if (vp != NULL && lock->l_vnode != vp)
3337 3328 continue;
3338 3329 if (lock_state && !(lock_state & lock->l_state))
3339 3330 continue;
3340 3331 if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES)
3341 3332 continue;
3342 3333 /*
3343 3334 * A matching lock was found. Allocate
3344 3335 * space for a new locklist entry and fill
3345 3336 * it in.
3346 3337 */
3347 3338 llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP);
3348 3339 lltp->ll_next = llp;
3349 3340 VN_HOLD(lock->l_vnode);
3350 3341 llp->ll_vp = lock->l_vnode;
3351 3342 create_flock(lock, &(llp->ll_flock));
3352 3343 llp->ll_next = (locklist_t *)NULL;
3353 3344 lltp = llp;
3354 3345 }
3355 3346 mutex_exit(&gp->gp_mutex);
3356 3347 }
3357 3348
3358 3349 llp = llheadp->ll_next;
3359 3350 return (llp);
3360 3351 }
3361 3352
3362 3353 /*
3363 3354 * These two functions are simply interfaces to get_lock_list. They return
3364 3355 * a list of sleeping or active locks for the given sysid and pid. See
3365 3356 * get_lock_list for details.
3366 3357 *
3367 3358 * In either case we don't particularly care to specify the zone of interest;
3368 3359 * the sysid-space is global across zones, so the sysid will map to exactly one
3369 3360 * zone, and we'll return information for that zone.
3370 3361 */
3371 3362
3372 3363 locklist_t *
3373 3364 flk_get_sleeping_locks(int sysid, pid_t pid)
3374 3365 {
3375 3366 return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL,
3376 3367 ALL_ZONES));
3377 3368 }
3378 3369
3379 3370 locklist_t *
3380 3371 flk_get_active_locks(int sysid, pid_t pid)
3381 3372 {
3382 3373 return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL,
3383 3374 ALL_ZONES));
3384 3375 }
3385 3376
3386 3377 /*
3387 3378 * Another interface to get_lock_list. This one returns all the active
3388 3379 * locks for a given vnode. Again, see get_lock_list for details.
3389 3380 *
3390 3381 * We don't need to specify which zone's locks we're interested in. The matter
3391 3382 * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't
3392 3383 * be used by multiple zones, so the list of locks will all be from the right
3393 3384 * zone.
3394 3385 */
3395 3386
3396 3387 locklist_t *
3397 3388 flk_active_locks_for_vp(const vnode_t *vp)
3398 3389 {
3399 3390 return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp,
3400 3391 ALL_ZONES));
3401 3392 }
3402 3393
3403 3394 /*
3404 3395 * Another interface to get_lock_list. This one returns all the active
3405 3396 * nbmand locks for a given vnode. Again, see get_lock_list for details.
3406 3397 *
3407 3398 * See the comment for flk_active_locks_for_vp() for why we don't care to
3408 3399 * specify the particular zone of interest.
3409 3400 */
3410 3401 locklist_t *
3411 3402 flk_active_nbmand_locks_for_vp(const vnode_t *vp)
3412 3403 {
3413 3404 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3414 3405 NOPID, vp, ALL_ZONES));
3415 3406 }
3416 3407
3417 3408 /*
3418 3409 * Another interface to get_lock_list. This one returns all the active
3419 3410 * nbmand locks for a given pid. Again, see get_lock_list for details.
3420 3411 *
3421 3412 * The zone doesn't need to be specified here; the locks held by a
3422 3413 * particular process will either be local (ie, non-NFS) or from the zone
3423 3414 * the process is executing in. This is because other parts of the system
3424 3415 * ensure that an NFS vnode can't be used in a zone other than that in
3425 3416 * which it was opened.
3426 3417 */
3427 3418 locklist_t *
3428 3419 flk_active_nbmand_locks(pid_t pid)
3429 3420 {
3430 3421 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3431 3422 pid, NULL, ALL_ZONES));
3432 3423 }
3433 3424
3434 3425 /*
3435 3426 * Free up all entries in the locklist.
3436 3427 */
3437 3428 void
3438 3429 flk_free_locklist(locklist_t *llp)
3439 3430 {
3440 3431 locklist_t *next_llp;
3441 3432
3442 3433 while (llp) {
3443 3434 next_llp = llp->ll_next;
3444 3435 VN_RELE(llp->ll_vp);
3445 3436 kmem_free(llp, sizeof (*llp));
3446 3437 llp = next_llp;
3447 3438 }
3448 3439 }
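
As a usage note for the list interfaces above: a consumer fetches a snapshot with one of the flk_get_*_locks() wrappers, walks the ll_next chain, and must hand the whole list back to flk_free_locklist() so the per-entry vnode holds are released. A minimal sketch (the function name and the cmn_err() reporting are illustrative, not from this file):

static void
report_active_locks_sketch(int sysid, pid_t pid)
{
	locklist_t *head, *llp;

	/* Snapshot of this owner's active locks (may be empty). */
	head = flk_get_active_locks(sysid, pid);
	for (llp = head; llp != NULL; llp = llp->ll_next) {
		cmn_err(CE_CONT, "vp %p: type %d start %lld len %lld\n",
		    (void *)llp->ll_vp, llp->ll_flock.l_type,
		    (longlong_t)llp->ll_flock.l_start,
		    (longlong_t)llp->ll_flock.l_len);
	}
	/* Drops the VN_HOLD() taken on each ll_vp and frees entries. */
	flk_free_locklist(head);
}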
3449 3440
3450 3441 static void
3451 3442 cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state)
3452 3443 {
3453 3444 /*
3454 3445 * For each graph "lg" in the hash table lock_graph do
3455 3446 * a. Get the list of sleeping locks
3456 3447 * b. For each lock descriptor in the list do
3457 3448 * i. If the requested lock is an NLM server request AND
3458 3449 * the nlmid is the same as the routine argument then
3459 3450 * change the lock descriptor's state field to
3460 3451 * "nlm_state."
3461 3452 * c. Get the list of active locks
3462 3453 * d. For each lock descriptor in the list do
3463 3454 * i. If the requested lock is an NLM server request AND
3464 3455 * the nlmid is the same as the routine argument then
3465 3456 * change the lock descriptor's state field to
3466 3457 * "nlm_state."
3467 3458 */
3468 3459
3469 3460 int i;
3470 3461 graph_t *gp; /* lock graph */
3471 3462 lock_descriptor_t *lock; /* lock */
3472 3463 lock_descriptor_t *nlock = NULL; /* next lock */
3473 3464 int lock_nlmid;
3474 3465
3475 3466 for (i = 0; i < HASH_SIZE; i++) {
3476 3467 mutex_enter(&flock_lock);
3477 3468 gp = lock_graph[i];
3478 3469 mutex_exit(&flock_lock);
3479 3470 if (gp == NULL) {
3480 3471 continue;
3481 3472 }
3482 3473
3483 3474 /* Get list of sleeping locks in current lock graph. */
3484 3475 mutex_enter(&gp->gp_mutex);
3485 3476 for (lock = SLEEPING_HEAD(gp)->l_next;
3486 3477 lock != SLEEPING_HEAD(gp);
3487 3478 lock = nlock) {
3488 3479 nlock = lock->l_next;
3489 3480 /* get NLM id */
3490 3481 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3491 3482
3492 3483 /*
3493 3484 * If NLM server request AND nlmid of lock matches
3494 3485 * nlmid of argument, then set the NLM state of the
3495 3486 * lock to "nlm_state."
3496 3487 */
3497 3488 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
3498 3489 SET_NLM_STATE(lock, nlm_state);
3499 3490 }
3500 3491 }
3501 3492
3502 3493 /* Get list of active locks in current lock graph. */
3503 3494 for (lock = ACTIVE_HEAD(gp)->l_next;
3504 3495 lock != ACTIVE_HEAD(gp);
3505 3496 lock = nlock) {
3506 3497 nlock = lock->l_next;
3507 3498 /* get NLM id */
3508 3499 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3509 3500
3510 3501 /*
3511 3502 * If NLM server request AND nlmid of lock matches
3512 3503 * nlmid of argument, then set the NLM state of the
3513 3504 * lock to "nlm_state."
3514 3505 */
3515 3506 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
3516 3507 ASSERT(IS_ACTIVE(lock));
3517 3508 SET_NLM_STATE(lock, nlm_state);
3518 3509 }
3519 3510 }
3520 3511 mutex_exit(&gp->gp_mutex);
3521 3512 }
3522 3513 }
3523 3514
3524 3515 /*
3525 3516 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid().
3526 3517 * Effects: Find all sleeping lock manager requests _only_ for the NLM server
3527 3518 * identified by "nlmid." Poke those lock requests.
3528 3519 */
3529 3520 static void
3530 3521 cl_flk_wakeup_sleeping_nlm_locks(int nlmid)
3531 3522 {
3532 3523 lock_descriptor_t *lock;
3533 3524 lock_descriptor_t *nlock = NULL; /* next lock */
3534 3525 int i;
3535 3526 graph_t *gp;
3536 3527 int lock_nlmid;
3537 3528
3538 3529 for (i = 0; i < HASH_SIZE; i++) {
3539 3530 mutex_enter(&flock_lock);
3540 3531 gp = lock_graph[i];
3541 3532 mutex_exit(&flock_lock);
3542 3533 if (gp == NULL) {
3543 3534 continue;
3544 3535 }
3545 3536
3546 3537 mutex_enter(&gp->gp_mutex);
3547 3538 for (lock = SLEEPING_HEAD(gp)->l_next;
3548 3539 lock != SLEEPING_HEAD(gp);
3549 3540 lock = nlock) {
3550 3541 nlock = lock->l_next;
3551 3542 /*
3552 3543 * If NLM server request _and_ nlmid of lock matches
3553 3544 * nlmid of argument, then set the NLM state of the
3554 3545 * lock to NLM_SHUTTING_DOWN, and wake up sleeping
3555 3546 * request.
3556 3547 */
3557 3548 if (IS_LOCKMGR(lock)) {
3558 3549 /* get NLM id */
3559 3550 lock_nlmid =
3560 3551 GETNLMID(lock->l_flock.l_sysid);
3561 3552 if (nlmid == lock_nlmid) {
3562 3553 SET_NLM_STATE(lock,
3563 3554 FLK_NLM_SHUTTING_DOWN);
3564 3555 INTERRUPT_WAKEUP(lock);
3565 3556 }
3566 3557 }
3567 3558 }
3568 3559 mutex_exit(&gp->gp_mutex);
3569 3560 }
3570 3561 }
3571 3562
3572 3563 /*
3573 3564 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid()
3574 3565 * Effects: Find all active (granted) lock manager locks _only_ for the
3575 3566 * NLM server identified by "nlmid" and release them.
3576 3567 */
3577 3568 static void
3578 3569 cl_flk_unlock_nlm_granted(int nlmid)
3579 3570 {
3580 3571 lock_descriptor_t *lock;
3581 3572 lock_descriptor_t *nlock = NULL; /* next lock */
3582 3573 int i;
3583 3574 graph_t *gp;
3584 3575 int lock_nlmid;
3585 3576
3586 3577 for (i = 0; i < HASH_SIZE; i++) {
3587 3578 mutex_enter(&flock_lock);
3588 3579 gp = lock_graph[i];
3589 3580 mutex_exit(&flock_lock);
3590 3581 if (gp == NULL) {
3591 3582 continue;
3592 3583 }
3593 3584
3594 3585 mutex_enter(&gp->gp_mutex);
3595 3586 for (lock = ACTIVE_HEAD(gp)->l_next;
3596 3587 lock != ACTIVE_HEAD(gp);
3597 3588 lock = nlock) {
3598 3589 nlock = lock->l_next;
3599 3590 ASSERT(IS_ACTIVE(lock));
3600 3591
3601 3592 /*
3602 3593 * If it's an NLM server request _and_ nlmid of
3603 3594 * the lock matches nlmid of argument, then
3604 3595 * remove the active lock from the list, wake up
3605 3596 * blocked threads, and free the storage for the lock.
3606 3597 * Note that there's no need to set the NLM state
3607 3598 * of this lock to NLM_DOWN because the lock will
3608 3599 * be deleted anyway and its storage freed.
3609 3600 */
3610 3601 if (IS_LOCKMGR(lock)) {
3611 3602 /* get NLM id */
3612 3603 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3613 3604 if (nlmid == lock_nlmid) {
3614 3605 flk_delete_active_lock(lock, 0);
3615 3606 flk_wakeup(lock, 1);
3616 3607 flk_free_lock(lock);
3617 3608 }
3618 3609 }
3619 3610 }
3620 3611 mutex_exit(&gp->gp_mutex);
3621 3612 }
3622 3613 }
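
Taken together, the three cluster NLM helpers above form the teardown path for a failed NLM server node. The sequencing below is an assumption for illustration only; the actual driver, cl_flk_set_nlm_status(), is outside this excerpt, and FLK_NLM_DOWN is assumed to be a member of flk_nlm_status_t:

static void
nlm_node_down_sketch(int nlmid)
{
	/* Poke sleepers first so their pending requests fail out. */
	cl_flk_wakeup_sleeping_nlm_locks(nlmid);
	/* Then drop whatever this server had already been granted. */
	cl_flk_unlock_nlm_granted(nlmid);
	/* Finally record the server's state on any remaining locks. */
	cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_DOWN);
}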
3623 3614
3624 3615 /*
3625 3616 * Find all sleeping lock manager requests and poke them.
3626 3617 */
3627 3618 static void
3628 3619 wakeup_sleeping_lockmgr_locks(struct flock_globals *fg)
3629 3620 {
3630 3621 lock_descriptor_t *lock;
3631 3622 lock_descriptor_t *nlock = NULL; /* next lock */
3632 3623 int i;
3633 3624 graph_t *gp;
3634 3625 zoneid_t zoneid = getzoneid();
3635 3626
3636 3627 for (i = 0; i < HASH_SIZE; i++) {
3637 3628 mutex_enter(&flock_lock);
3638 3629 gp = lock_graph[i];
3639 3630 mutex_exit(&flock_lock);
3640 3631 if (gp == NULL) {
3641 3632 continue;
3642 3633 }
3643 3634
3644 3635 mutex_enter(&gp->gp_mutex);
3645 3636 fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS;
3646 3637 for (lock = SLEEPING_HEAD(gp)->l_next;
3647 3638 lock != SLEEPING_HEAD(gp);
3648 3639 lock = nlock) {
3649 3640 nlock = lock->l_next;
3650 3641 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
3651 3642 INTERRUPT_WAKEUP(lock);
3652 3643 }
3653 3644 }
3654 3645 mutex_exit(&gp->gp_mutex);
3655 3646 }
3656 3647 }
3657 3648
3658 3649
3659 3650 /*
3660 3651 * Find all active (granted) lock manager locks and release them.
3661 3652 */
3662 3653 static void
3663 3654 unlock_lockmgr_granted(struct flock_globals *fg)
3664 3655 {
3665 3656 lock_descriptor_t *lock;
3666 3657 lock_descriptor_t *nlock = NULL; /* next lock */
3667 3658 int i;
3668 3659 graph_t *gp;
3669 3660 zoneid_t zoneid = getzoneid();
3670 3661
3671 3662 for (i = 0; i < HASH_SIZE; i++) {
3672 3663 mutex_enter(&flock_lock);
3673 3664 gp = lock_graph[i];
3674 3665 mutex_exit(&flock_lock);
3675 3666 if (gp == NULL) {
3676 3667 continue;
3677 3668 }
3678 3669
3679 3670 mutex_enter(&gp->gp_mutex);
3680 3671 fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN;
3681 3672 for (lock = ACTIVE_HEAD(gp)->l_next;
3682 3673 lock != ACTIVE_HEAD(gp);
3683 3674 lock = nlock) {
3684 3675 nlock = lock->l_next;
3685 3676 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
3686 3677 ASSERT(IS_ACTIVE(lock));
3687 3678 flk_delete_active_lock(lock, 0);
3688 3679 flk_wakeup(lock, 1);
3689 3680 flk_free_lock(lock);
3690 3681 }
3691 3682 }
3692 3683 mutex_exit(&gp->gp_mutex);
3693 3684 }
3694 3685 }
3695 -/* ONC_PLUS EXTRACT END */
3696 3686
3697 3687
3698 3688 /*
3699 3689 * Wait until a lock is granted, cancelled, or interrupted.
3700 3690 */
3701 3691
3702 3692 static void
3703 3693 wait_for_lock(lock_descriptor_t *request)
3704 3694 {
3705 3695 graph_t *gp = request->l_graph;
3706 3696
3707 3697 ASSERT(MUTEX_HELD(&gp->gp_mutex));
3708 3698
3709 3699 while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
3710 3700 !(IS_INTERRUPTED(request))) {
3711 3701 if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
3712 3702 flk_set_state(request, FLK_INTERRUPTED_STATE);
3713 3703 request->l_state |= INTERRUPTED_LOCK;
3714 3704 }
3715 3705 }
3716 3706 }
3717 3707
3718 -/* ONC_PLUS EXTRACT START */
3719 3708 /*
3720 3709 * Create an flock structure from the existing lock information
3721 3710 *
3722 3711 * This routine is used to create flock structures for the lock manager
3723 3712 * to use in a reclaim request. Since the lock was originated on this
3724 3713 * host, it must conform to UNIX semantics, so no checking is
3725 3714 * done to make sure it falls within the lower half of the 32-bit range.
3726 3715 */
3727 3716
3728 3717 static void
3729 3718 create_flock(lock_descriptor_t *lp, flock64_t *flp)
3730 3719 {
3731 3720 ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND);
3732 3721 ASSERT(lp->l_end >= lp->l_start);
3733 3722
3734 3723 flp->l_type = lp->l_type;
3735 3724 flp->l_whence = 0;
3736 3725 flp->l_start = lp->l_start;
3737 3726 flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 :
3738 3727 (lp->l_end - lp->l_start + 1);
3739 3728 flp->l_sysid = lp->l_flock.l_sysid;
3740 3729 flp->l_pid = lp->l_flock.l_pid;
3741 3730 }
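
A worked example of the l_len encoding produced above may help; the values are illustrative:

/*
 * Worked example (illustrative values):
 *
 *   internal range  l_start = 100, l_end = 199 (inclusive)
 *     reports as    l_start = 100, l_len = 199 - 100 + 1 = 100
 *
 *   internal range  l_start = 0, l_end = MAX_U_OFFSET_T
 *     reports as    l_start = 0, l_len = 0            ("to EOF")
 */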
3742 3731
3743 3732 /*
3744 3733 * Convert flock_t data describing a lock range into unsigned long starting
3745 3734 * and ending points, which are put into lock_request. Returns 0 or an
3746 3735 * errno value.
3747 3736 * Large Files: max is passed by the caller and we return EOVERFLOW
3748 3737 * as defined by LFS API.
3749 3738 */
3750 3739
3751 3740 int
3752 3741 flk_convert_lock_data(vnode_t *vp, flock64_t *flp,
3753 3742 u_offset_t *start, u_offset_t *end, offset_t offset)
3754 3743 {
3755 3744 struct vattr vattr;
3756 3745 int error;
3757 3746
3758 3747 /*
3759 3748 * Determine the starting point of the request
3760 3749 */
3761 3750 switch (flp->l_whence) {
3762 3751 case 0: /* SEEK_SET */
3763 3752 *start = (u_offset_t)flp->l_start;
3764 3753 break;
3765 3754 case 1: /* SEEK_CUR */
3766 3755 *start = (u_offset_t)(flp->l_start + offset);
3767 3756 break;
3768 3757 case 2: /* SEEK_END */
3769 3758 vattr.va_mask = AT_SIZE;
3770 3759 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
3771 3760 return (error);
3772 3761 *start = (u_offset_t)(flp->l_start + vattr.va_size);
3773 3762 break;
3774 3763 default:
3775 3764 return (EINVAL);
3776 3765 }
3777 3766
3778 3767 /*
3779 3768 * Determine the range covered by the request.
3780 3769 */
3781 3770 if (flp->l_len == 0)
3782 3771 *end = MAX_U_OFFSET_T;
3783 3772 else if ((offset_t)flp->l_len > 0) {
3784 3773 *end = (u_offset_t)(*start + (flp->l_len - 1));
3785 3774 } else {
3786 3775 /*
3787 3776 * Negative length; why do we even allow this?
3788 3777 * Because this allows easy specification of
3789 3778 * the last n bytes of the file.
3790 3779 */
3791 3780 *end = *start;
3792 3781 *start += (u_offset_t)flp->l_len;
3793 3782 (*start)++;
3794 3783 }
3795 3784 return (0);
3796 3785 }
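
The negative-length branch is the least obvious piece of the conversion above, so here is the arithmetic traced once with assumed inputs:

/*
 * Worked example (assumed inputs): l_whence = 0, l_start = 100,
 * l_len = -10.
 *
 *   *start = 100                    (SEEK_SET)
 *   *end   = *start           = 100
 *   *start = 100 + (-10) + 1  = 91
 *
 * i.e. the request covers the 10 bytes [91, 100] ending at the
 * original starting offset, inclusive.
 */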
3797 3786
3798 3787 /*
3799 3788 * Check the validity of lock data. This can be used by the NFS
3800 3789 * frlock routines to check data before contacting the server. The
3801 3790 * server must support semantics that aren't as restrictive as
3802 3791 * the UNIX API, so the NFS client is required to check.
3803 3792 * The maximum is now passed in by the caller.
3804 3793 */
3805 3794
3806 3795 int
3807 3796 flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max)
3808 3797 {
3809 3798 /*
3810 3799 * The end (length) for local locking should never be greater
3811 3800 * than MAXEND. However, the representation for
3812 3801 * the entire file is MAX_U_OFFSET_T.
3813 3802 */
3814 3803 if ((start > max) ||
3815 3804 ((end > max) && (end != MAX_U_OFFSET_T))) {
3816 3805 return (EINVAL);
3817 3806 }
3818 3807 if (start > end) {
3819 3808 return (EINVAL);
3820 3809 }
3821 3810 return (0);
3822 3811 }
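
These two routines are designed to be paired by a caller such as the NFS client: convert first, then validate against the caller's maximum. A minimal sketch, assuming MAXOFF32_T as the limit a non-large-file caller would pass:

static int
validate_request_sketch(vnode_t *vp, flock64_t *flp, offset_t offset)
{
	u_offset_t start, end;
	int error;

	error = flk_convert_lock_data(vp, flp, &start, &end, offset);
	if (error != 0)
		return (error);	/* EINVAL or a VOP_GETATTR() failure */
	/* MAXOFF32_T: assumed maximum for a 32-bit offset caller. */
	return (flk_check_lock_data(start, end, MAXOFF32_T));
}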
3823 3812
3824 3813 /*
3825 3814 * Fill in request->l_flock with information about the lock blocking the
3826 3815 * request. The complexity here is that lock manager requests are allowed
3827 3816 * to see into the upper part of the 32-bit address range, whereas local
3828 3817 * requests are only allowed to see signed values.
3829 3818 *
3830 3819 * What should be done when "blocker" is a lock manager lock that uses the
3831 3820 * upper portion of the 32-bit range, but "request" is local? Since the
3832 3821 * request has already been determined to have been blocked by the blocker,
3833 3822 * at least some portion of "blocker" must be in the range of the request,
3834 3823 * or the request extends to the end of file. For the first case, the
3835 3824 * portion in the lower range is returned with the indication that it goes
3836 3825 * "to EOF." For the second case, the last byte of the lower range is
3837 3826 * returned with the indication that it goes "to EOF."
3838 3827 */
3839 3828
3840 3829 static void
3841 3830 report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request)
3842 3831 {
3843 3832 flock64_t *flrp; /* l_flock portion of request */
3844 3833
3845 3834 ASSERT(blocker != NULL);
3846 3835
3847 3836 flrp = &request->l_flock;
3848 3837 flrp->l_whence = 0;
3849 3838 flrp->l_type = blocker->l_type;
3850 3839 flrp->l_pid = blocker->l_flock.l_pid;
3851 3840 flrp->l_sysid = blocker->l_flock.l_sysid;
3852 3841
3853 3842 if (IS_LOCKMGR(request)) {
3854 3843 flrp->l_start = blocker->l_start;
3855 3844 if (blocker->l_end == MAX_U_OFFSET_T)
3856 3845 flrp->l_len = 0;
3857 3846 else
3858 3847 flrp->l_len = blocker->l_end - blocker->l_start + 1;
3859 3848 } else {
3860 3849 if (blocker->l_start > MAXEND) {
3861 3850 flrp->l_start = MAXEND;
3862 3851 flrp->l_len = 0;
3863 3852 } else {
3864 3853 flrp->l_start = blocker->l_start;
3865 3854 if (blocker->l_end == MAX_U_OFFSET_T)
3866 3855 flrp->l_len = 0;
3867 3856 else
3868 3857 flrp->l_len = blocker->l_end -
3869 3858 blocker->l_start + 1;
3870 3859 }
3871 3860 }
3872 3861 }
3873 -/* ONC_PLUS EXTRACT END */
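
To make the clamping concrete, here are two assumed blocker shapes and what a local (non-lock-manager) requester sees:

/*
 * Worked examples (illustrative values):
 *
 *   blocker entirely above MAXEND (l_start > MAXEND):
 *     reported as l_start = MAXEND, l_len = 0   ("last byte, to EOF")
 *
 *   blocker at l_start = 10, l_end = MAX_U_OFFSET_T:
 *     reported as l_start = 10, l_len = 0       ("from 10 to EOF")
 */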
3874 3862
3875 3863 /*
3876 3864 * PSARC case 1997/292
3877 3865 */
3878 3866 /*
3879 3867 * This is the public routine exported by flock.h.
3880 3868 */
3881 3869 void
3882 3870 cl_flk_change_nlm_state_to_unknown(int nlmid)
3883 3871 {
3884 3872 /*
3885 3873 * Check to see if node is booted as a cluster. If not, return.
3886 3874 */
3887 3875 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
3888 3876 return;
3889 3877 }
3890 3878
3891 3879 /*
3892 3880 * See comment in cl_flk_set_nlm_status().
3893 3881 */
3894 3882 if (nlm_reg_status == NULL) {
3895 3883 return;
3896 3884 }
3897 3885
3898 3886 /*
3899 3887 * protect NLM registry state with a mutex.
3900 3888 */
3901 3889 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
3902 3890 mutex_enter(&nlm_reg_lock);
3903 3891 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN);
3904 3892 mutex_exit(&nlm_reg_lock);
3905 3893 }
3906 3894
3907 3895 /*
3908 3896 * Return non-zero if the given I/O request conflicts with an active NBMAND
3909 3897 * lock.
3910 3898 * If svmand is non-zero, look at all active locks, not just NBMAND
3911 3899 * locks.
3912 3900 */
3913 3901
3914 3902 int
3915 3903 nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset,
3916 3904 ssize_t length, int svmand, caller_context_t *ct)
3917 3905 {
3918 3906 int conflict = 0;
3919 3907 graph_t *gp;
3920 3908 lock_descriptor_t *lock;
3921 3909 pid_t pid;
3922 3910 int sysid;
3923 3911
3924 3912 if (ct == NULL) {
3925 3913 pid = curproc->p_pid;
3926 3914 sysid = 0;
3927 3915 } else {
3928 3916 pid = ct->cc_pid;
3929 3917 sysid = ct->cc_sysid;
3930 3918 }
3931 3919
3932 3920 mutex_enter(&flock_lock);
3933 3921 gp = lock_graph[HASH_INDEX(vp)];
3934 3922 mutex_exit(&flock_lock);
3935 3923 if (gp == NULL)
3936 3924 return (0);
3937 3925
3938 3926 mutex_enter(&gp->gp_mutex);
3939 3927 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
3940 3928
3941 3929 for (; lock && lock->l_vnode == vp; lock = lock->l_next) {
3942 3930 if ((svmand || (lock->l_state & NBMAND_LOCK)) &&
3943 3931 (lock->l_flock.l_sysid != sysid ||
3944 3932 lock->l_flock.l_pid != pid) &&
3945 3933 lock_blocks_io(op, offset, length,
3946 3934 lock->l_type, lock->l_start, lock->l_end)) {
3947 3935 conflict = 1;
3948 3936 break;
3949 3937 }
3950 3938 }
3951 3939 mutex_exit(&gp->gp_mutex);
3952 3940
3953 3941 return (conflict);
3954 3942 }
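
For context, a sketch of how a file system might consult nbl_lock_conflict() before a write; nbl_need_check(), nbl_start_crit() and nbl_end_crit() are assumed from <sys/nbmlock.h>, and the wrapper itself is illustrative:

static int
nbmand_write_conflict_sketch(vnode_t *vp, u_offset_t off, ssize_t len,
    caller_context_t *ct)
{
	int conflict = 0;

	if (nbl_need_check(vp)) {	/* any chance of nbmand locks? */
		nbl_start_crit(vp, RW_READER);
		/* svmand = 0: only NBMAND locks are of interest here. */
		conflict = nbl_lock_conflict(vp, NBL_WRITE, off, len, 0, ct);
		nbl_end_crit(vp);
	}
	return (conflict);
}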
3955 3943
3956 3944 /*
3957 3945 * Return non-zero if the given I/O request conflicts with the given lock.
3958 3946 */
3959 3947
3960 3948 static int
3961 3949 lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length,
3962 3950 int lock_type, u_offset_t lock_start, u_offset_t lock_end)
3963 3951 {
3964 3952 ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE);
3965 3953 ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK);
3966 3954
3967 3955 if (op == NBL_READ && lock_type == F_RDLCK)
3968 3956 return (0);
3969 3957
3970 3958 if (offset <= lock_start && lock_start < offset + length)
3971 3959 return (1);
3972 3960 if (lock_start <= offset && offset <= lock_end)
3973 3961 return (1);
3974 3962
3975 3963 return (0);
3976 3964 }
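
A quick trace of the two overlap tests above with assumed numbers:

/*
 * Worked example (assumed numbers): an I/O of length 10 at offset
 * 100 covers bytes [100, 109].
 *
 *   against a write lock [105, 200]:
 *     100 <= 105 && 105 < 110        -> first test fires, conflict
 *
 *   against a write lock [50, 103]:
 *     50 <= 100 && 100 <= 103        -> second test fires, conflict
 *
 *   against a write lock [50, 99]:
 *     neither test fires             -> no conflict
 *
 * A read request never conflicts with a read lock (checked first).
 */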
3977 3965
3978 3966 #ifdef DEBUG
3979 3967 static void
3980 3968 check_active_locks(graph_t *gp)
3981 3969 {
3982 3970 lock_descriptor_t *lock, *lock1;
3983 3971 edge_t *ep;
3984 3972
3985 3973 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
3986 3974 lock = lock->l_next) {
3987 3975 ASSERT(IS_ACTIVE(lock));
3988 3976 ASSERT(NOT_BLOCKED(lock));
3989 3977 ASSERT(!IS_BARRIER(lock));
3990 3978
3991 3979 ep = FIRST_IN(lock);
3992 3980
3993 3981 while (ep != HEAD(lock)) {
3994 3982 ASSERT(IS_SLEEPING(ep->from_vertex));
3995 3983 ASSERT(!NOT_BLOCKED(ep->from_vertex));
3996 3984 ep = NEXT_IN(ep);
3997 3985 }
3998 3986
3999 3987 for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp);
4000 3988 lock1 = lock1->l_next) {
4001 3989 if (lock1->l_vnode == lock->l_vnode) {
4002 3990 if (BLOCKS(lock1, lock)) {
4003 3991 cmn_err(CE_PANIC,
4004 3992 "active lock %p blocks %p",
4005 3993 (void *)lock1, (void *)lock);
4006 3994 } else if (BLOCKS(lock, lock1)) {
4007 3995 cmn_err(CE_PANIC,
4008 3996 "active lock %p blocks %p",
4009 3997 (void *)lock, (void *)lock1);
4010 3998 }
4011 3999 }
4012 4000 }
4013 4001 }
4014 4002 }
4015 4003
4016 4004 /*
4017 4005 * Effect: This function checks whether the transition from 'old_state' to
4018 4006 * 'new_state' is a valid one. It returns 0 if the transition is valid
4019 4007 * and 1 if it is not.
4020 4008 * For a map of valid transitions, see sys/flock_impl.h
4021 4009 */
4022 4010 static int
4023 4011 check_lock_transition(int old_state, int new_state)
4024 4012 {
4025 4013 switch (old_state) {
4026 4014 case FLK_INITIAL_STATE:
4027 4015 if ((new_state == FLK_START_STATE) ||
4028 4016 (new_state == FLK_SLEEPING_STATE) ||
4029 4017 (new_state == FLK_ACTIVE_STATE) ||
4030 4018 (new_state == FLK_DEAD_STATE)) {
4031 4019 return (0);
4032 4020 } else {
4033 4021 return (1);
4034 4022 }
4035 4023 case FLK_START_STATE:
4036 4024 if ((new_state == FLK_ACTIVE_STATE) ||
4037 4025 (new_state == FLK_DEAD_STATE)) {
4038 4026 return (0);
4039 4027 } else {
4040 4028 return (1);
4041 4029 }
4042 4030 case FLK_ACTIVE_STATE:
4043 4031 if (new_state == FLK_DEAD_STATE) {
4044 4032 return (0);
4045 4033 } else {
4046 4034 return (1);
4047 4035 }
4048 4036 case FLK_SLEEPING_STATE:
4049 4037 if ((new_state == FLK_GRANTED_STATE) ||
4050 4038 (new_state == FLK_INTERRUPTED_STATE) ||
4051 4039 (new_state == FLK_CANCELLED_STATE)) {
4052 4040 return (0);
4053 4041 } else {
4054 4042 return (1);
4055 4043 }
4056 4044 case FLK_GRANTED_STATE:
4057 4045 if ((new_state == FLK_START_STATE) ||
4058 4046 (new_state == FLK_INTERRUPTED_STATE) ||
4059 4047 (new_state == FLK_CANCELLED_STATE)) {
4060 4048 return (0);
4061 4049 } else {
4062 4050 return (1);
4063 4051 }
4064 4052 case FLK_CANCELLED_STATE:
4065 4053 if ((new_state == FLK_INTERRUPTED_STATE) ||
4066 4054 (new_state == FLK_DEAD_STATE)) {
4067 4055 return (0);
4068 4056 } else {
4069 4057 return (1);
4070 4058 }
4071 4059 case FLK_INTERRUPTED_STATE:
4072 4060 if (new_state == FLK_DEAD_STATE) {
4073 4061 return (0);
4074 4062 } else {
4075 4063 return (1);
4076 4064 }
4077 4065 case FLK_DEAD_STATE:
4078 4066 /* May be set more than once */
4079 4067 if (new_state == FLK_DEAD_STATE) {
4080 4068 return (0);
4081 4069 } else {
4082 4070 return (1);
4083 4071 }
4084 4072 default:
4085 4073 return (1);
4086 4074 }
4087 4075 }
4088 4076
4089 4077 static void
4090 4078 check_sleeping_locks(graph_t *gp)
4091 4079 {
4092 4080 lock_descriptor_t *lock1, *lock2;
4093 4081 edge_t *ep;
4094 4082 for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp);
4095 4083 lock1 = lock1->l_next) {
4096 4084 ASSERT(!IS_BARRIER(lock1));
4097 4085 for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp);
4098 4086 lock2 = lock2->l_next) {
4099 4087 if (lock1->l_vnode == lock2->l_vnode) {
4100 4088 if (BLOCKS(lock2, lock1)) {
4101 4089 ASSERT(!IS_GRANTED(lock1));
4102 4090 ASSERT(!NOT_BLOCKED(lock1));
4103 4091 path(lock1, lock2);
4104 4092 }
4105 4093 }
4106 4094 }
4107 4095
4108 4096 for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp);
4109 4097 lock2 = lock2->l_next) {
4110 4098 ASSERT(!IS_BARRIER(lock1));
4111 4099 if (lock1->l_vnode == lock2->l_vnode) {
4112 4100 if (BLOCKS(lock2, lock1)) {
4113 4101 ASSERT(!IS_GRANTED(lock1));
4114 4102 ASSERT(!NOT_BLOCKED(lock1));
4115 4103 path(lock1, lock2);
4116 4104 }
4117 4105 }
4118 4106 }
4119 4107 ep = FIRST_ADJ(lock1);
4120 4108 while (ep != HEAD(lock1)) {
4121 4109 ASSERT(BLOCKS(ep->to_vertex, lock1));
4122 4110 ep = NEXT_ADJ(ep);
4123 4111 }
4124 4112 }
4125 4113 }
4126 4114
4127 4115 static int
4128 4116 level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path)
4129 4117 {
4130 4118 edge_t *ep;
4131 4119 lock_descriptor_t *vertex;
4132 4120 lock_descriptor_t *vertex_stack;
4133 4121
4134 4122 STACK_INIT(vertex_stack);
4135 4123
4136 4124 flk_graph_uncolor(lock1->l_graph);
4137 4125 ep = FIRST_ADJ(lock1);
4138 4126 ASSERT(ep != HEAD(lock1));
4139 4127 while (ep != HEAD(lock1)) {
4140 4128 if (no_path)
4141 4129 ASSERT(ep->to_vertex != lock2);
4142 4130 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
4143 4131 COLOR(ep->to_vertex);
4144 4132 ep = NEXT_ADJ(ep);
4145 4133 }
4146 4134
4147 4135 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
4148 4136 STACK_POP(vertex_stack, l_dstack);
4149 4137 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
4150 4138 ep = NEXT_ADJ(ep)) {
4151 4139 if (COLORED(ep->to_vertex))
4152 4140 continue;
4153 4141 COLOR(ep->to_vertex);
4154 4142 if (ep->to_vertex == lock2)
4155 4143 return (1);
4156 4144
4157 4145 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
4158 4146 }
4159 4147 }
4160 4148 return (0);
4161 4149 }
4162 4150
4163 4151 static void
4164 4152 check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp)
4165 4153 {
4166 4154 lock_descriptor_t *lock;
4167 4155
4168 4156 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
4169 4157
4170 4158 if (lock) {
4171 4159 while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) {
4172 4160 if (lock->l_flock.l_pid == pid &&
4173 4161 lock->l_flock.l_sysid == sysid)
4174 4162 cmn_err(CE_PANIC,
4175 4163 "owner pid %d's lock %p in active queue",
4176 4164 pid, (void *)lock);
4177 4165 lock = lock->l_next;
4178 4166 }
4179 4167 }
4180 4168 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
4181 4169
4182 4170 if (lock) {
4183 4171 while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) {
4184 4172 if (lock->l_flock.l_pid == pid &&
4185 4173 lock->l_flock.l_sysid == sysid)
4186 4174 cmn_err(CE_PANIC,
4187 4175 "owner pid %d's lock %p in sleep queue",
4188 4176 pid, (void *)lock);
4189 4177 lock = lock->l_next;
4190 4178 }
4191 4179 }
4192 4180 }
4193 4181
4194 4182 static int
4195 4183 level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4196 4184 {
4197 4185 edge_t *ep = FIRST_ADJ(lock1);
4198 4186
4199 4187 while (ep != HEAD(lock1)) {
4200 4188 if (ep->to_vertex == lock2)
4201 4189 return (1);
4202 4190 else
4203 4191 ep = NEXT_ADJ(ep);
4204 4192 }
4205 4193 return (0);
4206 4194 }
4207 4195
4208 4196 static int
4209 4197 no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4210 4198 {
4211 4199 return (!level_two_path(lock1, lock2, 1));
4212 4200 }
4213 4201
4214 4202 static void
4215 4203 path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4216 4204 {
4217 4205 if (level_one_path(lock1, lock2)) {
4218 4206 if (level_two_path(lock1, lock2, 0) != 0) {
4219 4207 cmn_err(CE_WARN,
4220 4208 "one edge one path from lock1 %p lock2 %p",
4221 4209 (void *)lock1, (void *)lock2);
4222 4210 }
4223 4211 } else if (no_path(lock1, lock2)) {
4224 4212 cmn_err(CE_PANIC,
4225 4213 "No path from lock1 %p to lock2 %p",
4226 4214 (void *)lock1, (void *)lock2);
4227 4215 }
4228 4216 }
4229 4217 #endif /* DEBUG */