Print this page
5042 stop using deprecated atomic functions
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/os/rwlock.c
+++ new/usr/src/uts/common/os/rwlock.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
28 28 */
29 29
30 30 #include <sys/param.h>
31 31 #include <sys/thread.h>
32 32 #include <sys/cmn_err.h>
33 33 #include <sys/debug.h>
34 34 #include <sys/cpuvar.h>
35 35 #include <sys/sobject.h>
36 36 #include <sys/turnstile.h>
37 37 #include <sys/rwlock.h>
38 38 #include <sys/rwlock_impl.h>
39 39 #include <sys/atomic.h>
40 40 #include <sys/lockstat.h>
41 41
42 42 /*
43 43 * Big Theory Statement for readers/writer locking primitives.
44 44 *
45 45 * An rwlock provides exclusive access to a single thread ("writer") or
46 46 * concurrent access to multiple threads ("readers"). See rwlock(9F)
47 47 * for a full description of the interfaces and programming model.
48 48 * The rest of this comment describes the implementation.
49 49 *
50 50 * An rwlock is a single word with the following structure:
51 51 *
52 52 * ---------------------------------------------------------------------
53 53 * | OWNER (writer) or HOLD COUNT (readers) | WRLOCK | WRWANT | WAIT |
54 54 * ---------------------------------------------------------------------
55 55 * 63 / 31 .. 3 2 1 0
56 56 *
57 57 * The waiters bit (0) indicates whether any threads are blocked waiting
58 58 * for the lock. The write-wanted bit (1) indicates whether any threads
59 59 * are blocked waiting for write access. The write-locked bit (2) indicates
60 60 * whether the lock is held by a writer, which determines whether the upper
61 61 * bits (3..31 in ILP32, 3..63 in LP64) should be interpreted as the owner
62 62 * (thread pointer) or the hold count (number of readers).
63 63 *
64 64 * In the absence of any contention, a writer gets the lock by setting
65 65 * this word to (curthread | RW_WRITE_LOCKED); a reader gets the lock
66 66 * by incrementing the hold count (i.e. adding 8, aka RW_READ_LOCK).
67 67 *
68 68 * A writer will fail to acquire the lock if any other thread owns it.
69 69 * A reader will fail if the lock is either owned (in the RW_READER and
70 70 * RW_READER_STARVEWRITER cases) or wanted by a writer (in the RW_READER
71 71 * case). rw_tryenter() returns 0 in these cases; rw_enter() blocks until
72 72 * the lock becomes available.
73 73 *
74 74 * When a thread blocks it acquires the rwlock's hashed turnstile lock and
75 75 * attempts to set RW_HAS_WAITERS (and RW_WRITE_WANTED in the writer case)
76 76 * atomically *only if the lock still appears busy*. A thread must never
77 77 * accidentally block for an available lock since there would be no owner
78 78 * to awaken it. casip() provides the required atomicity. Once casip()
79 79 * succeeds, the decision to block becomes final and irreversible. The
80 80 * thread will not become runnable again until it has been granted ownership
81 81 * of the lock via direct handoff from a former owner as described below.
82 82 *
83 83 * In the absence of any waiters, rw_exit() just clears the lock (if it
84 84 * is write-locked) or decrements the hold count (if it is read-locked).
85 85 * Note that even if waiters are present, decrementing the hold count
86 86 * to a non-zero value requires no special action since the lock is still
87 87 * held by at least one other thread.
88 88 *
89 89 * On the "final exit" (transition to unheld state) of a lock with waiters,
90 90 * rw_exit_wakeup() grabs the turnstile lock and transfers ownership directly
91 91 * to the next writer or set of readers. There are several advantages to this
92 92 * approach: (1) it closes all windows for priority inversion (when a new
93 93 * writer has grabbed the lock but has not yet inherited from blocked readers);
94 94 * (2) it prevents starvation of equal-priority threads by granting the lock
95 95 * in FIFO order; (3) it eliminates the need for a write-wanted count -- a
96 96 * single bit suffices because the lock remains held until all waiting
97 97 * writers are gone; (4) when we awaken N readers we can perform a single
98 98 * "atomic_add(&x, N)" to set the total hold count rather than having all N
99 99 * threads fight for the cache to perform an "atomic_add(&x, 1)" upon wakeup.
100 100 *
101 101 * The most interesting policy decision in rw_exit_wakeup() is which thread
102 102 * to wake. Starvation is always possible with priority-based scheduling,
103 103 * but any sane wakeup policy should at least satisfy these requirements:
104 104 *
105 105 * (1) The highest-priority thread in the system should not starve.
106 106 * (2) The highest-priority writer should not starve.
107 107 * (3) No writer should starve due to lower-priority threads.
108 108 * (4) No reader should starve due to lower-priority writers.
109 109 * (5) If all threads have equal priority, none of them should starve.
110 110 *
111 111 * We used to employ a writers-always-win policy, which doesn't even
112 112 * satisfy (1): a steady stream of low-priority writers can starve out
113 113 * a real-time reader! This is clearly a broken policy -- it violates
114 114 * (1), (4), and (5) -- but it's how rwlocks always used to behave.
115 115 *
116 116 * A round-robin policy (exiting readers grant the lock to blocked writers
117 117 * and vice versa) satisfies all but (3): a single high-priority writer
118 118 * and many low-priority readers can starve out medium-priority writers.
119 119 *
120 120 * A strict priority policy (grant the lock to the highest priority blocked
121 121 * thread) satisfies everything but (2): a steady stream of high-priority
122 122 * readers can permanently starve the highest-priority writer.
123 123 *
124 124 * The reason we care about (2) is that it's important to process writers
125 125 * reasonably quickly -- even if they're low priority -- because their very
126 126 * presence causes all readers to take the slow (blocking) path through this
127 127 * code. There is also a general sense that writers deserve some degree of
128 128 * deference because they're updating the data upon which all readers act.
129 129 * Presumably this data should not be allowed to become arbitrarily stale
130 130 * due to writer starvation. Finally, it seems reasonable to level the
131 131 * playing field a bit to compensate for the fact that it's so much harder
132 132 * for a writer to get in when there are already many readers present.
133 133 *
134 134 * A hybrid of round-robin and strict priority can be made to satisfy
135 135 * all five criteria. In this "writer priority policy" exiting readers
136 136 * always grant the lock to waiting writers, but exiting writers only
137 137 * grant the lock to readers of the same or higher priority than the
138 138 * highest-priority blocked writer. Thus requirement (2) is satisfied,
139 139 * necessarily, by a willful act of priority inversion: an exiting reader
140 140 * will grant the lock to a blocked writer even if there are blocked
141 141 * readers of higher priority. The situation is mitigated by the fact
142 142 * that writers always inherit priority from blocked readers, and the
143 143 * writer will awaken those readers as soon as it exits the lock.
144 144 *
145 145 * Finally, note that this hybrid scheme -- and indeed, any scheme that
146 146 * satisfies requirement (2) -- has an important consequence: if a lock is
147 147 * held as reader and a writer subsequently becomes blocked, any further
148 148 * readers must be blocked to avoid writer starvation. This implementation
149 149 * detail has ramifications for the semantics of rwlocks, as it prohibits
150 150 * recursively acquiring an rwlock as reader: any writer that wishes to
151 151 * acquire the lock after the first but before the second acquisition as
152 152 * reader will block the second acquisition -- resulting in deadlock. This
153 153 * itself is not necessarily prohibitive, as it is often straightforward to
154 154 * prevent a single thread from recursively acquiring an rwlock as reader.
155 155 * However, a more subtle situation arises when both a traditional mutex and
156 156 * a reader lock are acquired by two different threads in opposite order.
157 157 * (That is, one thread first acquires the mutex and then the rwlock as
158 158 * reader; the other acquires the rwlock as reader and then the mutex.) As
159 159 * with the single threaded case, this is fine absent a blocked writer: the
160 160 * thread that acquires the mutex before acquiring the rwlock as reader will
161 161 * be able to successfully acquire the rwlock -- even as/if the other thread
162 162 * has the rwlock as reader and is blocked on the held mutex. However, if
163 163 * an unrelated writer (that is, a third thread) becomes blocked on the
164 164 * rwlock after the first thread acquires the rwlock as reader but before
165 165 * it's able to acquire the mutex, the second thread -- with the mutex held
166 166 * -- will not be able to acquire the rwlock as reader due to the waiting
167 167 * writer, deadlocking the three threads. Unlike the single-threaded
168 168 * (recursive) rwlock acquisition case, this case can be quite a bit
169 169 * thornier to fix, especially as there is nothing inherently wrong in the
170 170 * locking strategy: the deadlock is really induced by requirement (2), not
171 171 * the consumers of the rwlock. To permit such consumers, we allow rwlock
172 172 * acquirers to explicitly opt out of requirement (2) by specifying
173 173 * RW_READER_STARVEWRITER when acquiring the rwlock. This (obviously) means
 174 174 	 * that infinite readers can starve writers, but it also allows for
175 175 * multiple readers in the presence of other synchronization primitives
176 176 * without regard for lock-ordering. And while certainly odd (and perhaps
177 177 * unwise), RW_READER_STARVEWRITER can be safely used alongside RW_READER on
178 178 * the same lock -- RW_READER_STARVEWRITER describes only the act of lock
179 179 * acquisition with respect to waiting writers, not the lock itself.
180 180 *
181 181 * rw_downgrade() follows the same wakeup policy as an exiting writer.
182 182 *
183 183 * rw_tryupgrade() has the same failure mode as rw_tryenter() for a
184 184 * write lock. Both honor the WRITE_WANTED bit by specification.
185 185 *
186 186 * The following rules apply to manipulation of rwlock internal state:
187 187 *
188 188 * (1) The rwlock is only modified via the atomic primitives casip()
189 189 * and atomic_add_ip().
190 190 *
191 191 * (2) The waiters bit and write-wanted bit are only modified under
192 192 * turnstile_lookup(). This ensures that the turnstile is consistent
193 193 * with the rwlock.
194 194 *
195 195 * (3) Waiters receive the lock by direct handoff from the previous
196 196 * owner. Therefore, waiters *always* wake up holding the lock.
197 197 */
198 198
/*
 * The sobj_ops vector exports a set of functions needed when a thread
 * is asleep on a synchronization object of a given type.  For rwlocks:
 * rw_owner() identifies the current owner (writer) for priority
 * inheritance, and the turnstile helpers manage sleeping waiters.
 */
static sobj_ops_t rw_sobj_ops = {
	SOBJ_RWLOCK, rw_owner, turnstile_stay_asleep, turnstile_change_pri
};
206 206
/*
 * If the system panics on an rwlock, save the address of the offending
 * rwlock in panic_rwlock_addr, and save the contents in panic_rwlock.
 */
static rwlock_impl_t panic_rwlock;	/* snapshot of the offending lock */
static rwlock_impl_t *panic_rwlock_addr;	/* address of the offending lock */
213 213
214 214 static void
215 215 rw_panic(char *msg, rwlock_impl_t *lp)
216 216 {
217 217 if (panicstr)
218 218 return;
219 219
220 - if (casptr(&panic_rwlock_addr, NULL, lp) == NULL)
220 + if (atomic_cas_ptr(&panic_rwlock_addr, NULL, lp) == NULL)
221 221 panic_rwlock = *lp;
222 222
223 223 panic("%s, lp=%p wwwh=%lx thread=%p",
224 224 msg, (void *)lp, panic_rwlock.rw_wwwh, (void *)curthread);
225 225 }
226 226
227 227 /* ARGSUSED */
228 228 void
229 229 rw_init(krwlock_t *rwlp, char *name, krw_type_t type, void *arg)
230 230 {
231 231 ((rwlock_impl_t *)rwlp)->rw_wwwh = 0;
232 232 }
233 233
234 234 void
235 235 rw_destroy(krwlock_t *rwlp)
236 236 {
237 237 rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
238 238
239 239 if (lp->rw_wwwh != 0) {
240 240 if ((lp->rw_wwwh & RW_DOUBLE_LOCK) == RW_DOUBLE_LOCK)
241 241 rw_panic("rw_destroy: lock already destroyed", lp);
242 242 else
243 243 rw_panic("rw_destroy: lock still active", lp);
244 244 }
245 245
246 246 lp->rw_wwwh = RW_DOUBLE_LOCK;
247 247 }
248 248
249 249 /*
250 250 * Verify that an rwlock is held correctly.
251 251 */
252 252 static int
253 253 rw_locked(rwlock_impl_t *lp, krw_t rw)
254 254 {
255 255 uintptr_t old = lp->rw_wwwh;
256 256
257 257 if (rw == RW_READER || rw == RW_READER_STARVEWRITER)
258 258 return ((old & RW_LOCKED) && !(old & RW_WRITE_LOCKED));
259 259
260 260 if (rw == RW_WRITER)
261 261 return ((old & RW_OWNER) == (uintptr_t)curthread);
262 262
263 263 return (0);
264 264 }
265 265
/*
 * Optional contention-backoff hooks.  When rw_lock_delay is non-NULL,
 * rw_enter_sleep(), rw_exit_wakeup(), and rw_tryenter() call
 * rw_lock_backoff()/rw_lock_delay() between failed casip() attempts.
 * NOTE(review): callers gate only on rw_lock_delay != NULL but also
 * call rw_lock_backoff() -- presumably both are set together; confirm.
 */
uint_t (*rw_lock_backoff)(uint_t) = NULL;
void (*rw_lock_delay)(uint_t) = NULL;
268 268
/*
 * Full-service implementation of rw_enter() to handle all the hard cases.
 * Called from the assembly version if anything complicated is going on.
 * The only semantic difference between calling rw_enter() and calling
 * rw_enter_sleep() directly is that we assume the caller has already done
 * a THREAD_KPRI_REQUEST() in the RW_READER cases.
 */
void
rw_enter_sleep(rwlock_impl_t *lp, krw_t rw)
{
	uintptr_t old, new, lock_value, lock_busy, lock_wait;
	hrtime_t sleep_time;
	turnstile_t *ts;
	uint_t backoff = 0;
	int loop_count = 0;

	/*
	 * Precompute, per acquisition mode:
	 *   lock_value -- delta added to the lock word on success;
	 *   lock_busy  -- bits that mean the lock is unavailable to us;
	 *   lock_wait  -- waiter bits to set before blocking.
	 * RW_READER_STARVEWRITER differs from RW_READER only in treating
	 * RW_WRITE_LOCKED (not RW_WRITE_CLAIMED) as busy, i.e. it ignores
	 * waiting writers.
	 */
	if (rw == RW_READER) {
		lock_value = RW_READ_LOCK;
		lock_busy = RW_WRITE_CLAIMED;
		lock_wait = RW_HAS_WAITERS;
	} else if (rw == RW_READER_STARVEWRITER) {
		lock_value = RW_READ_LOCK;
		lock_busy = RW_WRITE_LOCKED;
		lock_wait = RW_HAS_WAITERS;
	} else {
		lock_value = RW_WRITE_LOCK(curthread);
		lock_busy = (uintptr_t)RW_LOCKED;
		lock_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
	}

	for (;;) {
		if (((old = lp->rw_wwwh) & lock_busy) == 0) {
			/*
			 * Lock looks available: try to take it.  On cas
			 * failure, back off (if the hooks are installed)
			 * and retry from the top.
			 */
			if (casip(&lp->rw_wwwh, old, old + lock_value) != old) {
				if (rw_lock_delay != NULL) {
					backoff = rw_lock_backoff(backoff);
					rw_lock_delay(backoff);
					if (++loop_count == ncpus_online) {
						backoff = 0;
						loop_count = 0;
					}
				}
				continue;
			}
			break;
		}

		/* Never block during panic; just return to the caller. */
		if (panicstr)
			return;

		if ((old & RW_DOUBLE_LOCK) == RW_DOUBLE_LOCK) {
			rw_panic("rw_enter: bad rwlock", lp);
			return;
		}

		if ((old & RW_OWNER) == (uintptr_t)curthread) {
			rw_panic("recursive rw_enter", lp);
			return;
		}

		/*
		 * The waiter bits may only be manipulated while holding
		 * the turnstile lock (rule (2) above).
		 */
		ts = turnstile_lookup(lp);

		/*
		 * Set the waiter bits, but only while the lock still
		 * appears busy: we must never block on an available lock,
		 * since no owner would exist to wake us.
		 */
		do {
			if (((old = lp->rw_wwwh) & lock_busy) == 0)
				break;
			new = old | lock_wait;
		} while (old != new && casip(&lp->rw_wwwh, old, new) != old);

		if ((old & lock_busy) == 0) {
			/*
			 * The lock appears free now; try the dance again
			 */
			turnstile_exit(lp);
			continue;
		}

		/*
		 * We really are going to block.  Bump the stats, and drop
		 * kpri if we're a reader.
		 */
		ASSERT(lp->rw_wwwh & lock_wait);
		ASSERT(lp->rw_wwwh & RW_LOCKED);

		sleep_time = -gethrtime();
		if (rw != RW_WRITER) {
			THREAD_KPRI_RELEASE();
			CPU_STATS_ADDQ(CPU, sys, rw_rdfails, 1);
			(void) turnstile_block(ts, TS_READER_Q, lp,
			    &rw_sobj_ops, NULL, NULL);
		} else {
			CPU_STATS_ADDQ(CPU, sys, rw_wrfails, 1);
			(void) turnstile_block(ts, TS_WRITER_Q, lp,
			    &rw_sobj_ops, NULL, NULL);
		}
		sleep_time += gethrtime();

		LOCKSTAT_RECORD4(LS_RW_ENTER_BLOCK, lp, sleep_time, rw,
		    (old & RW_WRITE_LOCKED) ? 1 : 0,
		    old >> RW_HOLD_COUNT_SHIFT);

		/*
		 * We wake up holding the lock (and having kpri if we're
		 * a reader) via direct handoff from the previous owner.
		 */
		break;
	}

	ASSERT(rw_locked(lp, rw));

	/* Memory barrier on the acquire side of the lock. */
	membar_enter();

	LOCKSTAT_RECORD(LS_RW_ENTER_ACQUIRE, lp, rw);
}
381 381
382 382 /*
383 383 * Return the number of readers to wake, or zero if we should wake a writer.
384 384 * Called only by exiting/downgrading writers (readers don't wake readers).
385 385 */
386 386 static int
387 387 rw_readers_to_wake(turnstile_t *ts)
388 388 {
389 389 kthread_t *next_writer = ts->ts_sleepq[TS_WRITER_Q].sq_first;
390 390 kthread_t *next_reader = ts->ts_sleepq[TS_READER_Q].sq_first;
391 391 pri_t wpri = (next_writer != NULL) ? DISP_PRIO(next_writer) : -1;
392 392 int count = 0;
393 393
394 394 while (next_reader != NULL) {
395 395 if (DISP_PRIO(next_reader) < wpri)
396 396 break;
397 397 next_reader->t_kpri_req++;
398 398 next_reader = next_reader->t_link;
399 399 count++;
400 400 }
401 401 return (count);
402 402 }
403 403
/*
 * Full-service implementation of rw_exit() to handle all the hard cases.
 * Called from the assembly version if anything complicated is going on.
 * There is no semantic difference between calling rw_exit() and calling
 * rw_exit_wakeup() directly.
 */
void
rw_exit_wakeup(rwlock_impl_t *lp)
{
	turnstile_t *ts;
	uintptr_t old, new, lock_value;
	kthread_t *next_writer;
	int nreaders;
	uint_t backoff = 0;
	int loop_count = 0;

	/* Memory barrier on the release side of the lock. */
	membar_exit();

	/*
	 * Sanity-check ownership and compute the delta to subtract from
	 * the lock word: the full owner word if write-locked, a single
	 * read hold otherwise.
	 */
	old = lp->rw_wwwh;
	if (old & RW_WRITE_LOCKED) {
		if ((old & RW_OWNER) != (uintptr_t)curthread) {
			rw_panic("rw_exit: not owner", lp);
			lp->rw_wwwh = 0;	/* force-clear the bad lock */
			return;
		}
		lock_value = RW_WRITE_LOCK(curthread);
	} else {
		if ((old & RW_LOCKED) == 0) {
			rw_panic("rw_exit: lock not held", lp);
			return;
		}
		lock_value = RW_READ_LOCK;
	}

	for (;;) {
		/*
		 * If this is *not* the final exit of a lock with waiters,
		 * just drop the lock -- there's nothing tricky going on.
		 */
		old = lp->rw_wwwh;
		new = old - lock_value;
		if ((new & (RW_LOCKED | RW_HAS_WAITERS)) != RW_HAS_WAITERS) {
			if (casip(&lp->rw_wwwh, old, new) != old) {
				/* cas lost a race; back off and retry */
				if (rw_lock_delay != NULL) {
					backoff = rw_lock_backoff(backoff);
					rw_lock_delay(backoff);
					if (++loop_count == ncpus_online) {
						backoff = 0;
						loop_count = 0;
					}
				}
				continue;
			}
			break;
		}

		/*
		 * This appears to be the final exit of a lock with waiters.
		 * If we do not have the lock as writer (that is, if this is
		 * the last exit of a reader with waiting writers), we will
		 * grab the lock as writer to prevent additional readers.
		 * (This is required because a reader that is acquiring the
		 * lock via RW_READER_STARVEWRITER will not observe the
		 * RW_WRITE_WANTED bit -- and we could therefore be racing
		 * with such readers here.)
		 */
		if (!(old & RW_WRITE_LOCKED)) {
			new = RW_WRITE_LOCK(curthread) |
			    RW_HAS_WAITERS | RW_WRITE_WANTED;

			if (casip(&lp->rw_wwwh, old, new) != old)
				continue;
		}

		/*
		 * Perform the final exit of a lock that has waiters.
		 */
		ts = turnstile_lookup(lp);

		next_writer = ts->ts_sleepq[TS_WRITER_Q].sq_first;

		if ((old & RW_WRITE_LOCKED) &&
		    (nreaders = rw_readers_to_wake(ts)) > 0) {
			/*
			 * Don't drop the lock -- just set the hold count
			 * such that we grant the lock to all readers at once.
			 */
			new = nreaders * RW_READ_LOCK;
			if (ts->ts_waiters > nreaders)
				new |= RW_HAS_WAITERS;
			if (next_writer)
				new |= RW_WRITE_WANTED;
			lp->rw_wwwh = new;
			membar_enter();
			turnstile_wakeup(ts, TS_READER_Q, nreaders, NULL);
		} else {
			/*
			 * Don't drop the lock -- just transfer ownership
			 * directly to next_writer.  Note that there must
			 * be at least one waiting writer, because we get
			 * here only if (A) the lock is read-locked or
			 * (B) there are no waiting readers.  In case (A),
			 * since the lock is read-locked there would be no
			 * reason for other readers to have blocked unless
			 * the RW_WRITE_WANTED bit was set.  In case (B),
			 * since there are waiters but no waiting readers,
			 * they must all be waiting writers.
			 */
			ASSERT(lp->rw_wwwh & RW_WRITE_WANTED);
			new = RW_WRITE_LOCK(next_writer);
			if (ts->ts_waiters > 1)
				new |= RW_HAS_WAITERS;
			if (next_writer->t_link)
				new |= RW_WRITE_WANTED;
			lp->rw_wwwh = new;
			membar_enter();
			turnstile_wakeup(ts, TS_WRITER_Q, 1, next_writer);
		}
		break;
	}

	/*
	 * Record the release; a read exit also drops the kpri the reader
	 * held while owning the lock (see rw_enter_sleep() comment).
	 */
	if (lock_value == RW_READ_LOCK) {
		THREAD_KPRI_RELEASE();
		LOCKSTAT_RECORD(LS_RW_EXIT_RELEASE, lp, RW_READER);
	} else {
		LOCKSTAT_RECORD(LS_RW_EXIT_RELEASE, lp, RW_WRITER);
	}
}
532 532
/*
 * Try to acquire the lock without blocking; returns non-zero on success.
 * Busy conditions mirror rw_enter_sleep(): RW_READER fails if the lock
 * is write-claimed (locked or wanted by a writer), RW_READER_STARVEWRITER
 * only if it is actually write-locked, and RW_WRITER unless the entire
 * lock word is clear.
 */
int
rw_tryenter(krwlock_t *rwlp, krw_t rw)
{
	rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
	uintptr_t old;

	if (rw != RW_WRITER) {
		uint_t backoff = 0;
		int loop_count = 0;
		/* Take kpri up front; released below if we fail. */
		THREAD_KPRI_REQUEST();
		for (;;) {
			if ((old = lp->rw_wwwh) & (rw == RW_READER ?
			    RW_WRITE_CLAIMED : RW_WRITE_LOCKED)) {
				THREAD_KPRI_RELEASE();
				return (0);
			}
			if (casip(&lp->rw_wwwh, old, old + RW_READ_LOCK) == old)
				break;
			/* cas lost a race with another reader; back off */
			if (rw_lock_delay != NULL) {
				backoff = rw_lock_backoff(backoff);
				rw_lock_delay(backoff);
				if (++loop_count == ncpus_online) {
					backoff = 0;
					loop_count = 0;
				}
			}
		}
		LOCKSTAT_RECORD(LS_RW_TRYENTER_ACQUIRE, lp, rw);
	} else {
		/* Writer: single cas from the fully-clear state. */
		if (casip(&lp->rw_wwwh, 0, RW_WRITE_LOCK(curthread)) != 0)
			return (0);
		LOCKSTAT_RECORD(LS_RW_TRYENTER_ACQUIRE, lp, rw);
	}
	ASSERT(rw_locked(lp, rw));
	/* Memory barrier on the acquire side of the lock. */
	membar_enter();
	return (1);
}
570 570
/*
 * Downgrade a write lock held by curthread to a read lock, applying the
 * same wakeup policy as an exiting writer (see rw_readers_to_wake()).
 */
void
rw_downgrade(krwlock_t *rwlp)
{
	rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;

	/* We will hold the lock as reader on return; take kpri now. */
	THREAD_KPRI_REQUEST();
	/* Memory barrier on the release side of the write lock. */
	membar_exit();

	if ((lp->rw_wwwh & RW_OWNER) != (uintptr_t)curthread) {
		rw_panic("rw_downgrade: not owner", lp);
		return;
	}

	/*
	 * Atomically swap our write ownership for a single read hold.
	 * If the resulting word still has RW_HAS_WAITERS set, hand the
	 * lock to eligible blocked readers as well.
	 */
	if (atomic_add_ip_nv(&lp->rw_wwwh,
	    RW_READ_LOCK - RW_WRITE_LOCK(curthread)) & RW_HAS_WAITERS) {
		turnstile_t *ts = turnstile_lookup(lp);
		int nreaders = rw_readers_to_wake(ts);
		if (nreaders > 0) {
			uintptr_t delta = nreaders * RW_READ_LOCK;
			/* Clear the waiters bit if we wake every waiter. */
			if (ts->ts_waiters == nreaders)
				delta -= RW_HAS_WAITERS;
			atomic_add_ip(&lp->rw_wwwh, delta);
		}
		/* Called even with nreaders == 0 -- see turnstile_wakeup(). */
		turnstile_wakeup(ts, TS_READER_Q, nreaders, NULL);
	}
	ASSERT(rw_locked(lp, RW_READER));
	LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, lp);
}
599 599
/*
 * Try to upgrade a read hold to a write lock; returns non-zero on
 * success.  Fails if any other reader holds the lock or if any bits
 * besides RW_HAS_WAITERS are set (in particular RW_WRITE_WANTED --
 * the WRITE_WANTED bit is honored by specification, see above).
 */
int
rw_tryupgrade(krwlock_t *rwlp)
{
	rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
	uintptr_t old, new;

	ASSERT(rw_locked(lp, RW_READER));

	do {
		/* We must be the sole reader (waiters bit aside)... */
		if (((old = lp->rw_wwwh) & ~RW_HAS_WAITERS) != RW_READ_LOCK)
			return (0);
		/* ...and we swap that one read hold for write ownership. */
		new = old + RW_WRITE_LOCK(curthread) - RW_READ_LOCK;
	} while (casip(&lp->rw_wwwh, old, new) != old);

	/* Memory barrier on the acquire side of the write lock. */
	membar_enter();
	/* Now a writer: drop the kpri held as a reader. */
	THREAD_KPRI_RELEASE();
	LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, lp);
	ASSERT(rw_locked(lp, RW_WRITER));
	return (1);
}
620 620
621 621 int
622 622 rw_read_held(krwlock_t *rwlp)
623 623 {
624 624 uintptr_t tmp;
625 625
626 626 return (_RW_READ_HELD(rwlp, tmp));
627 627 }
628 628
629 629 int
630 630 rw_write_held(krwlock_t *rwlp)
631 631 {
632 632 return (_RW_WRITE_HELD(rwlp));
633 633 }
634 634
635 635 int
636 636 rw_lock_held(krwlock_t *rwlp)
637 637 {
638 638 return (_RW_LOCK_HELD(rwlp));
639 639 }
640 640
641 641 /*
642 642 * Like rw_read_held(), but ASSERTs that the lock is currently held
643 643 */
644 644 int
645 645 rw_read_locked(krwlock_t *rwlp)
646 646 {
647 647 uintptr_t old = ((rwlock_impl_t *)rwlp)->rw_wwwh;
648 648
649 649 ASSERT(old & RW_LOCKED);
650 650 return ((old & RW_LOCKED) && !(old & RW_WRITE_LOCKED));
651 651 }
652 652
653 653 /*
654 654 * Returns non-zero if the lock is either held or desired by a writer
655 655 */
656 656 int
657 657 rw_iswriter(krwlock_t *rwlp)
658 658 {
659 659 return (_RW_ISWRITER(rwlp));
660 660 }
661 661
662 662 kthread_t *
663 663 rw_owner(krwlock_t *rwlp)
664 664 {
665 665 uintptr_t old = ((rwlock_impl_t *)rwlp)->rw_wwwh;
666 666
667 667 return ((old & RW_WRITE_LOCKED) ? (kthread_t *)(old & RW_OWNER) : NULL);
668 668 }
↓ open down ↓ |
438 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX