5045 use atomic_{inc,dec}_* instead of atomic_add_*
--- old/usr/src/uts/common/os/contract.c
+++ new/usr/src/uts/common/os/contract.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Contracts
28 28 * ---------
29 29 *
30 30 * Contracts are a primitive which enrich the relationships between
31 31 * processes and system resources. The primary purpose of contracts is
32 32 * to provide a means for the system to negotiate the departure from a
33 33 * binding relationship (e.g. pages locked in memory or a thread bound
34 34 * to a processor), but they can also be used as a purely asynchronous
35 35 * error reporting mechanism as they are with process contracts.
36 36 *
37 37 * More information on how one interfaces with contracts and what
38 38 * contracts can do for you can be found in:
39 39 * PSARC 2003/193 Solaris Contracts
40 40 * PSARC 2004/460 Contracts addendum
41 41 *
42 42 * This file contains the core contracts framework. By itself it is
43 43 * useless: it depends on the contracts filesystem (ctfs) to provide an
44 44 * interface to user processes and individual contract types to
45 45 * implement the process/resource relationships.
46 46 *
47 47 * Data structure overview
48 48 * -----------------------
49 49 *
50 50 * A contract is represented by a contract_t, which itself points to an
51 51 * encapsulating contract-type specific contract object. A contract_t
52 52 * contains the contract's static identity (including its terms), its
53 53 * linkage to various bookkeeping structures, the contract-specific
54 54 * event queue, and a reference count.
55 55 *
56 56 * A contract template is represented by a ct_template_t, which, like a
57 57 * contract, points to an encapsulating contract-type specific template
58 58 * object. A ct_template_t contains the template's terms.
59 59 *
60 60 * An event queue is represented by a ct_equeue_t, and consists of a
61 61 * list of events, a list of listeners, and a list of listeners who are
62 62 * waiting for new events (affectionately referred to as "tail
63 63 * listeners"). There are three queue types, defined by ct_listnum_t
64 64 * (an enum). An event may be on one of each type of queue
65 65 * simultaneously; the list linkage used by a queue is determined by
66 66 * its type.
67 67 *
68 68 * An event is represented by a ct_kevent_t, which contains mostly
69 69 * static event data (e.g. id, payload). It also has an array of
70 70 * ct_member_t structures, each of which contains a list_node_t and
71 71 * represents the event's linkage in a specific event queue.
72 72 *
73 73 * Each open of an event endpoint results in the creation of a new
74 74 * listener, represented by a ct_listener_t. In addition to linkage
75 75 * into the aforementioned lists in the event_queue, a ct_listener_t
76 76 * contains a pointer to the ct_kevent_t it is currently positioned at
77 77 * as well as a set of status flags and other administrative data.
78 78 *
79 79 * Each process has a list of contracts it owns, p_ct_held; a pointer
80 80 * to the process contract it is a member of, p_ct_process; the linkage
81 81 * for that membership, p_ct_member; and an array of event queue
82 82 * structures representing the process bundle queues.
83 83 *
84 84 * Each LWP has an array of its active templates, lwp_ct_active; and
85 85 * an array of the most recently created contracts of each type, lwp_ct_latest.
86 86 *
87 87 * A process contract has a list of member processes and a list of
88 88 * inherited contracts.
89 89 *
90 90 * There is a system-wide list of all contracts, as well as per-type
91 91 * lists of contracts.
92 92 *
93 93 * Lock ordering overview
94 94 * ----------------------
95 95 *
96 96 * Locks at the top are taken first:
97 97 *
98 98 * ct_evtlock
99 99 * regent ct_lock
100 100 * member ct_lock
101 101 * pidlock
102 102 * p_lock
103 103 * contract ctq_lock contract_lock
104 104 * pbundle ctq_lock
105 105 * cte_lock
106 106 * ct_reflock
107 107 *
108 108 * contract_lock and ctq_lock/cte_lock are not currently taken at the
109 109 * same time.
110 110 *
111 111 * Reference counting and locking
112 112 * ------------------------------
113 113 *
114 114 * A contract has a reference count, protected by ct_reflock.
115 115 * (ct_reflock is also used in a couple other places where atomic
116 116 * access to a variable is needed in an innermost context). A process
117 117 * maintains a hold on each contract it owns. A process contract has a
118 118 * hold on each contract it has inherited. Each event has a hold on
119 119 * the contract which generated it. Process contract templates have
120 120 * holds on the contracts referred to by their transfer terms. CTFS
121 121 * contract directory nodes have holds on contracts. Lastly, various
122 122 * code paths may temporarily take holds on contracts to prevent them
123 123 * from disappearing while other processing is going on. It is
124 124 * important to note that the global contract lists do not hold
125 125 * references on contracts; a contract is removed from these structures
126 126 * atomically with the release of its last reference.
127 127 *
128 128 * At a given point in time, a contract can either be owned by a
129 129 * process, inherited by a regent process contract, or orphaned. A
130 130 * contract_t's owner and regent pointers, ct_owner and ct_regent, are
131 131 * protected by its ct_lock. The linkage in the holder's (holder =
132 132 * owner or regent) list of contracts, ct_ctlist, is protected by
133 133 * whatever lock protects the holder's data structure. In order for
134 134 * these two directions to remain consistent, changing the holder of a
135 135 * contract requires that both locks be held.
136 136 *
137 137 * Events also have reference counts. There is one hold on an event
138 138 * per queue it is present on, in addition to those needed for the
139 139 * usual sundry reasons. Individual listeners are associated with
140 140 * specific queues, and increase a queue-specific reference count
141 141 * stored in the ct_member_t structure.
142 142 *
143 143 * The dynamic contents of an event (reference count and flags) are
144 144 * protected by its cte_lock, while the contents of the embedded
145 145 * ct_member_t structures are protected by the locks of the queues they
146 146 * are linked into. A ct_listener_t's contents are also protected by
147 147 * its event queue's ctq_lock.
148 148 *
149 149 * Resource controls
150 150 * -----------------
151 151 *
152 152 * Control: project.max-contracts (rc_project_contract)
153 153 * Description: Maximum number of contracts allowed to a project.
154 154 *
155 155 * When a contract is created, the project's allocation is tested and
156 156 * (assuming success) increased. When the last reference to a
157 157 * contract is released, the creating project's allocation is
158 158 * decreased.
159 159 */
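As an illustration of the lock ordering documented above, the sketch below (not part of the original file; the function name is hypothetical) shows the back-off-and-relock dance a caller must perform to take a regent's ct_lock before a member's ct_lock when it starts out holding only the member's lock. contract_abandon() and contract_adopt() later in this file follow the same pattern.

/*
 * Illustrative sketch only: acquire locks in the documented
 * regent ct_lock -> member ct_lock order when starting from the
 * member.  A real caller must re-validate ct_regent and the
 * contract's state after re-acquiring ct_lock (as contract_adopt()
 * does), and in general needs a hold on the regent across the
 * window where no locks are held.
 */
static void
lock_regent_then_member(contract_t *ct)
{
	contract_t *regent;

	mutex_enter(&ct->ct_lock);
	if ((regent = ct->ct_regent) == NULL) {
		/* No regent; the member lock alone suffices. */
		mutex_exit(&ct->ct_lock);
		return;
	}
	mutex_exit(&ct->ct_lock);

	mutex_enter(&regent->ct_lock);	/* regent first ... */
	mutex_enter(&ct->ct_lock);	/* ... then member */

	/* ... work requiring both locks ... */

	mutex_exit(&ct->ct_lock);
	mutex_exit(&regent->ct_lock);
}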
160 160
161 161 #include <sys/mutex.h>
162 162 #include <sys/debug.h>
163 163 #include <sys/types.h>
164 164 #include <sys/param.h>
165 165 #include <sys/kmem.h>
166 166 #include <sys/thread.h>
167 167 #include <sys/id_space.h>
168 168 #include <sys/avl.h>
169 169 #include <sys/list.h>
170 170 #include <sys/sysmacros.h>
171 171 #include <sys/proc.h>
172 172 #include <sys/ctfs.h>
173 173 #include <sys/contract_impl.h>
174 174 #include <sys/contract/process_impl.h>
175 175 #include <sys/dditypes.h>
176 176 #include <sys/contract/device_impl.h>
177 177 #include <sys/systm.h>
178 178 #include <sys/atomic.h>
179 179 #include <sys/cmn_err.h>
180 180 #include <sys/model.h>
181 181 #include <sys/policy.h>
182 182 #include <sys/zone.h>
183 183 #include <sys/task.h>
184 184 #include <sys/ddi.h>
185 185 #include <sys/sunddi.h>
186 186
187 187 extern rctl_hndl_t rc_project_contract;
188 188
189 189 static id_space_t *contract_ids;
190 190 static avl_tree_t contract_avl;
191 191 static kmutex_t contract_lock;
192 192
193 193 int ct_ntypes = CTT_MAXTYPE;
194 194 static ct_type_t *ct_types_static[CTT_MAXTYPE];
195 195 ct_type_t **ct_types = ct_types_static;
196 196 int ct_debug;
197 197
198 198 static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int);
199 199 static void cte_queue_destroy(ct_equeue_t *);
200 200 static void cte_queue_drain(ct_equeue_t *, int);
201 201 static void cte_trim(ct_equeue_t *, contract_t *);
202 202 static void cte_copy(ct_equeue_t *, ct_equeue_t *);
203 203
204 204 /*
205 205 * contract_compar
206 206 *
207 207 * A contract comparator which sorts on contract ID.
208 208 */
209 209 int
210 210 contract_compar(const void *x, const void *y)
211 211 {
212 212 const contract_t *ct1 = x;
213 213 const contract_t *ct2 = y;
214 214
215 215 if (ct1->ct_id < ct2->ct_id)
216 216 return (-1);
217 217 if (ct1->ct_id > ct2->ct_id)
218 218 return (1);
219 219 return (0);
220 220 }
221 221
222 222 /*
223 223 * contract_init
224 224 *
225 225 * Initializes the contract subsystem, the specific contract types, and
226 226 * process 0.
227 227 */
228 228 void
229 229 contract_init(void)
230 230 {
231 231 /*
232 232 * Initialize contract subsystem.
233 233 */
234 234 contract_ids = id_space_create("contracts", 1, INT_MAX);
235 235 avl_create(&contract_avl, contract_compar, sizeof (contract_t),
236 236 offsetof(contract_t, ct_ctavl));
237 237 mutex_init(&contract_lock, NULL, MUTEX_DEFAULT, NULL);
238 238
239 239 /*
240 240 * Initialize contract types.
241 241 */
242 242 contract_process_init();
243 243 contract_device_init();
244 244
245 245 /*
246 246 * Initialize p0/lwp0 contract state.
247 247 */
248 248 avl_create(&p0.p_ct_held, contract_compar, sizeof (contract_t),
249 249 offsetof(contract_t, ct_ctlist));
250 250 }
251 251
252 252 /*
253 253 * contract_dtor
254 254 *
255 255 * Performs basic destruction of the common portions of a contract.
256 256 * Called from the failure path of contract_ctor and from
257 257 * contract_rele.
258 258 */
259 259 static void
260 260 contract_dtor(contract_t *ct)
261 261 {
262 262 cte_queue_destroy(&ct->ct_events);
263 263 list_destroy(&ct->ct_vnodes);
264 264 mutex_destroy(&ct->ct_reflock);
265 265 mutex_destroy(&ct->ct_lock);
266 266 mutex_destroy(&ct->ct_evtlock);
267 267 }
268 268
269 269 /*
270 270 * contract_ctor
271 271 *
272 272 * Called by a contract type to initialize a contract. Fails if the
273 273 * max-contract resource control would have been exceeded. After a
274 274 * successful call to contract_ctor, the contract is unlocked and
275 275 * visible in all namespaces; any type-specific initialization should
276 276 * be completed before calling contract_ctor. Returns 0 on success.
277 277 *
278 278 * Because not all callers can tolerate failure, a 0 value for canfail
279 279 * instructs contract_ctor to ignore the project.max-contracts resource
280 280 * control. Obviously, this "out" should only be employed by callers
281 281 * who are sufficiently constrained in other ways (e.g. newproc).
282 282 */
283 283 int
284 284 contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
285 285 ctflags_t flags, proc_t *author, int canfail)
286 286 {
287 287 avl_index_t where;
288 288 klwp_t *curlwp = ttolwp(curthread);
289 289
290 290 ASSERT(author == curproc);
291 291
292 292 mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL);
293 293 mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL);
294 294 mutex_init(&ct->ct_evtlock, NULL, MUTEX_DEFAULT, NULL);
295 295 ct->ct_id = id_alloc(contract_ids);
296 296
297 297 cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0);
298 298 list_create(&ct->ct_vnodes, sizeof (contract_vnode_t),
299 299 offsetof(contract_vnode_t, ctv_node));
300 300
301 301 /*
302 302 * Instance data
303 303 */
304 304 ct->ct_ref = 2; /* one for the holder, one for "latest" */
305 305 ct->ct_cuid = crgetuid(CRED());
306 306 ct->ct_type = type;
307 307 ct->ct_data = data;
308 308 gethrestime(&ct->ct_ctime);
309 309 ct->ct_state = CTS_OWNED;
310 310 ct->ct_flags = flags;
311 311 ct->ct_regent = author->p_ct_process ?
312 312 &author->p_ct_process->conp_contract : NULL;
313 313 ct->ct_ev_info = tmpl->ctmpl_ev_info;
314 314 ct->ct_ev_crit = tmpl->ctmpl_ev_crit;
315 315 ct->ct_cookie = tmpl->ctmpl_cookie;
316 316 ct->ct_owner = author;
317 317 ct->ct_ntime.ctm_total = -1;
318 318 ct->ct_qtime.ctm_total = -1;
319 319 ct->ct_nevent = NULL;
320 320
321 321 /*
322 322 * Test project.max-contracts.
323 323 */
324 324 mutex_enter(&author->p_lock);
325 325 mutex_enter(&contract_lock);
326 326 if (canfail && rctl_test(rc_project_contract,
327 327 author->p_task->tk_proj->kpj_rctls, author, 1,
328 328 RCA_SAFE) & RCT_DENY) {
329 329 id_free(contract_ids, ct->ct_id);
330 330 mutex_exit(&contract_lock);
331 331 mutex_exit(&author->p_lock);
332 332 ct->ct_events.ctq_flags |= CTQ_DEAD;
333 333 contract_dtor(ct);
334 334 return (1);
335 335 }
336 336 ct->ct_proj = author->p_task->tk_proj;
337 337 ct->ct_proj->kpj_data.kpd_contract++;
338 338 (void) project_hold(ct->ct_proj);
339 339 mutex_exit(&contract_lock);
340 340
341 341 /*
342 342 * Insert into holder's avl of contracts.
343 343 * We use an avl not because order is important, but because
344 344 * readdir of /proc/contracts requires we be able to use a
345 345 * scalar as an index into the process's list of contracts
346 346 */
347 347 ct->ct_zoneid = author->p_zone->zone_id;
348 348 ct->ct_czuniqid = ct->ct_mzuniqid = author->p_zone->zone_uniqid;
349 349 VERIFY(avl_find(&author->p_ct_held, ct, &where) == NULL);
350 350 avl_insert(&author->p_ct_held, ct, where);
351 351 mutex_exit(&author->p_lock);
352 352
353 353 /*
354 354 * Insert into global contract AVL
355 355 */
356 356 mutex_enter(&contract_lock);
357 357 VERIFY(avl_find(&contract_avl, ct, &where) == NULL);
358 358 avl_insert(&contract_avl, ct, where);
359 359 mutex_exit(&contract_lock);
360 360
361 361 /*
362 362 * Insert into type AVL
363 363 */
364 364 mutex_enter(&type->ct_type_lock);
365 365 VERIFY(avl_find(&type->ct_type_avl, ct, &where) == NULL);
366 366 avl_insert(&type->ct_type_avl, ct, where);
367 367 type->ct_type_timestruc = ct->ct_ctime;
368 368 mutex_exit(&type->ct_type_lock);
369 369
370 370 if (curlwp->lwp_ct_latest[type->ct_type_index])
371 371 contract_rele(curlwp->lwp_ct_latest[type->ct_type_index]);
372 372 curlwp->lwp_ct_latest[type->ct_type_index] = ct;
373 373
374 374 return (0);
375 375 }
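To show how the constructor above is meant to be driven, here is a hedged sketch of a contract type's create path. Only contract_ctor() and its argument order come from this file; the foo_* type, its fields, and the mapping of a constructor failure to EAGAIN are invented for illustration (the process contract type embeds its contract_t the same way, via its conp_contract field).

/*
 * Hypothetical contract type instance and create path; not part of
 * the original file.
 */
typedef struct foo_contract {
	contract_t	foo_contract;	/* common contract portion */
	int		foo_flavor;	/* invented type-specific state */
} foo_contract_t;

extern ct_type_t *foo_type;		/* from a contract_type_init() call */

static int
foo_create(ct_template_t *tmpl, ctid_t *ctidp)
{
	foo_contract_t *foo;

	foo = kmem_zalloc(sizeof (foo_contract_t), KM_SLEEP);
	foo->foo_flavor = 1;		/* type-specific init before the ctor */

	/*
	 * canfail is 1, so project.max-contracts is enforced; mapping
	 * the failure to EAGAIN is an assumption of this sketch.
	 */
	if (contract_ctor(&foo->foo_contract, foo_type, tmpl, foo, 0,
	    curproc, 1) != 0) {
		kmem_free(foo, sizeof (foo_contract_t));
		return (EAGAIN);
	}

	*ctidp = foo->foo_contract.ct_id;
	return (0);
}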
376 376
377 377 /*
378 378 * contract_rele
379 379 *
380 380 * Releases a reference to a contract. If the caller had the last
381 381 * reference, the contract is removed from all namespaces, its
382 382 * allocation against the max-contracts resource control is released,
383 383 * and the contract type's free entry point is invoked for any
384 384 * type-specific deconstruction and to (presumably) free the object.
385 385 */
386 386 void
387 387 contract_rele(contract_t *ct)
388 388 {
389 389 uint64_t nref;
390 390
391 391 mutex_enter(&ct->ct_reflock);
392 392 ASSERT(ct->ct_ref > 0);
393 393 nref = --ct->ct_ref;
394 394 mutex_exit(&ct->ct_reflock);
395 395 if (nref == 0) {
396 396 /*
397 397 * ct_owner is cleared when it drops its reference.
398 398 */
399 399 ASSERT(ct->ct_owner == NULL);
400 400 ASSERT(ct->ct_evcnt == 0);
401 401
402 402 /*
403 403 * Remove from global contract AVL
404 404 */
405 405 mutex_enter(&contract_lock);
406 406 avl_remove(&contract_avl, ct);
407 407 mutex_exit(&contract_lock);
408 408
409 409 /*
410 410 * Remove from type AVL
411 411 */
412 412 mutex_enter(&ct->ct_type->ct_type_lock);
413 413 avl_remove(&ct->ct_type->ct_type_avl, ct);
414 414 mutex_exit(&ct->ct_type->ct_type_lock);
415 415
416 416 /*
417 417 * Release the contract's ID
418 418 */
419 419 id_free(contract_ids, ct->ct_id);
420 420
421 421 /*
422 422 * Release project hold
423 423 */
424 424 mutex_enter(&contract_lock);
425 425 ct->ct_proj->kpj_data.kpd_contract--;
426 426 project_rele(ct->ct_proj);
427 427 mutex_exit(&contract_lock);
428 428
429 429 /*
430 430 * Free the contract
431 431 */
432 432 contract_dtor(ct);
433 433 ct->ct_type->ct_type_ops->contop_free(ct);
434 434 }
435 435 }
436 436
437 437 /*
438 438 * contract_hold
439 439 *
440 440 * Adds a reference to a contract
441 441 */
442 442 void
443 443 contract_hold(contract_t *ct)
444 444 {
445 445 mutex_enter(&ct->ct_reflock);
446 446 ASSERT(ct->ct_ref < UINT64_MAX);
447 447 ct->ct_ref++;
448 448 mutex_exit(&ct->ct_reflock);
449 449 }
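The "temporary hold" usage mentioned in the header comment might look like the following sketch: contract_ptr() (defined later in this file) returns the contract already held, and the caller pairs it with contract_rele(). The function name is hypothetical.

/*
 * Sketch of the temporary-hold pattern: look up a contract, which
 * returns it held, examine it, then drop the hold.  Not part of the
 * original file.
 */
static void
contract_inspect(ctid_t id)
{
	contract_t *ct;

	if ((ct = contract_ptr(id, GLOBAL_ZONEUNIQID)) == NULL)
		return;		/* no such contract visible to us */

	/* ... read ct fields, taking ct_lock where required ... */

	contract_rele(ct);	/* release the hold taken by contract_ptr() */
}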
450 450
451 451 /*
452 452 * contract_getzuniqid
453 453 *
454 454 * Get a contract's zone unique ID. Needed because 64-bit reads and
455 455 * writes aren't atomic on x86. Since there are contexts where we are
456 456 * unable to take ct_lock, we instead use ct_reflock; in actuality any
457 457 * lock would do.
458 458 */
459 459 uint64_t
460 460 contract_getzuniqid(contract_t *ct)
461 461 {
462 462 uint64_t zuniqid;
463 463
464 464 mutex_enter(&ct->ct_reflock);
465 465 zuniqid = ct->ct_mzuniqid;
466 466 mutex_exit(&ct->ct_reflock);
467 467
468 468 return (zuniqid);
469 469 }
470 470
471 471 /*
472 472 * contract_setzuniqid
473 473 *
474 474 * Sets a contract's zone unique ID. See contract_getzuniqid.
475 475 */
476 476 void
477 477 contract_setzuniqid(contract_t *ct, uint64_t zuniqid)
478 478 {
479 479 mutex_enter(&ct->ct_reflock);
480 480 ct->ct_mzuniqid = zuniqid;
481 481 mutex_exit(&ct->ct_reflock);
482 482 }
483 483
484 484 /*
485 485 * contract_abandon
486 486 *
487 487 * Abandons the specified contract. If "explicit" is clear, the
488 488 * contract was implicitly abandoned (by process exit) and should be
489 489 * inherited if its terms allow it and its owner was a member of a
490 490 * regent contract. Otherwise, the contract type's abandon entry point
491 491 * is invoked to either destroy or orphan the contract.
492 492 */
493 493 int
494 494 contract_abandon(contract_t *ct, proc_t *p, int explicit)
495 495 {
496 496 ct_equeue_t *q = NULL;
497 497 contract_t *parent = &p->p_ct_process->conp_contract;
498 498 int inherit = 0;
499 499
500 500 VERIFY(p == curproc);
501 501
502 502 mutex_enter(&ct->ct_lock);
503 503
504 504 /*
505 505 * Multiple contract locks are taken contract -> subcontract.
506 506 * Check if the contract will be inherited so we can acquire
507 507 * all the necessary locks before making sensitive changes.
508 508 */
509 509 if (!explicit && (ct->ct_flags & CTF_INHERIT) &&
510 510 contract_process_accept(parent)) {
511 511 mutex_exit(&ct->ct_lock);
512 512 mutex_enter(&parent->ct_lock);
513 513 mutex_enter(&ct->ct_lock);
514 514 inherit = 1;
515 515 }
516 516
517 517 if (ct->ct_owner != p) {
518 518 mutex_exit(&ct->ct_lock);
519 519 if (inherit)
520 520 mutex_exit(&parent->ct_lock);
521 521 return (EINVAL);
522 522 }
523 523
524 524 mutex_enter(&p->p_lock);
525 525 if (explicit)
526 526 avl_remove(&p->p_ct_held, ct);
527 527 ct->ct_owner = NULL;
528 528 mutex_exit(&p->p_lock);
529 529
530 530 /*
531 531 * Since we can't call cte_trim with the contract lock held,
532 532 * we grab the queue pointer here.
533 533 */
534 534 if (p->p_ct_equeue)
535 535 q = p->p_ct_equeue[ct->ct_type->ct_type_index];
536 536
537 537 /*
538 538 * contop_abandon may destroy the contract so we rely on it to
539 539 * drop ct_lock. We retain a reference on the contract so that
540 540 * the cte_trim which follows functions properly. Even though
541 541 * cte_trim doesn't dereference the contract pointer, it is
542 542 * still necessary to retain a reference to the contract so
543 543 * that we don't trim events which are sent by a subsequently
544 544 * allocated contract infortuitously located at the same address.
545 545 */
546 546 contract_hold(ct);
547 547
548 548 if (inherit) {
549 549 ct->ct_state = CTS_INHERITED;
550 550 VERIFY(ct->ct_regent == parent);
551 551 contract_process_take(parent, ct);
552 552
553 553 /*
554 554 * We are handing off the process's reference to the
555 555 * parent contract. For this reason, the order in
556 556 * which we drop the contract locks is also important.
557 557 */
558 558 mutex_exit(&ct->ct_lock);
559 559 mutex_exit(&parent->ct_lock);
560 560 } else {
561 561 ct->ct_regent = NULL;
562 562 ct->ct_type->ct_type_ops->contop_abandon(ct);
563 563 }
564 564
565 565 /*
566 566 * ct_lock has been dropped; we can safely trim the event
567 567 * queue now.
568 568 */
569 569 if (q) {
570 570 mutex_enter(&q->ctq_lock);
571 571 cte_trim(q, ct);
572 572 mutex_exit(&q->ctq_lock);
573 573 }
574 574
575 575 contract_rele(ct);
576 576
577 577 return (0);
578 578 }
579 579
580 580 int
581 581 contract_newct(contract_t *ct)
582 582 {
583 583 return (ct->ct_type->ct_type_ops->contop_newct(ct));
584 584 }
585 585
586 586 /*
587 587 * contract_adopt
588 588 *
589 589 * Adopts a contract. After a successful call to this routine, the
590 590 * previously inherited contract will belong to the calling process,
591 591 * and its events will have been appended to its new owner's process
592 592 * bundle queue.
593 593 */
594 594 int
595 595 contract_adopt(contract_t *ct, proc_t *p)
596 596 {
597 597 avl_index_t where;
598 598 ct_equeue_t *q;
599 599 contract_t *parent;
600 600
601 601 ASSERT(p == curproc);
602 602
603 603 /*
604 604 * Ensure the process has an event queue. Checked by ASSERTs
605 605 * below.
606 606 */
607 607 (void) contract_type_pbundle(ct->ct_type, p);
608 608
609 609 mutex_enter(&ct->ct_lock);
610 610 parent = ct->ct_regent;
611 611 if (ct->ct_state != CTS_INHERITED ||
612 612 &p->p_ct_process->conp_contract != parent ||
613 613 p->p_zone->zone_uniqid != ct->ct_czuniqid) {
614 614 mutex_exit(&ct->ct_lock);
615 615 return (EINVAL);
616 616 }
617 617
618 618 /*
619 619 * Multiple contract locks are taken contract -> subcontract.
620 620 */
621 621 mutex_exit(&ct->ct_lock);
622 622 mutex_enter(&parent->ct_lock);
623 623 mutex_enter(&ct->ct_lock);
624 624
625 625 /*
626 626 * It is possible that the contract was adopted by someone else
627 627 * while its lock was dropped. It isn't possible for the
628 628 * contract to have been inherited by a different regent
629 629 * contract.
630 630 */
631 631 if (ct->ct_state != CTS_INHERITED) {
632 632 mutex_exit(&parent->ct_lock);
633 633 mutex_exit(&ct->ct_lock);
634 634 return (EBUSY);
635 635 }
636 636 ASSERT(ct->ct_regent == parent);
637 637
638 638 ct->ct_state = CTS_OWNED;
639 639
640 640 contract_process_adopt(ct, p);
641 641
642 642 mutex_enter(&p->p_lock);
643 643 ct->ct_owner = p;
644 644 VERIFY(avl_find(&p->p_ct_held, ct, &where) == NULL);
645 645 avl_insert(&p->p_ct_held, ct, where);
646 646 mutex_exit(&p->p_lock);
647 647
648 648 ASSERT(ct->ct_owner->p_ct_equeue);
649 649 ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
650 650 q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
651 651 cte_copy(&ct->ct_events, q);
652 652 mutex_exit(&ct->ct_lock);
653 653
654 654 return (0);
655 655 }
656 656
657 657 /*
658 658 * contract_ack
659 659 *
660 660 * Acknowledges receipt of a critical event.
661 661 */
662 662 int
663 663 contract_ack(contract_t *ct, uint64_t evid, int ack)
664 664 {
665 665 ct_kevent_t *ev;
666 666 list_t *queue = &ct->ct_events.ctq_events;
667 667 int error = ESRCH;
668 668 int nego = 0;
669 669 uint_t evtype;
670 670
671 671 ASSERT(ack == CT_ACK || ack == CT_NACK);
672 672
673 673 mutex_enter(&ct->ct_lock);
674 674 mutex_enter(&ct->ct_events.ctq_lock);
675 675 /*
676 676 * We are probably ACKing something near the head of the queue.
677 677 */
678 678 for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
679 679 if (ev->cte_id == evid) {
680 680 if (ev->cte_flags & CTE_NEG)
681 681 nego = 1;
682 682 else if (ack == CT_NACK)
683 683 break;
684 684 if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
685 685 ev->cte_flags |= CTE_ACK;
686 686 ct->ct_evcnt--;
687 687 evtype = ev->cte_type;
688 688 error = 0;
689 689 }
690 690 break;
691 691 }
692 692 }
693 693 mutex_exit(&ct->ct_events.ctq_lock);
694 694 mutex_exit(&ct->ct_lock);
695 695
696 696 /*
697 697 * Not all critical events are negotiation events; however, every
698 698 * negotiation event is a critical event. NEGEND events are
699 699 * critical events but are not negotiation events.
700 700 */
701 701 if (error || !nego)
702 702 return (error);
703 703
704 704 if (ack == CT_ACK)
705 705 error = ct->ct_type->ct_type_ops->contop_ack(ct, evtype, evid);
706 706 else
707 707 error = ct->ct_type->ct_type_ops->contop_nack(ct, evtype, evid);
708 708
709 709 return (error);
710 710 }
711 711
712 712 /*ARGSUSED*/
713 713 int
714 714 contract_ack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
715 715 {
716 716 cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
717 717 ct->ct_id);
718 718 return (ENOSYS);
719 719 }
720 720
721 721 /*ARGSUSED*/
722 722 int
723 723 contract_qack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
724 724 {
725 725 cmn_err(CE_PANIC, "contract_qack_inval: unsupported call: ctid: %u",
726 726 ct->ct_id);
727 727 return (ENOSYS);
728 728 }
729 729
730 730 /*ARGSUSED*/
731 731 int
732 732 contract_qack_notsup(contract_t *ct, uint_t evtype, uint64_t evid)
733 733 {
734 734 return (ERANGE);
735 735 }
736 736
737 737 /*
738 738 * contract_qack
739 739 *
740 740 * Asks that negotiations be extended by another time quantum.
741 741 */
742 742 int
743 743 contract_qack(contract_t *ct, uint64_t evid)
744 744 {
745 745 ct_kevent_t *ev;
746 746 list_t *queue = &ct->ct_events.ctq_events;
747 747 int nego = 0;
748 748 uint_t evtype;
749 749
750 750 mutex_enter(&ct->ct_lock);
751 751 mutex_enter(&ct->ct_events.ctq_lock);
752 752
753 753 for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
754 754 if (ev->cte_id == evid) {
755 755 if ((ev->cte_flags & (CTE_NEG | CTE_ACK)) == CTE_NEG) {
756 756 evtype = ev->cte_type;
757 757 nego = 1;
758 758 }
759 759 break;
760 760 }
761 761 }
762 762 mutex_exit(&ct->ct_events.ctq_lock);
763 763 mutex_exit(&ct->ct_lock);
764 764
765 765 /*
766 766 * Only a negotiated event (which is by definition also a critical
767 767 * event) that has not yet been acknowledged can provide
768 768 * time quanta to a negotiating owner process.
769 769 */
770 770 if (!nego)
771 771 return (ESRCH);
772 772
773 773 return (ct->ct_type->ct_type_ops->contop_qack(ct, evtype, evid));
774 774 }
775 775
776 776 /*
777 777 * contract_orphan
778 778 *
779 779 * Icky-poo. This is a process-contract special, used to ACK all
780 780 * critical messages when a contract is orphaned.
781 781 */
782 782 void
783 783 contract_orphan(contract_t *ct)
784 784 {
785 785 ct_kevent_t *ev;
786 786 list_t *queue = &ct->ct_events.ctq_events;
787 787
788 788 ASSERT(MUTEX_HELD(&ct->ct_lock));
789 789 ASSERT(ct->ct_state != CTS_ORPHAN);
790 790
791 791 mutex_enter(&ct->ct_events.ctq_lock);
792 792 ct->ct_state = CTS_ORPHAN;
793 793 for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
794 794 if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
795 795 ev->cte_flags |= CTE_ACK;
796 796 ct->ct_evcnt--;
797 797 }
798 798 }
799 799 mutex_exit(&ct->ct_events.ctq_lock);
800 800
801 801 ASSERT(ct->ct_evcnt == 0);
802 802 }
803 803
804 804 /*
805 805 * contract_destroy
806 806 *
807 807 * Explicit contract destruction. Called when the contract is empty.
808 808 * The contract will actually stick around until all of its events are
809 809 * removed from the bundle and process bundle queues, and all fds
810 810 * which refer to it are closed. See contract_dtor if you are looking
811 811 * for what destroys the contract structure.
812 812 */
813 813 void
814 814 contract_destroy(contract_t *ct)
815 815 {
816 816 ASSERT(MUTEX_HELD(&ct->ct_lock));
817 817 ASSERT(ct->ct_state != CTS_DEAD);
818 818 ASSERT(ct->ct_owner == NULL);
819 819
820 820 ct->ct_state = CTS_DEAD;
821 821 cte_queue_drain(&ct->ct_events, 1);
822 822 mutex_exit(&ct->ct_lock);
823 823 mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
824 824 cte_trim(&ct->ct_type->ct_type_events, ct);
825 825 mutex_exit(&ct->ct_type->ct_type_events.ctq_lock);
826 826 mutex_enter(&ct->ct_lock);
827 827 ct->ct_type->ct_type_ops->contop_destroy(ct);
828 828 mutex_exit(&ct->ct_lock);
829 829 contract_rele(ct);
830 830 }
831 831
832 832 /*
833 833 * contract_vnode_get
834 834 *
835 835 * Obtains the contract directory vnode for this contract, if there is
836 836 * one. The caller must VN_RELE the vnode when they are through using
837 837 * it.
838 838 */
839 839 vnode_t *
840 840 contract_vnode_get(contract_t *ct, vfs_t *vfsp)
841 841 {
842 842 contract_vnode_t *ctv;
843 843 vnode_t *vp = NULL;
844 844
845 845 mutex_enter(&ct->ct_lock);
846 846 for (ctv = list_head(&ct->ct_vnodes); ctv != NULL;
847 847 ctv = list_next(&ct->ct_vnodes, ctv))
848 848 if (ctv->ctv_vnode->v_vfsp == vfsp) {
849 849 vp = ctv->ctv_vnode;
850 850 VN_HOLD(vp);
851 851 break;
852 852 }
853 853 mutex_exit(&ct->ct_lock);
854 854 return (vp);
855 855 }
856 856
857 857 /*
858 858 * contract_vnode_set
859 859 *
860 860 * Sets the contract directory vnode for this contract. We don't hold
861 861 * a reference on the vnode because we don't want to prevent it from
862 862 * being freed. The vnode's inactive entry point will take care of
863 863 * notifying us when it should be removed.
864 864 */
865 865 void
866 866 contract_vnode_set(contract_t *ct, contract_vnode_t *ctv, vnode_t *vnode)
867 867 {
868 868 mutex_enter(&ct->ct_lock);
869 869 ctv->ctv_vnode = vnode;
870 870 list_insert_head(&ct->ct_vnodes, ctv);
871 871 mutex_exit(&ct->ct_lock);
872 872 }
873 873
874 874 /*
875 875 * contract_vnode_clear
876 876 *
877 877 * Removes this vnode as the contract directory vnode for this
878 878 * contract. Called from a contract directory's inactive entry point,
879 879 * this may return 0 indicating that the vnode gained another reference
880 880 * because of a simultaneous call to contract_vnode_get.
881 881 */
882 882 int
883 883 contract_vnode_clear(contract_t *ct, contract_vnode_t *ctv)
884 884 {
885 885 vnode_t *vp = ctv->ctv_vnode;
886 886 int result;
887 887
888 888 mutex_enter(&ct->ct_lock);
889 889 mutex_enter(&vp->v_lock);
890 890 if (vp->v_count == 1) {
891 891 list_remove(&ct->ct_vnodes, ctv);
892 892 result = 1;
893 893 } else {
894 894 vp->v_count--;
895 895 result = 0;
896 896 }
897 897 mutex_exit(&vp->v_lock);
898 898 mutex_exit(&ct->ct_lock);
899 899
900 900 return (result);
901 901 }
902 902
903 903 /*
904 904 * contract_exit
905 905 *
906 906 * Abandons all contracts held by process p, and drains process p's
907 907 * bundle queues. Called on process exit.
908 908 */
909 909 void
910 910 contract_exit(proc_t *p)
911 911 {
912 912 contract_t *ct;
913 913 void *cookie = NULL;
914 914 int i;
915 915
916 916 ASSERT(p == curproc);
917 917
918 918 /*
919 919 * Abandon held contracts. contract_abandon knows enough not
920 920 * to remove the contract from the list a second time. We are
921 921 * exiting, so no locks are needed here. But because
922 922 * contract_abandon will take p_lock, we need to make sure we
923 923 * aren't holding it.
924 924 */
925 925 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
926 926 while ((ct = avl_destroy_nodes(&p->p_ct_held, &cookie)) != NULL)
927 927 VERIFY(contract_abandon(ct, p, 0) == 0);
928 928
929 929 /*
930 930 * Drain pbundles. Because a process bundle queue could have
931 931 * been passed to another process, it may not be freed right
932 932 * away.
933 933 */
934 934 if (p->p_ct_equeue) {
935 935 for (i = 0; i < CTT_MAXTYPE; i++)
936 936 if (p->p_ct_equeue[i])
937 937 cte_queue_drain(p->p_ct_equeue[i], 0);
938 938 kmem_free(p->p_ct_equeue, CTT_MAXTYPE * sizeof (ct_equeue_t *));
939 939 }
940 940 }
941 941
942 942 static int
943 943 get_time_left(struct ct_time *t)
944 944 {
945 945 clock_t ticks_elapsed;
946 946 int secs_elapsed;
947 947
948 948 if (t->ctm_total == -1)
949 949 return (-1);
950 950
951 951 ticks_elapsed = ddi_get_lbolt() - t->ctm_start;
952 952 secs_elapsed = t->ctm_total - (drv_hztousec(ticks_elapsed)/MICROSEC);
953 953 return (secs_elapsed > 0 ? secs_elapsed : 0);
954 954 }
955 955
956 956 /*
957 957 * contract_status_common
958 958 *
959 959 * Populates a ct_status structure. Used by contract types in their
960 960 * status entry points and ctfs when only common information is
961 961 * requested.
962 962 */
963 963 void
964 964 contract_status_common(contract_t *ct, zone_t *zone, void *status,
965 965 model_t model)
966 966 {
967 967 STRUCT_HANDLE(ct_status, lstatus);
968 968
969 969 STRUCT_SET_HANDLE(lstatus, model, status);
970 970 ASSERT(MUTEX_HELD(&ct->ct_lock));
971 971 if (zone->zone_uniqid == GLOBAL_ZONEUNIQID ||
972 972 zone->zone_uniqid == ct->ct_czuniqid) {
973 973 zone_t *czone;
974 974 zoneid_t zoneid = -1;
975 975
976 976 /*
977 977 * Contracts don't have holds on the zones they were
978 978 * created by. If the contract's zone no longer
979 979 * exists, we say its zoneid is -1.
980 980 */
981 981 if (zone->zone_uniqid == ct->ct_czuniqid ||
982 982 ct->ct_czuniqid == GLOBAL_ZONEUNIQID) {
983 983 zoneid = ct->ct_zoneid;
984 984 } else if ((czone = zone_find_by_id(ct->ct_zoneid)) != NULL) {
985 985 if (czone->zone_uniqid == ct->ct_mzuniqid)
986 986 zoneid = ct->ct_zoneid;
987 987 zone_rele(czone);
988 988 }
989 989
990 990 STRUCT_FSET(lstatus, ctst_zoneid, zoneid);
991 991 STRUCT_FSET(lstatus, ctst_holder,
992 992 (ct->ct_state == CTS_OWNED) ? ct->ct_owner->p_pid :
993 993 (ct->ct_state == CTS_INHERITED) ? ct->ct_regent->ct_id : 0);
994 994 STRUCT_FSET(lstatus, ctst_state, ct->ct_state);
995 995 } else {
996 996 /*
997 997 * We are looking at a contract which was created by a
998 998 * process outside of our zone. We provide fake zone,
999 999 * holder, and state information.
1000 1000 */
1001 1001
1002 1002 STRUCT_FSET(lstatus, ctst_zoneid, zone->zone_id);
1003 1003 /*
1004 1004 * Since "zone" can't disappear until the calling ctfs
1005 1005 * is unmounted, zone_zsched must be valid.
1006 1006 */
1007 1007 STRUCT_FSET(lstatus, ctst_holder, (ct->ct_state < CTS_ORPHAN) ?
1008 1008 zone->zone_zsched->p_pid : 0);
1009 1009 STRUCT_FSET(lstatus, ctst_state, (ct->ct_state < CTS_ORPHAN) ?
1010 1010 CTS_OWNED : ct->ct_state);
1011 1011 }
1012 1012 STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt);
1013 1013 STRUCT_FSET(lstatus, ctst_ntime, get_time_left(&ct->ct_ntime));
1014 1014 STRUCT_FSET(lstatus, ctst_qtime, get_time_left(&ct->ct_qtime));
1015 1015 STRUCT_FSET(lstatus, ctst_nevid,
1016 1016 ct->ct_nevent ? ct->ct_nevent->cte_id : 0);
1017 1017 STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit);
1018 1018 STRUCT_FSET(lstatus, ctst_informative, ct->ct_ev_info);
1019 1019 STRUCT_FSET(lstatus, ctst_cookie, ct->ct_cookie);
1020 1020 STRUCT_FSET(lstatus, ctst_type, ct->ct_type->ct_type_index);
1021 1021 STRUCT_FSET(lstatus, ctst_id, ct->ct_id);
1022 1022 }
1023 1023
1024 1024 /*
1025 1025 * contract_checkcred
1026 1026 *
1027 1027 * Determines if the specified contract is owned by a process with the
1028 1028 * same effective uid as the specified credential. The caller must
1029 1029 * ensure that the uid spaces are the same. Returns 1 on success.
1030 1030 */
1031 1031 static int
1032 1032 contract_checkcred(contract_t *ct, const cred_t *cr)
1033 1033 {
1034 1034 proc_t *p;
1035 1035 int fail = 1;
1036 1036
1037 1037 mutex_enter(&ct->ct_lock);
1038 1038 if ((p = ct->ct_owner) != NULL) {
1039 1039 mutex_enter(&p->p_crlock);
1040 1040 fail = crgetuid(cr) != crgetuid(p->p_cred);
1041 1041 mutex_exit(&p->p_crlock);
1042 1042 }
1043 1043 mutex_exit(&ct->ct_lock);
1044 1044
1045 1045 return (!fail);
1046 1046 }
1047 1047
1048 1048 /*
1049 1049 * contract_owned
1050 1050 *
1051 1051 * Determines if the specified credential can view an event generated
1052 1052 * by the specified contract. If locked is set, the contract's ct_lock
1053 1053 * is held and the caller will need to do additional work to determine
1054 1054 * if they truly can see the event. Returns 1 on success.
1055 1055 */
1056 1056 int
1057 1057 contract_owned(contract_t *ct, const cred_t *cr, int locked)
1058 1058 {
1059 1059 int owner, cmatch, zmatch;
1060 1060 uint64_t zuniqid, mzuniqid;
1061 1061 uid_t euid;
1062 1062
1063 1063 ASSERT(locked || MUTEX_NOT_HELD(&ct->ct_lock));
1064 1064
1065 1065 zuniqid = curproc->p_zone->zone_uniqid;
1066 1066 mzuniqid = contract_getzuniqid(ct);
1067 1067 euid = crgetuid(cr);
1068 1068
1069 1069 /*
1070 1070 * owner: we own the contract
1071 1071 * cmatch: we are in the creator's (and holder's) zone and our
1072 1072 * uid matches the creator's or holder's
1073 1073 * zmatch: we are in the effective zone of a contract created
1074 1074 * in the global zone, and our uid matches that of the
1075 1075 * virtualized holder's (zsched/kcred)
1076 1076 */
1077 1077 owner = (ct->ct_owner == curproc);
1078 1078 cmatch = (zuniqid == ct->ct_czuniqid) &&
1079 1079 ((ct->ct_cuid == euid) || (!locked && contract_checkcred(ct, cr)));
1080 1080 zmatch = (ct->ct_czuniqid != mzuniqid) && (zuniqid == mzuniqid) &&
1081 1081 (crgetuid(kcred) == euid);
1082 1082
1083 1083 return (owner || cmatch || zmatch);
1084 1084 }
1085 1085
1086 1086
1087 1087 /*
1088 1088 * contract_type_init
1089 1089 *
1090 1090 * Called by contract types to register themselves with the contracts
1091 1091 * framework.
1092 1092 */
1093 1093 ct_type_t *
1094 1094 contract_type_init(ct_typeid_t type, const char *name, contops_t *ops,
1095 1095 ct_f_default_t *dfault)
1096 1096 {
1097 1097 ct_type_t *result;
1098 1098
1099 1099 ASSERT(type < CTT_MAXTYPE);
1100 1100
1101 1101 result = kmem_alloc(sizeof (ct_type_t), KM_SLEEP);
1102 1102
1103 1103 mutex_init(&result->ct_type_lock, NULL, MUTEX_DEFAULT, NULL);
1104 1104 avl_create(&result->ct_type_avl, contract_compar, sizeof (contract_t),
1105 1105 offsetof(contract_t, ct_cttavl));
1106 1106 cte_queue_create(&result->ct_type_events, CTEL_BUNDLE, 20, 0);
1107 1107 result->ct_type_name = name;
1108 1108 result->ct_type_ops = ops;
1109 1109 result->ct_type_default = dfault;
1110 1110 result->ct_type_evid = 0;
1111 1111 gethrestime(&result->ct_type_timestruc);
1112 1112 result->ct_type_index = type;
1113 1113
1114 1114 ct_types[type] = result;
1115 1115
1116 1116 return (result);
1117 1117 }
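Here is a sketch of how a contract type might register itself with the call above. Only the entry points this file actually invokes are shown (the real contops_t has additional ops, such as a status op, which are omitted here); CTT_FOO and every foo_* name are hypothetical.

/*
 * Hypothetical registration, modeled loosely on what
 * contract_process_init()/contract_device_init() must do.  Unlisted
 * contops_t entry points are left NULL in this sketch and must not
 * be reachable.
 */
static contops_t foo_contops = {
	.contop_free	= foo_free,		/* assumed to exist */
	.contop_abandon	= foo_abandon,
	.contop_destroy	= foo_destroy,
	.contop_ack	= contract_ack_inval,	/* type emits no negotiation events */
	.contop_nack	= contract_ack_inval,
	.contop_qack	= contract_qack_notsup,
	.contop_newct	= foo_newct,
};

ct_type_t *foo_type;

void
foo_contract_type_init(void)
{
	foo_type = contract_type_init(CTT_FOO, "foo", &foo_contops,
	    foo_default_template);
}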
1118 1118
1119 1119 /*
1120 1120 * contract_type_count
1121 1121 *
1122 1122 * Obtains the number of contracts of a particular type.
1123 1123 */
1124 1124 int
1125 1125 contract_type_count(ct_type_t *type)
1126 1126 {
1127 1127 ulong_t count;
1128 1128
1129 1129 mutex_enter(&type->ct_type_lock);
1130 1130 count = avl_numnodes(&type->ct_type_avl);
1131 1131 mutex_exit(&type->ct_type_lock);
1132 1132
1133 1133 return (count);
1134 1134 }
1135 1135
1136 1136 /*
1137 1137 * contract_type_max
1138 1138 *
1139 1139 * Obtains the maximum contract id of a particular type.
1140 1140 */
1141 1141 ctid_t
1142 1142 contract_type_max(ct_type_t *type)
1143 1143 {
1144 1144 contract_t *ct;
1145 1145 ctid_t res;
1146 1146
1147 1147 mutex_enter(&type->ct_type_lock);
1148 1148 ct = avl_last(&type->ct_type_avl);
1149 1149 res = ct ? ct->ct_id : -1;
1150 1150 mutex_exit(&type->ct_type_lock);
1151 1151
1152 1152 return (res);
1153 1153 }
1154 1154
1155 1155 /*
1156 1156 * contract_max
1157 1157 *
1158 1158 * Obtains the maximum contract id.
1159 1159 */
1160 1160 ctid_t
1161 1161 contract_max(void)
1162 1162 {
1163 1163 contract_t *ct;
1164 1164 ctid_t res;
1165 1165
1166 1166 mutex_enter(&contract_lock);
1167 1167 ct = avl_last(&contract_avl);
1168 1168 res = ct ? ct->ct_id : -1;
1169 1169 mutex_exit(&contract_lock);
1170 1170
1171 1171 return (res);
1172 1172 }
1173 1173
1174 1174 /*
1175 1175 * contract_lookup_common
1176 1176 *
1177 1177 * Common code for contract_lookup and contract_type_lookup. Takes a
1178 1178 * pointer to an AVL tree to search in. Should be called with the
1179 1179 * appropriate tree-protecting lock held (unfortunately unassertable).
1180 1180 */
1181 1181 static ctid_t
1182 1182 contract_lookup_common(avl_tree_t *tree, uint64_t zuniqid, ctid_t current)
1183 1183 {
1184 1184 contract_t template, *ct;
1185 1185 avl_index_t where;
1186 1186 ctid_t res;
1187 1187
1188 1188 template.ct_id = current;
1189 1189 ct = avl_find(tree, &template, &where);
1190 1190 if (ct == NULL)
1191 1191 ct = avl_nearest(tree, where, AVL_AFTER);
1192 1192 if (zuniqid != GLOBAL_ZONEUNIQID)
1193 1193 while (ct && (contract_getzuniqid(ct) != zuniqid))
1194 1194 ct = AVL_NEXT(tree, ct);
1195 1195 res = ct ? ct->ct_id : -1;
1196 1196
1197 1197 return (res);
1198 1198 }
1199 1199
1200 1200 /*
1201 1201 * contract_type_lookup
1202 1202 *
1203 1203 * Returns the next type contract after the specified id, visible from
1204 1204 * the specified zone.
1205 1205 */
1206 1206 ctid_t
1207 1207 contract_type_lookup(ct_type_t *type, uint64_t zuniqid, ctid_t current)
1208 1208 {
1209 1209 ctid_t res;
1210 1210
1211 1211 mutex_enter(&type->ct_type_lock);
1212 1212 res = contract_lookup_common(&type->ct_type_avl, zuniqid, current);
1213 1213 mutex_exit(&type->ct_type_lock);
1214 1214
1215 1215 return (res);
1216 1216 }
1217 1217
1218 1218 /*
1219 1219 * contract_lookup
1220 1220 *
1221 1221 * Returns the next contract after the specified id, visible from the
1222 1222 * specified zone.
1223 1223 */
1224 1224 ctid_t
1225 1225 contract_lookup(uint64_t zuniqid, ctid_t current)
1226 1226 {
1227 1227 ctid_t res;
1228 1228
1229 1229 mutex_enter(&contract_lock);
1230 1230 res = contract_lookup_common(&contract_avl, zuniqid, current);
1231 1231 mutex_exit(&contract_lock);
1232 1232
1233 1233 return (res);
1234 1234 }
1235 1235
1236 1236 /*
1237 1237 * contract_plookup
1238 1238 *
1239 1239 * Returns the next contract held by process p after the specified id,
1240 1240 * visible from the specified zone. Made complicated by the fact that
1241 1241 * contracts visible in a zone but held by processes outside of the
1242 1242 * zone need to appear as being held by zsched to zone members.
1243 1243 */
1244 1244 ctid_t
1245 1245 contract_plookup(proc_t *p, ctid_t current, uint64_t zuniqid)
1246 1246 {
1247 1247 contract_t template, *ct;
1248 1248 avl_index_t where;
1249 1249 ctid_t res;
1250 1250
1251 1251 template.ct_id = current;
1252 1252 if (zuniqid != GLOBAL_ZONEUNIQID &&
1253 1253 (p->p_flag & (SSYS|SZONETOP)) == (SSYS|SZONETOP)) {
1254 1254 /* This is inelegant. */
1255 1255 mutex_enter(&contract_lock);
1256 1256 ct = avl_find(&contract_avl, &template, &where);
1257 1257 if (ct == NULL)
1258 1258 ct = avl_nearest(&contract_avl, where, AVL_AFTER);
1259 1259 while (ct && !(ct->ct_state < CTS_ORPHAN &&
1260 1260 contract_getzuniqid(ct) == zuniqid &&
1261 1261 ct->ct_czuniqid == GLOBAL_ZONEUNIQID))
1262 1262 ct = AVL_NEXT(&contract_avl, ct);
1263 1263 res = ct ? ct->ct_id : -1;
1264 1264 mutex_exit(&contract_lock);
1265 1265 } else {
1266 1266 mutex_enter(&p->p_lock);
1267 1267 ct = avl_find(&p->p_ct_held, &template, &where);
1268 1268 if (ct == NULL)
1269 1269 ct = avl_nearest(&p->p_ct_held, where, AVL_AFTER);
1270 1270 res = ct ? ct->ct_id : -1;
1271 1271 mutex_exit(&p->p_lock);
1272 1272 }
1273 1273
1274 1274 return (res);
1275 1275 }
1276 1276
1277 1277 /*
1278 1278 * contract_ptr_common
1279 1279 *
1280 1280 * Common code for contract_ptr and contract_type_ptr. Takes a pointer
1281 1281 * to an AVL tree to search in. Should be called with the appropriate
1282 1282 * tree-protecting lock held (unfortunately unassertable).
1283 1283 */
1284 1284 static contract_t *
1285 1285 contract_ptr_common(avl_tree_t *tree, ctid_t id, uint64_t zuniqid)
1286 1286 {
1287 1287 contract_t template, *ct;
1288 1288
1289 1289 template.ct_id = id;
1290 1290 ct = avl_find(tree, &template, NULL);
1291 1291 if (ct == NULL || (zuniqid != GLOBAL_ZONEUNIQID &&
1292 1292 contract_getzuniqid(ct) != zuniqid)) {
1293 1293 return (NULL);
1294 1294 }
1295 1295
1296 1296 /*
1297 1297 * Check to see if a thread is in the window in contract_rele
1298 1298 * between dropping the reference count and removing the
1299 1299 * contract from the type AVL.
1300 1300 */
1301 1301 mutex_enter(&ct->ct_reflock);
1302 1302 if (ct->ct_ref) {
1303 1303 ct->ct_ref++;
1304 1304 mutex_exit(&ct->ct_reflock);
1305 1305 } else {
1306 1306 mutex_exit(&ct->ct_reflock);
1307 1307 ct = NULL;
1308 1308 }
1309 1309
1310 1310 return (ct);
1311 1311 }
1312 1312
1313 1313 /*
1314 1314 * contract_type_ptr
1315 1315 *
1316 1316 * Returns a pointer to the contract with the specified id. The
1317 1317 * contract is held, so the caller needs to release the reference when
1318 1318 * it is through with the contract.
1319 1319 */
1320 1320 contract_t *
1321 1321 contract_type_ptr(ct_type_t *type, ctid_t id, uint64_t zuniqid)
1322 1322 {
1323 1323 contract_t *ct;
1324 1324
1325 1325 mutex_enter(&type->ct_type_lock);
1326 1326 ct = contract_ptr_common(&type->ct_type_avl, id, zuniqid);
1327 1327 mutex_exit(&type->ct_type_lock);
1328 1328
1329 1329 return (ct);
1330 1330 }
1331 1331
1332 1332 /*
1333 1333 * contract_ptr
1334 1334 *
1335 1335 * Returns a pointer to the contract with the specified id. The
1336 1336 * contract is held, so the caller needs to release the reference when
1337 1337 * it is through with the contract.
1338 1338 */
1339 1339 contract_t *
1340 1340 contract_ptr(ctid_t id, uint64_t zuniqid)
1341 1341 {
1342 1342 contract_t *ct;
1343 1343
1344 1344 mutex_enter(&contract_lock);
1345 1345 ct = contract_ptr_common(&contract_avl, id, zuniqid);
1346 1346 mutex_exit(&contract_lock);
1347 1347
1348 1348 return (ct);
1349 1349 }
1350 1350
1351 1351 /*
1352 1352 * contract_type_time
1353 1353 *
1354 1354 * Obtains the last time a contract of a particular type was created.
1355 1355 */
1356 1356 void
1357 1357 contract_type_time(ct_type_t *type, timestruc_t *time)
1358 1358 {
1359 1359 mutex_enter(&type->ct_type_lock);
1360 1360 *time = type->ct_type_timestruc;
1361 1361 mutex_exit(&type->ct_type_lock);
1362 1362 }
1363 1363
1364 1364 /*
1365 1365 * contract_type_bundle
1366 1366 *
1367 1367 * Obtains a type's bundle queue.
1368 1368 */
1369 1369 ct_equeue_t *
1370 1370 contract_type_bundle(ct_type_t *type)
1371 1371 {
1372 1372 return (&type->ct_type_events);
1373 1373 }
1374 1374
1375 1375 /*
1376 1376 * contract_type_pbundle
1377 1377 *
1378 1378 * Obtains a process's bundle queue. If one doesn't exist, one is
1379 1379 * created. Often used simply to ensure that a bundle queue is
1380 1380 * allocated.
1381 1381 */
1382 1382 ct_equeue_t *
1383 1383 contract_type_pbundle(ct_type_t *type, proc_t *pp)
1384 1384 {
1385 1385 /*
1386 1386 * If there isn't an array of bundle queues, allocate one.
1387 1387 */
1388 1388 if (pp->p_ct_equeue == NULL) {
1389 1389 size_t size = CTT_MAXTYPE * sizeof (ct_equeue_t *);
1390 1390 ct_equeue_t **qa = kmem_zalloc(size, KM_SLEEP);
1391 1391
1392 1392 mutex_enter(&pp->p_lock);
1393 1393 if (pp->p_ct_equeue)
1394 1394 kmem_free(qa, size);
1395 1395 else
1396 1396 pp->p_ct_equeue = qa;
1397 1397 mutex_exit(&pp->p_lock);
1398 1398 }
1399 1399
1400 1400 /*
1401 1401 * If there isn't a bundle queue of the required type, allocate
1402 1402 * one.
1403 1403 */
1404 1404 if (pp->p_ct_equeue[type->ct_type_index] == NULL) {
1405 1405 ct_equeue_t *q = kmem_zalloc(sizeof (ct_equeue_t), KM_SLEEP);
1406 1406 cte_queue_create(q, CTEL_PBUNDLE, 20, 1);
1407 1407
1408 1408 mutex_enter(&pp->p_lock);
1409 1409 if (pp->p_ct_equeue[type->ct_type_index])
1410 1410 cte_queue_drain(q, 0);
1411 1411 else
1412 1412 pp->p_ct_equeue[type->ct_type_index] = q;
1413 1413 mutex_exit(&pp->p_lock);
1414 1414 }
1415 1415
1416 1416 return (pp->p_ct_equeue[type->ct_type_index]);
1417 1417 }
1418 1418
1419 1419 /*
1420 1420 * ctparam_copyin
1421 1421 *
1422 1422 * copyin a ct_param_t for CT_TSET or CT_TGET commands.
1423 1423 * If ctparam_copyout() is not called after ctparam_copyin(), then
1424 1424 * the caller must kmem_free() the buffer pointed to by kparam->ctpm_kbuf.
1425 1425 *
1426 1426 * The copyin/out of ct_param_t is not done in ctmpl_set() and ctmpl_get()
1427 1427 * because prctioctl() calls ctmpl_set() and ctmpl_get() while holding a
1428 1428 * process lock.
1429 1429 */
1430 1430 int
1431 1431 ctparam_copyin(const void *uaddr, ct_kparam_t *kparam, int flag, int cmd)
1432 1432 {
1433 1433 uint32_t size;
1434 1434 void *ubuf;
1435 1435 ct_param_t *param = &kparam->param;
1436 1436 STRUCT_DECL(ct_param, uarg);
1437 1437
1438 1438 STRUCT_INIT(uarg, flag);
1439 1439 if (copyin(uaddr, STRUCT_BUF(uarg), STRUCT_SIZE(uarg)))
1440 1440 return (EFAULT);
1441 1441 size = STRUCT_FGET(uarg, ctpm_size);
1442 1442 ubuf = STRUCT_FGETP(uarg, ctpm_value);
1443 1443
1444 1444 if (size > CT_PARAM_MAX_SIZE || size == 0)
1445 1445 return (EINVAL);
1446 1446
1447 1447 kparam->ctpm_kbuf = kmem_alloc(size, KM_SLEEP);
1448 1448 if (cmd == CT_TSET) {
1449 1449 if (copyin(ubuf, kparam->ctpm_kbuf, size)) {
1450 1450 kmem_free(kparam->ctpm_kbuf, size);
1451 1451 return (EFAULT);
1452 1452 }
1453 1453 }
1454 1454 param->ctpm_id = STRUCT_FGET(uarg, ctpm_id);
1455 1455 param->ctpm_size = size;
1456 1456 param->ctpm_value = ubuf;
1457 1457 kparam->ret_size = 0;
1458 1458
1459 1459 return (0);
1460 1460 }
1461 1461
1462 1462 /*
1463 1463 * ctparam_copyout
1464 1464 *
1465 1465 * copyout a ct_kparam_t and free the buffer pointed to by the
1466 1466 * ctpm_kbuf member of ct_kparam_t.
1467 1467 */
1468 1468 int
1469 1469 ctparam_copyout(ct_kparam_t *kparam, void *uaddr, int flag)
1470 1470 {
1471 1471 int r = 0;
1472 1472 ct_param_t *param = &kparam->param;
1473 1473 STRUCT_DECL(ct_param, uarg);
1474 1474
1475 1475 STRUCT_INIT(uarg, flag);
1476 1476
1477 1477 STRUCT_FSET(uarg, ctpm_id, param->ctpm_id);
1478 1478 STRUCT_FSET(uarg, ctpm_size, kparam->ret_size);
1479 1479 STRUCT_FSETP(uarg, ctpm_value, param->ctpm_value);
1480 1480 if (copyout(STRUCT_BUF(uarg), uaddr, STRUCT_SIZE(uarg))) {
1481 1481 r = EFAULT;
1482 1482 goto error;
1483 1483 }
1484 1484 if (copyout(kparam->ctpm_kbuf, param->ctpm_value,
1485 1485 MIN(kparam->ret_size, param->ctpm_size))) {
1486 1486 r = EFAULT;
1487 1487 }
1488 1488
1489 1489 error:
1490 1490 kmem_free(kparam->ctpm_kbuf, param->ctpm_size);
1491 1491
1492 1492 return (r);
1493 1493 }
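The two routines above are meant to bracket ctmpl_set()/ctmpl_get(). A simplified, hypothetical sketch of the ioctl path that the ctparam_copyin() comment alludes to (the real logic lives in ctfs's prctioctl()) might look like this:

/*
 * Hedged sketch of an ioctl handler for CT_TSET/CT_TGET.  Error
 * handling is simplified; per the ctparam_copyin() comment, the
 * kernel buffer must be freed by hand if ctparam_copyout() is not
 * reached.  The function name is invented.
 */
static int
foo_tmpl_ioctl(ct_template_t *tmpl, int cmd, void *uaddr, int flag,
    const cred_t *cr)
{
	ct_kparam_t kparam;
	int error;

	if ((error = ctparam_copyin(uaddr, &kparam, flag, cmd)) != 0)
		return (error);

	if (cmd == CT_TSET)
		error = ctmpl_set(tmpl, &kparam, cr);
	else
		error = ctmpl_get(tmpl, &kparam);

	if (error == 0) {
		error = ctparam_copyout(&kparam, uaddr, flag);
	} else {
		/* copyout not reached: free the kernel buffer ourselves */
		kmem_free(kparam.ctpm_kbuf, kparam.param.ctpm_size);
	}

	return (error);
}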
1494 1494
1495 1495 /*
1496 1496 * ctmpl_free
1497 1497 *
1498 1498 * Frees a template.
1499 1499 */
1500 1500 void
1501 1501 ctmpl_free(ct_template_t *template)
1502 1502 {
1503 1503 mutex_destroy(&template->ctmpl_lock);
1504 1504 template->ctmpl_ops->ctop_free(template);
1505 1505 }
1506 1506
1507 1507 /*
1508 1508 * ctmpl_dup
1509 1509 *
1510 1510 * Creates a copy of a template.
1511 1511 */
1512 1512 ct_template_t *
1513 1513 ctmpl_dup(ct_template_t *template)
1514 1514 {
1515 1515 ct_template_t *new;
1516 1516
1517 1517 if (template == NULL)
1518 1518 return (NULL);
1519 1519
1520 1520 new = template->ctmpl_ops->ctop_dup(template);
1521 1521 /*
1522 1522 * ctmpl_lock was taken by ctop_dup's call to ctmpl_copy and
1523 1523 * should have remained held until now.
1524 1524 */
1525 1525 mutex_exit(&template->ctmpl_lock);
1526 1526
1527 1527 return (new);
1528 1528 }
1529 1529
1530 1530 /*
1531 1531 * ctmpl_set
1532 1532 *
1533 1533 * Sets the requested terms of a template.
1534 1534 */
1535 1535 int
1536 1536 ctmpl_set(ct_template_t *template, ct_kparam_t *kparam, const cred_t *cr)
1537 1537 {
1538 1538 int result = 0;
1539 1539 ct_param_t *param = &kparam->param;
1540 1540 uint64_t param_value;
1541 1541
1542 1542 if (param->ctpm_id == CTP_COOKIE ||
1543 1543 param->ctpm_id == CTP_EV_INFO ||
1544 1544 param->ctpm_id == CTP_EV_CRITICAL) {
1545 1545 if (param->ctpm_size < sizeof (uint64_t)) {
1546 1546 return (EINVAL);
1547 1547 } else {
1548 1548 param_value = *(uint64_t *)kparam->ctpm_kbuf;
1549 1549 }
1550 1550 }
1551 1551
1552 1552 mutex_enter(&template->ctmpl_lock);
1553 1553 switch (param->ctpm_id) {
1554 1554 case CTP_COOKIE:
1555 1555 template->ctmpl_cookie = param_value;
1556 1556 break;
1557 1557 case CTP_EV_INFO:
1558 1558 if (param_value & ~(uint64_t)template->ctmpl_ops->allevents)
1559 1559 result = EINVAL;
1560 1560 else
1561 1561 template->ctmpl_ev_info = param_value;
1562 1562 break;
1563 1563 case CTP_EV_CRITICAL:
1564 1564 if (param_value & ~(uint64_t)template->ctmpl_ops->allevents) {
1565 1565 result = EINVAL;
1566 1566 break;
1567 1567 } else if ((~template->ctmpl_ev_crit & param_value) == 0) {
1568 1568 /*
1569 1569 * Assume that a pure reduction of the critical
1570 1570 * set is allowed by the contract type.
1571 1571 */
1572 1572 template->ctmpl_ev_crit = param_value;
1573 1573 break;
1574 1574 }
1575 1575 /*
1576 1576 * There may be restrictions on what we can make
1577 1577 * critical, so we defer to the judgement of the
1578 1578 * contract type.
1579 1579 */
1580 1580 /* FALLTHROUGH */
1581 1581 default:
1582 1582 result = template->ctmpl_ops->ctop_set(template, kparam, cr);
1583 1583 }
1584 1584 mutex_exit(&template->ctmpl_lock);
1585 1585
1586 1586 return (result);
1587 1587 }
1588 1588
1589 1589 /*
1590 1590 * ctmpl_get
1591 1591 *
1592 1592 * Obtains the requested terms from a template.
1593 1593 *
1594 1594 * If the term requested is a variable-sized term and the buffer
1595 1595 * provided is too small for the data, we truncate the data and return
1596 1596 * the buffer size necessary to fit the term in kparam->ret_size. If the
1597 1597 * term requested is fixed-size (uint64_t) and the buffer provided is too
1598 1598 * small, we return EINVAL. This should never happen if you're using
1599 1599 * libcontract(3LIB), only if you call ioctl with a hand-constructed
1600 1600 * ct_param_t argument.
1601 1601 *
1602 1602 * Currently, only contract-specific parameters are variable-sized.
1604 1604 */
1605 1605 int
1606 1606 ctmpl_get(ct_template_t *template, ct_kparam_t *kparam)
1607 1607 {
1608 1608 int result = 0;
1609 1609 ct_param_t *param = &kparam->param;
1610 1610 uint64_t *param_value;
1611 1611
1612 1612 if (param->ctpm_id == CTP_COOKIE ||
1613 1613 param->ctpm_id == CTP_EV_INFO ||
1614 1614 param->ctpm_id == CTP_EV_CRITICAL) {
1615 1615 if (param->ctpm_size < sizeof (uint64_t)) {
1616 1616 return (EINVAL);
1617 1617 } else {
1618 1618 param_value = kparam->ctpm_kbuf;
1619 1619 kparam->ret_size = sizeof (uint64_t);
1620 1620 }
1621 1621 }
1622 1622
1623 1623 mutex_enter(&template->ctmpl_lock);
1624 1624 switch (param->ctpm_id) {
1625 1625 case CTP_COOKIE:
1626 1626 *param_value = template->ctmpl_cookie;
1627 1627 break;
1628 1628 case CTP_EV_INFO:
1629 1629 *param_value = template->ctmpl_ev_info;
1630 1630 break;
1631 1631 case CTP_EV_CRITICAL:
1632 1632 *param_value = template->ctmpl_ev_crit;
1633 1633 break;
1634 1634 default:
1635 1635 result = template->ctmpl_ops->ctop_get(template, kparam);
1636 1636 }
1637 1637 mutex_exit(&template->ctmpl_lock);
1638 1638
1639 1639 return (result);
1640 1640 }
1641 1641
1642 1642 /*
1643 1643 * ctmpl_makecurrent
1644 1644 *
1645 1645 * Used by ctmpl_activate and ctmpl_clear to set the current thread's
1646 1646 * active template. Frees the old active template, if there was one.
1647 1647 */
1648 1648 static void
1649 1649 ctmpl_makecurrent(ct_template_t *template, ct_template_t *new)
1650 1650 {
1651 1651 klwp_t *curlwp = ttolwp(curthread);
1652 1652 proc_t *p = curproc;
1653 1653 ct_template_t *old;
1654 1654
1655 1655 mutex_enter(&p->p_lock);
1656 1656 old = curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index];
1657 1657 curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index] = new;
1658 1658 mutex_exit(&p->p_lock);
1659 1659
1660 1660 if (old)
1661 1661 ctmpl_free(old);
1662 1662 }
1663 1663
1664 1664 /*
1665 1665 * ctmpl_activate
1666 1666 *
1667 1667 * Copies the specified template and makes the copy the current
1668 1668 * thread's active template of that type.
1669 1669 */
1670 1670 void
1671 1671 ctmpl_activate(ct_template_t *template)
1672 1672 {
1673 1673 ctmpl_makecurrent(template, ctmpl_dup(template));
1674 1674 }
1675 1675
1676 1676 /*
1677 1677 * ctmpl_clear
1678 1678 *
1679 1679 * Clears the current thread's active template of the same type as
1680 1680 * the specified template.
1681 1681 */
1682 1682 void
1683 1683 ctmpl_clear(ct_template_t *template)
1684 1684 {
1685 1685 ctmpl_makecurrent(template, NULL);
1686 1686 }
1687 1687
1688 1688 /*
1689 1689 * ctmpl_create
1690 1690 *
1691 1691 * Creates a new contract using the specified template.
1692 1692 */
1693 1693 int
1694 1694 ctmpl_create(ct_template_t *template, ctid_t *ctidp)
1695 1695 {
1696 1696 return (template->ctmpl_ops->ctop_create(template, ctidp));
1697 1697 }
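A sketch of how these entry points are typically driven from userland: a process contract template is configured and activated before fork(2), so the child is created in a new contract, and cleared afterwards. This assumes the libcontract(3LIB) wrappers ct_pr_tmpl_set_fatal(), ct_tmpl_set_critical(), ct_tmpl_activate(), and ct_tmpl_clear(), plus the standard ctfs template path; error handling is omitted.

#include <libcontract.h>
#include <sys/contract/process.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>

pid_t
fork_in_new_contract(void)
{
	int fd = open("/system/contract/process/template", O_RDWR);
	pid_t pid;

	(void) ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
	(void) ct_tmpl_set_critical(fd, CT_PR_EV_EMPTY);
	(void) ct_tmpl_activate(fd);	/* reaches ctmpl_activate() above */
	pid = fork();			/* child starts in the new contract */
	(void) ct_tmpl_clear(fd);	/* reaches ctmpl_clear() */
	(void) close(fd);
	return (pid);
}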
1698 1698
1699 1699 /*
1700 1700 * ctmpl_init
1701 1701 *
1702 1702 * Initializes the common portion of a new contract template.
1703 1703 */
1704 1704 void
1705 1705 ctmpl_init(ct_template_t *new, ctmplops_t *ops, ct_type_t *type, void *data)
1706 1706 {
1707 1707 mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1708 1708 new->ctmpl_ops = ops;
1709 1709 new->ctmpl_type = type;
1710 1710 new->ctmpl_data = data;
1711 1711 new->ctmpl_ev_info = new->ctmpl_ev_crit = 0;
1712 1712 new->ctmpl_cookie = 0;
1713 1713 }
1714 1714
1715 1715 /*
1716 1716 * ctmpl_copy
1717 1717 *
1718 1718 * Copies the common portions of a contract template. Intended for use
1719 1719 * by a contract type's ctop_dup template op. Returns with the old
1720 1720  * template's lock held, which should remain held until the
1721 1721 * template op returns (it is dropped by ctmpl_dup).
1722 1722 */
1723 1723 void
1724 1724 ctmpl_copy(ct_template_t *new, ct_template_t *old)
1725 1725 {
1726 1726 mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1727 1727 mutex_enter(&old->ctmpl_lock);
1728 1728 new->ctmpl_ops = old->ctmpl_ops;
1729 1729 new->ctmpl_type = old->ctmpl_type;
1730 1730 new->ctmpl_ev_crit = old->ctmpl_ev_crit;
1731 1731 new->ctmpl_ev_info = old->ctmpl_ev_info;
1732 1732 new->ctmpl_cookie = old->ctmpl_cookie;
1733 1733 }
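A sketch of how a contract type's ctop_dup op would build on ctmpl_copy(), using a hypothetical type "foo"; foo_template_t and its fields are illustrative. As the comment above notes, ctmpl_copy() acquires old->ctmpl_lock and the lock is still held when this op returns; ctmpl_dup() (the caller) drops it.

static ct_template_t *
foo_ctmpl_dup(ct_template_t *old)
{
	foo_template_t *new_foo, *old_foo = old->ctmpl_data;

	new_foo = kmem_alloc(sizeof (foo_template_t), KM_SLEEP);

	ctmpl_copy(&new_foo->fooctmpl_common, old);	/* old lock now held */

	/* copy type-specific terms while the old template is locked */
	new_foo->fooctmpl_param = old_foo->fooctmpl_param;
	new_foo->fooctmpl_common.ctmpl_data = new_foo;

	return (&new_foo->fooctmpl_common);
}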
1734 1734
1735 1735 /*
1736 1736 * ctmpl_create_inval
1737 1737 *
1738 1738 * Returns EINVAL. Provided for the convenience of those contract
1739 1739 * types which don't support ct_tmpl_create(3contract) and would
1740 1740 * otherwise need to create their own stub for the ctop_create template
1741 1741 * op.
1742 1742 */
1743 1743 /*ARGSUSED*/
1744 1744 int
1745 1745 ctmpl_create_inval(ct_template_t *template, ctid_t *ctidp)
1746 1746 {
1747 1747 return (EINVAL);
1748 1748 }
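For illustration, a hypothetical contract type "foo" that doesn't support ct_tmpl_create(3CONTRACT) would simply plug ctmpl_create_inval into its template ops vector. Only the ctop_* members referenced in this file are shown; the authoritative ctmplops_t layout (and any additional members) lives in the contract implementation headers, and the foo_ctmpl_* functions are assumptions.

static ctmplops_t foo_ctmpl_ops = {
	.ctop_dup	= foo_ctmpl_dup,
	.ctop_set	= foo_ctmpl_set,
	.ctop_get	= foo_ctmpl_get,
	.ctop_create	= ctmpl_create_inval,	/* ct_tmpl_create() gets EINVAL */
	/* remaining members omitted */
};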
1749 1749
1750 1750
1751 1751 /*
1752 1752 * cte_queue_create
1753 1753 *
1754 1754 * Initializes a queue of a particular type. If dynamic is set, the
1755 1755 * queue is to be freed when its last listener is removed after being
1756 1756 * drained.
1757 1757 */
1758 1758 static void
1759 1759 cte_queue_create(ct_equeue_t *q, ct_listnum_t list, int maxinf, int dynamic)
1760 1760 {
1761 1761 mutex_init(&q->ctq_lock, NULL, MUTEX_DEFAULT, NULL);
1762 1762 q->ctq_listno = list;
1763 1763 list_create(&q->ctq_events, sizeof (ct_kevent_t),
1764 1764 offsetof(ct_kevent_t, cte_nodes[list].ctm_node));
1765 1765 list_create(&q->ctq_listeners, sizeof (ct_listener_t),
1766 1766 offsetof(ct_listener_t, ctl_allnode));
1767 1767 list_create(&q->ctq_tail, sizeof (ct_listener_t),
1768 1768 offsetof(ct_listener_t, ctl_tailnode));
1769 1769 gethrestime(&q->ctq_atime);
1770 1770 q->ctq_nlisteners = 0;
1771 1771 q->ctq_nreliable = 0;
1772 1772 q->ctq_ninf = 0;
1773 1773 q->ctq_max = maxinf;
1774 1774
1775 1775 /*
1776 1776 * Bundle queues and contract queues are embedded in other
1777 1777  * structures and are implicitly reference counted by virtue
1778 1778 * of their vnodes' indirect hold on their contracts. Process
1779 1779 * bundle queues are dynamically allocated and may persist
1780 1780 * after the death of the process, so they must be explicitly
1781 1781 * reference counted.
1782 1782 */
1783 1783 q->ctq_flags = dynamic ? CTQ_REFFED : 0;
1784 1784 }
1785 1785
1786 1786 /*
1787 1787 * cte_queue_destroy
1788 1788 *
1789 1789  * Destroys the specified queue. The queue is freed if it is
1790 1790  * reference counted.
1791 1791 */
1792 1792 static void
1793 1793 cte_queue_destroy(ct_equeue_t *q)
1794 1794 {
1795 1795 ASSERT(q->ctq_flags & CTQ_DEAD);
1796 1796 ASSERT(q->ctq_nlisteners == 0);
1797 1797 ASSERT(q->ctq_nreliable == 0);
1798 1798 list_destroy(&q->ctq_events);
1799 1799 list_destroy(&q->ctq_listeners);
1800 1800 list_destroy(&q->ctq_tail);
1801 1801 mutex_destroy(&q->ctq_lock);
1802 1802 if (q->ctq_flags & CTQ_REFFED)
1803 1803 kmem_free(q, sizeof (ct_equeue_t));
1804 1804 }
1805 1805
1806 1806 /*
1807 1807 * cte_hold
1808 1808 *
1809 1809 * Takes a hold on the specified event.
1810 1810 */
1811 1811 static void
1812 1812 cte_hold(ct_kevent_t *e)
1813 1813 {
1814 1814 mutex_enter(&e->cte_lock);
1815 1815 ASSERT(e->cte_refs > 0);
1816 1816 e->cte_refs++;
1817 1817 mutex_exit(&e->cte_lock);
1818 1818 }
1819 1819
1820 1820 /*
1821 1821 * cte_rele
1822 1822 *
1823 1823 * Releases a hold on the specified event. If the caller had the last
1824 1824 * reference, frees the event and releases its hold on the contract
1825 1825 * that generated it.
1826 1826 */
1827 1827 static void
1828 1828 cte_rele(ct_kevent_t *e)
1829 1829 {
1830 1830 mutex_enter(&e->cte_lock);
1831 1831 ASSERT(e->cte_refs > 0);
1832 1832 if (--e->cte_refs) {
1833 1833 mutex_exit(&e->cte_lock);
1834 1834 return;
1835 1835 }
1836 1836
1837 1837 contract_rele(e->cte_contract);
1838 1838
1839 1839 mutex_destroy(&e->cte_lock);
1840 1840 if (e->cte_data)
1841 1841 nvlist_free(e->cte_data);
1842 1842 if (e->cte_gdata)
1843 1843 nvlist_free(e->cte_gdata);
1844 1844 kmem_free(e, sizeof (ct_kevent_t));
1845 1845 }
1846 1846
1847 1847 /*
1848 1848 * cte_qrele
1849 1849 *
1850 1850 * Remove this listener's hold on the specified event, removing and
1851 1851 * releasing the queue's hold on the event if appropriate.
1852 1852 */
1853 1853 static void
1854 1854 cte_qrele(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1855 1855 {
1856 1856 ct_member_t *member = &e->cte_nodes[q->ctq_listno];
1857 1857
1858 1858 ASSERT(MUTEX_HELD(&q->ctq_lock));
1859 1859
1860 1860 if (l->ctl_flags & CTLF_RELIABLE)
1861 1861 member->ctm_nreliable--;
1862 1862 if ((--member->ctm_refs == 0) && member->ctm_trimmed) {
1863 1863 member->ctm_trimmed = 0;
1864 1864 list_remove(&q->ctq_events, e);
1865 1865 cte_rele(e);
1866 1866 }
1867 1867 }
1868 1868
1869 1869 /*
1870 1870 * cte_qmove
1871 1871 *
1872 1872 * Move this listener to the specified event in the queue.
1873 1873 */
1874 1874 static ct_kevent_t *
1875 1875 cte_qmove(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1876 1876 {
1877 1877 ct_kevent_t *olde;
1878 1878
1879 1879 ASSERT(MUTEX_HELD(&q->ctq_lock));
1880 1880 ASSERT(l->ctl_equeue == q);
1881 1881
1882 1882 if ((olde = l->ctl_position) == NULL)
1883 1883 list_remove(&q->ctq_tail, l);
1884 1884
1885 1885 while (e != NULL && e->cte_nodes[q->ctq_listno].ctm_trimmed)
1886 1886 e = list_next(&q->ctq_events, e);
1887 1887
1888 1888 if (e != NULL) {
1889 1889 e->cte_nodes[q->ctq_listno].ctm_refs++;
1890 1890 if (l->ctl_flags & CTLF_RELIABLE)
1891 1891 e->cte_nodes[q->ctq_listno].ctm_nreliable++;
1892 1892 } else {
1893 1893 list_insert_tail(&q->ctq_tail, l);
1894 1894 }
1895 1895
1896 1896 l->ctl_position = e;
1897 1897 if (olde)
1898 1898 cte_qrele(q, l, olde);
1899 1899
1900 1900 return (e);
1901 1901 }
1902 1902
1903 1903 /*
1904 1904 * cte_checkcred
1905 1905 *
1906 1906 * Determines if the specified event's contract is owned by a process
1907 1907 * with the same effective uid as the specified credential. Called
1908 1908 * after a failed call to contract_owned with locked set. Because it
1909 1909 * drops the queue lock, its caller (cte_qreadable) needs to make sure
1910 1910 * we're still in the same place after we return. Returns 1 on
1911 1911 * success.
1912 1912 */
1913 1913 static int
1914 1914 cte_checkcred(ct_equeue_t *q, ct_kevent_t *e, const cred_t *cr)
1915 1915 {
1916 1916 int result;
1917 1917 contract_t *ct = e->cte_contract;
1918 1918
1919 1919 cte_hold(e);
1920 1920 mutex_exit(&q->ctq_lock);
1921 1921 result = curproc->p_zone->zone_uniqid == ct->ct_czuniqid &&
1922 1922 contract_checkcred(ct, cr);
1923 1923 mutex_enter(&q->ctq_lock);
1924 1924 cte_rele(e);
1925 1925
1926 1926 return (result);
1927 1927 }
1928 1928
1929 1929 /*
1930 1930 * cte_qreadable
1931 1931 *
1932 1932 * Ensures that the listener is pointing to a valid event that the
1933 1933 * caller has the credentials to read. Returns 0 if we can read the
1934 1934 * event we're pointing to.
1935 1935 */
1936 1936 static int
1937 1937 cte_qreadable(ct_equeue_t *q, ct_listener_t *l, const cred_t *cr,
1938 1938 uint64_t zuniqid, int crit)
1939 1939 {
1940 1940 ct_kevent_t *e, *next;
1941 1941 contract_t *ct;
1942 1942
1943 1943 ASSERT(MUTEX_HELD(&q->ctq_lock));
1944 1944 ASSERT(l->ctl_equeue == q);
1945 1945
1946 1946 if (l->ctl_flags & CTLF_COPYOUT)
1947 1947 return (1);
1948 1948
1949 1949 next = l->ctl_position;
1950 1950 while (e = cte_qmove(q, l, next)) {
1951 1951 ct = e->cte_contract;
1952 1952 /*
1953 1953 * Check obvious things first. If we are looking for a
1954 1954 * critical message, is this one? If we aren't in the
1955 1955 * global zone, is this message meant for us?
1956 1956 */
1957 1957 if ((crit && (e->cte_flags & (CTE_INFO | CTE_ACK))) ||
1958 1958 (cr != NULL && zuniqid != GLOBAL_ZONEUNIQID &&
1959 1959 zuniqid != contract_getzuniqid(ct))) {
1960 1960
1961 1961 next = list_next(&q->ctq_events, e);
1962 1962
1963 1963 /*
1964 1964 * Next, see if our effective uid equals that of owner
1965 1965 * or author of the contract. Since we are holding the
1966 1966 * queue lock, contract_owned can't always check if we
1967 1967 * have the same effective uid as the contract's
1968 1968 * owner. If it comes to that, it fails and we take
1969 1969 * the slow(er) path.
1970 1970 */
1971 1971 } else if (cr != NULL && !contract_owned(ct, cr, B_TRUE)) {
1972 1972
1973 1973 /*
1974 1974 * At this point we either don't have any claim
1975 1975 * to this contract or we match the effective
1976 1976 * uid of the owner but couldn't tell. We
1977 1977 * first test for a NULL holder so that events
1978 1978 * from orphans and inherited contracts avoid
1979 1979 * the penalty phase.
1980 1980 */
1981 1981 if (e->cte_contract->ct_owner == NULL &&
1982 1982 !secpolicy_contract_observer_choice(cr))
1983 1983 next = list_next(&q->ctq_events, e);
1984 1984
1985 1985 /*
1986 1986 * cte_checkcred will juggle locks to see if we
1987 1987 * have the same uid as the event's contract's
1988 1988 * current owner. If it succeeds, we have to
1989 1989 * make sure we are in the same point in the
1990 1990 * queue.
1991 1991 */
1992 1992 else if (cte_checkcred(q, e, cr) &&
1993 1993 l->ctl_position == e)
1994 1994 break;
1995 1995
1996 1996 /*
1997 1997 * cte_checkcred failed; see if we're in the
1998 1998 * same place.
1999 1999 */
2000 2000 else if (l->ctl_position == e)
2001 2001 if (secpolicy_contract_observer_choice(cr))
2002 2002 break;
2003 2003 else
2004 2004 next = list_next(&q->ctq_events, e);
2005 2005
2006 2006 /*
2007 2007 * cte_checkcred failed, and our position was
2008 2008 * changed. Start from there.
2009 2009 */
2010 2010 else
2011 2011 next = l->ctl_position;
2012 2012 } else {
2013 2013 break;
2014 2014 }
2015 2015 }
2016 2016
2017 2017 /*
2018 2018 * We check for CTLF_COPYOUT again in case we dropped the queue
2019 2019 * lock in cte_checkcred.
2020 2020 */
2021 2021 return ((l->ctl_flags & CTLF_COPYOUT) || (l->ctl_position == NULL));
2022 2022 }
2023 2023
2024 2024 /*
2025 2025 * cte_qwakeup
2026 2026 *
2027 2027 * Wakes up any waiting listeners and points them at the specified event.
2028 2028 */
2029 2029 static void
2030 2030 cte_qwakeup(ct_equeue_t *q, ct_kevent_t *e)
2031 2031 {
2032 2032 ct_listener_t *l;
2033 2033
2034 2034 ASSERT(MUTEX_HELD(&q->ctq_lock));
2035 2035
2036 2036 while (l = list_head(&q->ctq_tail)) {
2037 2037 list_remove(&q->ctq_tail, l);
2038 2038 e->cte_nodes[q->ctq_listno].ctm_refs++;
2039 2039 if (l->ctl_flags & CTLF_RELIABLE)
2040 2040 e->cte_nodes[q->ctq_listno].ctm_nreliable++;
2041 2041 l->ctl_position = e;
2042 2042 cv_signal(&l->ctl_cv);
2043 2043 pollwakeup(&l->ctl_pollhead, POLLIN);
2044 2044 }
2045 2045 }
2046 2046
2047 2047 /*
2048 2048 * cte_copy
2049 2049 *
2050 2050 * Copies events from the specified contract event queue to the
2051 2051 * end of the specified process bundle queue. Only called from
2052 2052 * contract_adopt.
2053 2053 *
2054 2054 * We copy to the end of the target queue instead of mixing the events
2055 2055 * in their proper order because otherwise the act of adopting a
2056 2056  * contract would require a process to reset all the process bundle
2057 2057  * listeners it needed in order to see the new events. This would, in turn,
2058 2058 * require the process to keep track of which preexisting events had
2059 2059 * already been processed.
2060 2060 */
2061 2061 static void
2062 2062 cte_copy(ct_equeue_t *q, ct_equeue_t *newq)
2063 2063 {
2064 2064 ct_kevent_t *e, *first = NULL;
2065 2065
2066 2066 VERIFY(q->ctq_listno == CTEL_CONTRACT);
2067 2067 VERIFY(newq->ctq_listno == CTEL_PBUNDLE);
2068 2068
2069 2069 mutex_enter(&q->ctq_lock);
2070 2070 mutex_enter(&newq->ctq_lock);
2071 2071
2072 2072 /*
2073 2073 * For now, only copy critical events.
2074 2074 */
2075 2075 for (e = list_head(&q->ctq_events); e != NULL;
2076 2076 e = list_next(&q->ctq_events, e)) {
2077 2077 if ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
2078 2078 if (first == NULL)
2079 2079 first = e;
2080 2080 /*
2081 2081 * It is possible for adoption to race with an owner's
2082 2082 * cte_publish_all(); we must only enqueue events that
2083 2083 * have not already been enqueued.
2084 2084 */
2085 2085 if (!list_link_active((list_node_t *)
2086 2086 ((uintptr_t)e + newq->ctq_events.list_offset))) {
2087 2087 list_insert_tail(&newq->ctq_events, e);
2088 2088 cte_hold(e);
2089 2089 }
2090 2090 }
2091 2091 }
2092 2092
2093 2093 mutex_exit(&q->ctq_lock);
2094 2094
2095 2095 if (first)
2096 2096 cte_qwakeup(newq, first);
2097 2097
2098 2098 mutex_exit(&newq->ctq_lock);
2099 2099 }
2100 2100
2101 2101 /*
2102 2102 * cte_trim
2103 2103 *
2104 2104 * Trims unneeded events from an event queue. Algorithm works as
2105 2105 * follows:
2106 2106 *
2107 2107 * Removes all informative and acknowledged critical events until the
2108 2108 * first referenced event is found.
2109 2109 *
2110 2110 * If a contract is specified, removes all events (regardless of
2111 2111 * acknowledgement) generated by that contract until the first event
2112 2112  * referenced by a reliable listener is found. Referenced events are
2113 2113 * removed by marking them "trimmed". Such events will be removed
2114 2114 * when the last reference is dropped and will be skipped by future
2115 2115 * listeners.
2116 2116 *
2117 2117  * This is pretty basic. Ideally this would also remove events from
2118 2118  * the middle of the list (i.e. beyond the first referenced event),
2119 2119  * and even referenced events themselves.
2120 2120 */
2121 2121 static void
2122 2122 cte_trim(ct_equeue_t *q, contract_t *ct)
2123 2123 {
2124 2124 ct_kevent_t *e, *next;
2125 2125 int flags, stopper;
2126 2126 int start = 1;
2127 2127
2128 2128 VERIFY(MUTEX_HELD(&q->ctq_lock));
2129 2129
2130 2130 for (e = list_head(&q->ctq_events); e != NULL; e = next) {
2131 2131 next = list_next(&q->ctq_events, e);
2132 2132 flags = e->cte_flags;
2133 2133 stopper = (q->ctq_listno != CTEL_PBUNDLE) &&
2134 2134 (e->cte_nodes[q->ctq_listno].ctm_nreliable > 0);
2135 2135 if (e->cte_nodes[q->ctq_listno].ctm_refs == 0) {
2136 2136 if ((start && (flags & (CTE_INFO | CTE_ACK))) ||
2137 2137 (e->cte_contract == ct)) {
2138 2138 /*
2139 2139 * Toss informative and ACKed critical messages.
2140 2140 */
2141 2141 list_remove(&q->ctq_events, e);
2142 2142 cte_rele(e);
2143 2143 }
2144 2144 } else if ((e->cte_contract == ct) && !stopper) {
2145 2145 ASSERT(q->ctq_nlisteners != 0);
2146 2146 e->cte_nodes[q->ctq_listno].ctm_trimmed = 1;
2147 2147 } else if (ct && !stopper) {
2148 2148 start = 0;
2149 2149 } else {
2150 2150 /*
2151 2151 * Don't free messages past the first reader.
2152 2152 */
2153 2153 break;
2154 2154 }
2155 2155 }
2156 2156 }
2157 2157
2158 2158 /*
2159 2159 * cte_queue_drain
2160 2160 *
2161 2161 * Drain all events from the specified queue, and mark it dead. If
2162 2162 * "ack" is set, acknowledge any critical events we find along the
2163 2163 * way.
2164 2164 */
2165 2165 static void
2166 2166 cte_queue_drain(ct_equeue_t *q, int ack)
2167 2167 {
2168 2168 ct_kevent_t *e, *next;
2169 2169 ct_listener_t *l;
2170 2170
2171 2171 mutex_enter(&q->ctq_lock);
2172 2172
2173 2173 for (e = list_head(&q->ctq_events); e != NULL; e = next) {
2174 2174 next = list_next(&q->ctq_events, e);
2175 2175 if (ack && ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0)) {
2176 2176 /*
2177 2177 * Make sure critical messages are eventually
2178 2178 * removed from the bundle queues.
2179 2179 */
2180 2180 mutex_enter(&e->cte_lock);
2181 2181 e->cte_flags |= CTE_ACK;
2182 2182 mutex_exit(&e->cte_lock);
2183 2183 ASSERT(MUTEX_HELD(&e->cte_contract->ct_lock));
2184 2184 e->cte_contract->ct_evcnt--;
2185 2185 }
2186 2186 list_remove(&q->ctq_events, e);
2187 2187 e->cte_nodes[q->ctq_listno].ctm_refs = 0;
2188 2188 e->cte_nodes[q->ctq_listno].ctm_nreliable = 0;
2189 2189 e->cte_nodes[q->ctq_listno].ctm_trimmed = 0;
2190 2190 cte_rele(e);
2191 2191 }
2192 2192
2193 2193 /*
2194 2194 * This is necessary only because of CTEL_PBUNDLE listeners;
2195 2195 * the events they point to can move from one pbundle to
2196 2196 * another. Fortunately, this only happens if the contract is
2197 2197 * inherited, which (in turn) only happens if the process
2198 2198 * exits, which means it's an all-or-nothing deal. If this
2199 2199 * wasn't the case, we would instead need to keep track of
2200 2200 * listeners on a per-event basis, not just a per-queue basis.
2201 2201 * This would have the side benefit of letting us clean up
2202 2202 * trimmed events sooner (i.e. immediately), but would
2203 2203 * unfortunately make events even bigger than they already
2204 2204 * are.
2205 2205 */
2206 2206 for (l = list_head(&q->ctq_listeners); l;
2207 2207 l = list_next(&q->ctq_listeners, l)) {
2208 2208 l->ctl_flags |= CTLF_DEAD;
2209 2209 if (l->ctl_position) {
2210 2210 l->ctl_position = NULL;
2211 2211 list_insert_tail(&q->ctq_tail, l);
2212 2212 }
2213 2213 cv_broadcast(&l->ctl_cv);
2214 2214 }
2215 2215
2216 2216 /*
2217 2217 * Disallow events.
2218 2218 */
2219 2219 q->ctq_flags |= CTQ_DEAD;
2220 2220
2221 2221 /*
2222 2222 * If we represent the last reference to a reference counted
2223 2223 * process bundle queue, free it.
2224 2224 */
2225 2225 if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_nlisteners == 0))
2226 2226 cte_queue_destroy(q);
2227 2227 else
2228 2228 mutex_exit(&q->ctq_lock);
2229 2229 }
2230 2230
2231 2231 /*
2232 2232 * cte_publish
2233 2233 *
2234 2234 * Publishes an event to a specific queue. Only called by
2235 2235 * cte_publish_all.
2236 2236 */
2237 2237 static void
2238 2238 cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp, boolean_t mayexist)
2239 2239 {
2240 2240 ASSERT(MUTEX_HELD(&q->ctq_lock));
2241 2241
2242 2242 q->ctq_atime = *tsp;
2243 2243
2244 2244 /*
2245 2245 * If this event may already exist on this queue, check to see if it
2246 2246 * is already there and return if so.
2247 2247 */
2248 2248 if (mayexist && list_link_active((list_node_t *)((uintptr_t)e +
2249 2249 q->ctq_events.list_offset))) {
2250 2250 mutex_exit(&q->ctq_lock);
2251 2251 cte_rele(e);
2252 2252 return;
2253 2253 }
2254 2254
2255 2255 /*
2256 2256 * Don't publish if the event is informative and there aren't
2257 2257 * any listeners, or if the queue has been shut down.
2258 2258 */
2259 2259 if (((q->ctq_nlisteners == 0) && (e->cte_flags & (CTE_INFO|CTE_ACK))) ||
2260 2260 (q->ctq_flags & CTQ_DEAD)) {
2261 2261 mutex_exit(&q->ctq_lock);
2262 2262 cte_rele(e);
2263 2263 return;
2264 2264 }
2265 2265
2266 2266 /*
2267 2267 * Enqueue event
2268 2268 */
2269 2269 VERIFY(!list_link_active((list_node_t *)
2270 2270 ((uintptr_t)e + q->ctq_events.list_offset)));
2271 2271 list_insert_tail(&q->ctq_events, e);
2272 2272
2273 2273 /*
2274 2274 * Check for waiting listeners
2275 2275 */
2276 2276 cte_qwakeup(q, e);
2277 2277
2278 2278 /*
2279 2279 * Trim unnecessary events from the queue.
2280 2280 */
2281 2281 cte_trim(q, NULL);
2282 2282 mutex_exit(&q->ctq_lock);
2283 2283 }
2284 2284
2285 2285 /*
2286 2286 * cte_publish_all
2287 2287 *
2288 2288 * Publish an event to all necessary event queues. The event, e, must
2289 2289 * be zallocated by the caller, and the event's flags and type must be
2290 2290 * set. The rest of the event's fields are initialized here.
2291 2291 */
2292 2292 uint64_t
2293 2293 cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
2294 2294 {
2295 2295 ct_equeue_t *q;
2296 2296 timespec_t ts;
2297 2297 uint64_t evid;
2298 2298 ct_kevent_t *negev;
2299 2299 int negend;
2300 2300
2301 2301 e->cte_contract = ct;
2302 2302 e->cte_data = data;
2303 2303 e->cte_gdata = gdata;
2304 2304 e->cte_refs = 3;
2305 - evid = e->cte_id = atomic_add_64_nv(&ct->ct_type->ct_type_evid, 1);
2305 + evid = e->cte_id = atomic_inc_64_nv(&ct->ct_type->ct_type_evid);
2306 2306 contract_hold(ct);
2307 2307
2308 2308 /*
2309 2309 * For a negotiation event we set the ct->ct_nevent field of the
2310 2310 * contract for the duration of the negotiation
2311 2311 */
2312 2312 negend = 0;
2313 2313 if (e->cte_flags & CTE_NEG) {
2314 2314 cte_hold(e);
2315 2315 ct->ct_nevent = e;
2316 2316 } else if (e->cte_type == CT_EV_NEGEND) {
2317 2317 negend = 1;
2318 2318 }
2319 2319
2320 2320 gethrestime(&ts);
2321 2321
2322 2322 /*
2323 2323 * ct_evtlock simply (and only) ensures that two events sent
2324 2324 * from the same contract are delivered to all queues in the
2325 2325 * same order.
2326 2326 */
2327 2327 mutex_enter(&ct->ct_evtlock);
2328 2328
2329 2329 /*
2330 2330 * CTEL_CONTRACT - First deliver to the contract queue, acking
2331 2331 * the event if the contract has been orphaned.
2332 2332 */
2333 2333 mutex_enter(&ct->ct_lock);
2334 2334 mutex_enter(&ct->ct_events.ctq_lock);
2335 2335 if ((e->cte_flags & CTE_INFO) == 0) {
2336 2336 if (ct->ct_state >= CTS_ORPHAN)
2337 2337 e->cte_flags |= CTE_ACK;
2338 2338 else
2339 2339 ct->ct_evcnt++;
2340 2340 }
2341 2341 mutex_exit(&ct->ct_lock);
2342 2342 cte_publish(&ct->ct_events, e, &ts, B_FALSE);
2343 2343
2344 2344 /*
2345 2345 * CTEL_BUNDLE - Next deliver to the contract type's bundle
2346 2346 * queue.
2347 2347 */
2348 2348 mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
2349 2349 cte_publish(&ct->ct_type->ct_type_events, e, &ts, B_FALSE);
2350 2350
2351 2351 /*
2352 2352 * CTEL_PBUNDLE - Finally, if the contract has an owner,
2353 2353 * deliver to the owner's process bundle queue.
2354 2354 */
2355 2355 mutex_enter(&ct->ct_lock);
2356 2356 if (ct->ct_owner) {
2357 2357 /*
2358 2358 * proc_exit doesn't free event queues until it has
2359 2359 * abandoned all contracts.
2360 2360 */
2361 2361 ASSERT(ct->ct_owner->p_ct_equeue);
2362 2362 ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
2363 2363 q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
2364 2364 mutex_enter(&q->ctq_lock);
2365 2365 mutex_exit(&ct->ct_lock);
2366 2366
2367 2367 /*
2368 2368 * It is possible for this code to race with adoption; we
2369 2369 * publish the event indicating that the event may already
2370 2370 * be enqueued because adoption beat us to it (in which case
2371 2371 * cte_pubish() does nothing).
2372 2372 */
2373 2373 cte_publish(q, e, &ts, B_TRUE);
2374 2374 } else {
2375 2375 mutex_exit(&ct->ct_lock);
2376 2376 cte_rele(e);
2377 2377 }
2378 2378
2379 2379 if (negend) {
2380 2380 mutex_enter(&ct->ct_lock);
2381 2381 negev = ct->ct_nevent;
2382 2382 ct->ct_nevent = NULL;
2383 2383 cte_rele(negev);
2384 2384 mutex_exit(&ct->ct_lock);
2385 2385 }
2386 2386
2387 2387 mutex_exit(&ct->ct_evtlock);
2388 2388
2389 2389 return (evid);
2390 2390 }
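A sketch of how a contract type might publish an informative event, following the calling convention stated above: the caller zallocates the event, sets only cte_flags and cte_type, and hands it (plus optional nvlists) to cte_publish_all(), which consumes them. CT_FOO_EV_EXAMPLE and the nvpair name are illustrative.

static uint64_t
foo_publish_example(contract_t *ct, uint32_t value)
{
	ct_kevent_t *event;
	nvlist_t *nvl;

	event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
	event->cte_flags = CTE_INFO;		/* informative; needs no ack */
	event->cte_type = CT_FOO_EV_EXAMPLE;

	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_uint32(nvl, "foo_value", value) == 0);

	/* cte_publish_all() takes over the event and the nvlist */
	return (cte_publish_all(ct, event, nvl, NULL));
}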
2391 2391
2392 2392 /*
2393 2393 * cte_add_listener
2394 2394 *
2395 2395 * Add a new listener to an event queue.
2396 2396 */
2397 2397 void
2398 2398 cte_add_listener(ct_equeue_t *q, ct_listener_t *l)
2399 2399 {
2400 2400 cv_init(&l->ctl_cv, NULL, CV_DEFAULT, NULL);
2401 2401 l->ctl_equeue = q;
2402 2402 l->ctl_position = NULL;
2403 2403 l->ctl_flags = 0;
2404 2404
2405 2405 mutex_enter(&q->ctq_lock);
2406 2406 list_insert_head(&q->ctq_tail, l);
2407 2407 list_insert_head(&q->ctq_listeners, l);
2408 2408 q->ctq_nlisteners++;
2409 2409 mutex_exit(&q->ctq_lock);
2410 2410 }
2411 2411
2412 2412 /*
2413 2413 * cte_remove_listener
2414 2414 *
2415 2415 * Remove a listener from an event queue. No other queue activities
2416 2416  * (e.g. cte_get_event) may be in progress at this endpoint when this
2417 2417 * is called.
2418 2418 */
2419 2419 void
2420 2420 cte_remove_listener(ct_listener_t *l)
2421 2421 {
2422 2422 ct_equeue_t *q = l->ctl_equeue;
2423 2423 ct_kevent_t *e;
2424 2424
2425 2425 mutex_enter(&q->ctq_lock);
2426 2426
2427 2427 ASSERT((l->ctl_flags & (CTLF_COPYOUT|CTLF_RESET)) == 0);
2428 2428
2429 2429 if ((e = l->ctl_position) != NULL)
2430 2430 cte_qrele(q, l, e);
2431 2431 else
2432 2432 list_remove(&q->ctq_tail, l);
2433 2433 l->ctl_position = NULL;
2434 2434
2435 2435 q->ctq_nlisteners--;
2436 2436 list_remove(&q->ctq_listeners, l);
2437 2437
2438 2438 if (l->ctl_flags & CTLF_RELIABLE)
2439 2439 q->ctq_nreliable--;
2440 2440
2441 2441 /*
2442 2442 	 * If we are the last listener of a dead reference-counted
2443 2443 * queue (i.e. a process bundle) we free it. Otherwise we just
2444 2444 * trim any events which may have been kept around for our
2445 2445 * benefit.
2446 2446 */
2447 2447 if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_flags & CTQ_DEAD) &&
2448 2448 (q->ctq_nlisteners == 0)) {
2449 2449 cte_queue_destroy(q);
2450 2450 } else {
2451 2451 cte_trim(q, NULL);
2452 2452 mutex_exit(&q->ctq_lock);
2453 2453 }
2454 2454 }
2455 2455
2456 2456 /*
2457 2457 * cte_reset_listener
2458 2458 *
2459 2459 * Moves a listener's queue pointer to the beginning of the queue.
2460 2460 */
2461 2461 void
2462 2462 cte_reset_listener(ct_listener_t *l)
2463 2463 {
2464 2464 ct_equeue_t *q = l->ctl_equeue;
2465 2465
2466 2466 mutex_enter(&q->ctq_lock);
2467 2467
2468 2468 /*
2469 2469 * We allow an asynchronous reset because it doesn't make a
2470 2470 * whole lot of sense to make reset block or fail. We already
2471 2471 * have most of the mechanism needed thanks to queue trimming,
2472 2472 * so implementing it isn't a big deal.
2473 2473 */
2474 2474 if (l->ctl_flags & CTLF_COPYOUT)
2475 2475 l->ctl_flags |= CTLF_RESET;
2476 2476
2477 2477 (void) cte_qmove(q, l, list_head(&q->ctq_events));
2478 2478
2479 2479 /*
2480 2480 * Inform blocked readers.
2481 2481 */
2482 2482 cv_broadcast(&l->ctl_cv);
2483 2483 pollwakeup(&l->ctl_pollhead, POLLIN);
2484 2484 mutex_exit(&q->ctq_lock);
2485 2485 }
2486 2486
2487 2487 /*
2488 2488 * cte_next_event
2489 2489 *
2490 2490 * Moves the event pointer for the specified listener to the next event
2491 2491 * on the queue. To avoid races, this movement only occurs if the
2492 2492 * specified event id matches that of the current event. This is used
2493 2493 * primarily to skip events that have been read but whose extended data
2494 2494 * haven't been copied out.
2495 2495 */
2496 2496 int
2497 2497 cte_next_event(ct_listener_t *l, uint64_t id)
2498 2498 {
2499 2499 ct_equeue_t *q = l->ctl_equeue;
2500 2500 ct_kevent_t *old;
2501 2501
2502 2502 mutex_enter(&q->ctq_lock);
2503 2503
2504 2504 if (l->ctl_flags & CTLF_COPYOUT)
2505 2505 l->ctl_flags |= CTLF_RESET;
2506 2506
2507 2507 if (((old = l->ctl_position) != NULL) && (old->cte_id == id))
2508 2508 (void) cte_qmove(q, l, list_next(&q->ctq_events, old));
2509 2509
2510 2510 mutex_exit(&q->ctq_lock);
2511 2511
2512 2512 return (0);
2513 2513 }
2514 2514
2515 2515 /*
2516 2516 * cte_get_event
2517 2517 *
2518 2518 * Reads an event from an event endpoint. If "nonblock" is clear, we
2519 2519 * block until a suitable event is ready. If "crit" is set, we only
2520 2520 * read critical events. Note that while "cr" is the caller's cred,
2521 2521 * "zuniqid" is the unique id of the zone the calling contract
2522 2522 * filesystem was mounted in.
2523 2523 */
2524 2524 int
2525 2525 cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr,
2526 2526 uint64_t zuniqid, int crit)
2527 2527 {
2528 2528 ct_equeue_t *q = l->ctl_equeue;
2529 2529 ct_kevent_t *temp;
2530 2530 int result = 0;
2531 2531 int partial = 0;
2532 2532 size_t size, gsize, len;
2533 2533 model_t mdl = get_udatamodel();
2534 2534 STRUCT_DECL(ct_event, ev);
2535 2535 STRUCT_INIT(ev, mdl);
2536 2536
2537 2537 /*
2538 2538 * cte_qreadable checks for CTLF_COPYOUT as well as ensures
2539 2539 * that there exists, and we are pointing to, an appropriate
2540 2540 * event. It may temporarily drop ctq_lock, but that doesn't
2541 2541 * really matter to us.
2542 2542 */
2543 2543 mutex_enter(&q->ctq_lock);
2544 2544 while (cte_qreadable(q, l, cr, zuniqid, crit)) {
2545 2545 if (nonblock) {
2546 2546 result = EAGAIN;
2547 2547 goto error;
2548 2548 }
2549 2549 if (q->ctq_flags & CTQ_DEAD) {
2550 2550 result = EIDRM;
2551 2551 goto error;
2552 2552 }
2553 2553 result = cv_wait_sig(&l->ctl_cv, &q->ctq_lock);
2554 2554 if (result == 0) {
2555 2555 result = EINTR;
2556 2556 goto error;
2557 2557 }
2558 2558 }
2559 2559 temp = l->ctl_position;
2560 2560 cte_hold(temp);
2561 2561 l->ctl_flags |= CTLF_COPYOUT;
2562 2562 mutex_exit(&q->ctq_lock);
2563 2563
2564 2564 /*
2565 2565 * We now have an event. Copy in the user event structure to
2566 2566 * see how much space we have to work with.
2567 2567 */
2568 2568 result = copyin(uaddr, STRUCT_BUF(ev), STRUCT_SIZE(ev));
2569 2569 if (result)
2570 2570 goto copyerr;
2571 2571
2572 2572 /*
2573 2573 * Determine what data we have and what the user should be
2574 2574 * allowed to see.
2575 2575 */
2576 2576 size = gsize = 0;
2577 2577 if (temp->cte_data) {
2578 2578 VERIFY(nvlist_size(temp->cte_data, &size,
2579 2579 NV_ENCODE_NATIVE) == 0);
2580 2580 ASSERT(size != 0);
2581 2581 }
2582 2582 if (zuniqid == GLOBAL_ZONEUNIQID && temp->cte_gdata) {
2583 2583 VERIFY(nvlist_size(temp->cte_gdata, &gsize,
2584 2584 NV_ENCODE_NATIVE) == 0);
2585 2585 ASSERT(gsize != 0);
2586 2586 }
2587 2587
2588 2588 /*
2589 2589 * If we have enough space, copy out the extended event data.
2590 2590 */
2591 2591 len = size + gsize;
2592 2592 if (len) {
2593 2593 if (STRUCT_FGET(ev, ctev_nbytes) >= len) {
2594 2594 char *buf = kmem_alloc(len, KM_SLEEP);
2595 2595
2596 2596 if (size)
2597 2597 VERIFY(nvlist_pack(temp->cte_data, &buf, &size,
2598 2598 NV_ENCODE_NATIVE, KM_SLEEP) == 0);
2599 2599 if (gsize) {
2600 2600 char *tmp = buf + size;
2601 2601
2602 2602 VERIFY(nvlist_pack(temp->cte_gdata, &tmp,
2603 2603 &gsize, NV_ENCODE_NATIVE, KM_SLEEP) == 0);
2604 2604 }
2605 2605
2606 2606 /* This shouldn't have changed */
2607 2607 ASSERT(size + gsize == len);
2608 2608 result = copyout(buf, STRUCT_FGETP(ev, ctev_buffer),
2609 2609 len);
2610 2610 kmem_free(buf, len);
2611 2611 if (result)
2612 2612 goto copyerr;
2613 2613 } else {
2614 2614 partial = 1;
2615 2615 }
2616 2616 }
2617 2617
2618 2618 /*
2619 2619 * Copy out the common event data.
2620 2620 */
2621 2621 STRUCT_FSET(ev, ctev_id, temp->cte_contract->ct_id);
2622 2622 STRUCT_FSET(ev, ctev_evid, temp->cte_id);
2623 2623 STRUCT_FSET(ev, ctev_cttype,
2624 2624 temp->cte_contract->ct_type->ct_type_index);
2625 2625 STRUCT_FSET(ev, ctev_flags, temp->cte_flags &
2626 2626 (CTE_ACK|CTE_INFO|CTE_NEG));
2627 2627 STRUCT_FSET(ev, ctev_type, temp->cte_type);
2628 2628 STRUCT_FSET(ev, ctev_nbytes, len);
2629 2629 STRUCT_FSET(ev, ctev_goffset, size);
2630 2630 result = copyout(STRUCT_BUF(ev), uaddr, STRUCT_SIZE(ev));
2631 2631
2632 2632 copyerr:
2633 2633 /*
2634 2634 * Only move our location in the queue if all copyouts were
2635 2635 * successful, the caller provided enough space for the entire
2636 2636 * event, and our endpoint wasn't reset or otherwise moved by
2637 2637 * another thread.
2638 2638 */
2639 2639 mutex_enter(&q->ctq_lock);
2640 2640 if (result)
2641 2641 result = EFAULT;
2642 2642 else if (!partial && ((l->ctl_flags & CTLF_RESET) == 0) &&
2643 2643 (l->ctl_position == temp))
2644 2644 (void) cte_qmove(q, l, list_next(&q->ctq_events, temp));
2645 2645 l->ctl_flags &= ~(CTLF_COPYOUT|CTLF_RESET);
2646 2646 /*
2647 2647 * Signal any readers blocked on our CTLF_COPYOUT.
2648 2648 */
2649 2649 cv_signal(&l->ctl_cv);
2650 2650 cte_rele(temp);
2651 2651
2652 2652 error:
2653 2653 mutex_exit(&q->ctq_lock);
2654 2654 return (result);
2655 2655 }
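From userland, this path is normally reached through the libcontract(3LIB) read routines; a sketch follows, assuming ct_event_read_critical(), ct_event_get_type(), and ct_event_free(), with an illustrative endpoint path. Opening the endpoint with O_NONBLOCK corresponds to the nonblock argument (EAGAIN when nothing suitable is queued), and the _critical variant corresponds to crit being set.

#include <libcontract.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

void
drain_critical(const char *endpoint)
{
	int fd = open(endpoint, O_RDONLY | O_NONBLOCK);
	ct_evthdl_t ev;

	while (ct_event_read_critical(fd, &ev) == 0) {
		(void) printf("event type %u\n", ct_event_get_type(ev));
		ct_event_free(ev);
	}
	(void) close(fd);
}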
2656 2656
2657 2657 /*
2658 2658 * cte_set_reliable
2659 2659 *
2660 2660 * Requests that events be reliably delivered to an event endpoint.
2661 2661 * Unread informative and acknowledged critical events will not be
2662 2662 * removed from the queue until this listener reads or skips them.
2663 2663 * Because a listener could maliciously request reliable delivery and
2664 2664 * then do nothing, this requires that PRIV_CONTRACT_EVENT be in the
2665 2665 * caller's effective set.
2666 2666 */
2667 2667 int
2668 2668 cte_set_reliable(ct_listener_t *l, const cred_t *cr)
2669 2669 {
2670 2670 ct_equeue_t *q = l->ctl_equeue;
2671 2671 int error;
2672 2672
2673 2673 if ((error = secpolicy_contract_event(cr)) != 0)
2674 2674 return (error);
2675 2675
2676 2676 mutex_enter(&q->ctq_lock);
2677 2677 if ((l->ctl_flags & CTLF_RELIABLE) == 0) {
2678 2678 l->ctl_flags |= CTLF_RELIABLE;
2679 2679 q->ctq_nreliable++;
2680 2680 if (l->ctl_position != NULL)
2681 2681 l->ctl_position->cte_nodes[q->ctq_listno].
2682 2682 ctm_nreliable++;
2683 2683 }
2684 2684 mutex_exit(&q->ctq_lock);
2685 2685
2686 2686 return (0);
2687 2687 }
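A short userland sketch of requesting reliable delivery, assuming the libcontract(3LIB) wrapper ct_event_reliable() and the process-type bundle endpoint path; this lands in cte_set_reliable() above and fails unless PRIV_CONTRACT_EVENT is in the caller's effective set.

#include <libcontract.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/system/contract/process/bundle", O_RDONLY);
	int err = ct_event_reliable(fd);

	if (err != 0)
		(void) fprintf(stderr, "reliable delivery refused: %s\n",
		    strerror(err));
	(void) close(fd);
	return (err);
}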