1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Contracts
  28  * ---------
  29  *
  30  * Contracts are a primitive which enrich the relationships between
  31  * processes and system resources.  The primary purpose of contracts is
  32  * to provide a means for the system to negotiate the departure from a
  33  * binding relationship (e.g. pages locked in memory or a thread bound
  34  * to a processor), but they can also be used as a purely asynchronous
  35  * error reporting mechanism as they are with process contracts.
  36  *
  37  * More information on how one interfaces with contracts and what
  38  * contracts can do for you can be found in:
  39  *   PSARC 2003/193 Solaris Contracts
  40  *   PSARC 2004/460 Contracts addendum
  41  *
  42  * This file contains the core contracts framework.  By itself it is
  43  * useless: it depends on the contracts filesystem (ctfs) to provide an
  44  * interface to user processes and individual contract types to
  45  * implement the process/resource relationships.
  46  *
  47  * Data structure overview
  48  * -----------------------
  49  *
  50  * A contract is represented by a contract_t, which itself points to an
  51  * encapsulating contract-type specific contract object.  A contract_t
  52  * contains the contract's static identity (including its terms), its
  53  * linkage to various bookkeeping structures, the contract-specific
  54  * event queue, and a reference count.
  55  *
  56  * A contract template is represented by a ct_template_t, which, like a
  57  * contract, points to an encapsulating contract-type specific template
  58  * object.  A ct_template_t contains the template's terms.
  59  *
  60  * An event queue is represented by a ct_equeue_t, and consists of a
  61  * list of events, a list of listeners, and a list of listeners who are
  62  * waiting for new events (affectionately referred to as "tail
  63  * listeners").  There are three queue types, defined by ct_listnum_t
  64  * (an enum).  An event may be on one of each type of queue
  65  * simultaneously; the list linkage used by a queue is determined by
  66  * its type.
  67  *
  68  * An event is represented by a ct_kevent_t, which contains mostly
  69  * static event data (e.g. id, payload).  It also has an array of
  70  * ct_member_t structures, each of which contains a list_node_t and
  71  * represents the event's linkage in a specific event queue.
  72  *
  73  * Each open of an event endpoint results in the creation of a new
  74  * listener, represented by a ct_listener_t.  In addition to linkage
  75  * into the aforementioned lists in the event_queue, a ct_listener_t
  76  * contains a pointer to the ct_kevent_t it is currently positioned at
  77  * as well as a set of status flags and other administrative data.
  78  *
  79  * Each process has a list of contracts it owns, p_ct_held; a pointer
  80  * to the process contract it is a member of, p_ct_process; the linkage
  81  * for that membership, p_ct_member; and an array of event queue
  82  * structures representing the process bundle queues.
  83  *
  84  * Each LWP has an array of its active templates, lwp_ct_active; and
  85  * the most recently created contracts, lwp_ct_latest.
  86  *
  87  * A process contract has a list of member processes and a list of
  88  * inherited contracts.
  89  *
  90  * There is a system-wide list of all contracts, as well as per-type
  91  * lists of contracts.
  92  *
  93  * Lock ordering overview
  94  * ----------------------
  95  *
  96  * Locks at the top are taken first:
  97  *
  98  *                   ct_evtlock
  99  *                   regent ct_lock
 100  *                   member ct_lock
 101  *                   pidlock
 102  *                   p_lock
 103  *    contract ctq_lock         contract_lock
 104  *    pbundle ctq_lock
 105  *    cte_lock
 106  *                   ct_reflock
 107  *
 108  * contract_lock and ctq_lock/cte_lock are not currently taken at the
 109  * same time.
 110  *
 111  * Reference counting and locking
 112  * ------------------------------
 113  *
 114  * A contract has a reference count, protected by ct_reflock.
 115  * (ct_reflock is also used in a couple other places where atomic
 116  * access to a variable is needed in an innermost context).  A process
 117  * maintains a hold on each contract it owns.  A process contract has a
 118  * hold on each contract it has inherited.  Each event has a hold on
 119  * the contract which generated it.  Process contract templates have
 120  * holds on the contracts referred to by their transfer terms.  CTFS
 121  * contract directory nodes have holds on contracts.  Lastly, various
 122  * code paths may temporarily take holds on contracts to prevent them
 123  * from disappearing while other processing is going on.  It is
 124  * important to note that the global contract lists do not hold
 125  * references on contracts; a contract is removed from these structures
 126  * atomically with the release of its last reference.
 127  *
 128  * At a given point in time, a contract can either be owned by a
 129  * process, inherited by a regent process contract, or orphaned.  A
 130  * contract_t's  owner and regent pointers, ct_owner and ct_regent, are
 131  * protected by its ct_lock.  The linkage in the holder's (holder =
 132  * owner or regent) list of contracts, ct_ctlist, is protected by
 133  * whatever lock protects the holder's data structure.  In order for
 134  * these two directions to remain consistent, changing the holder of a
 135  * contract requires that both locks be held.
 136  *
 137  * Events also have reference counts.  There is one hold on an event
 138  * per queue it is present on, in addition to those needed for the
 139  * usual sundry reasons.  Individual listeners are associated with
 140  * specific queues, and increase a queue-specific reference count
 141  * stored in the ct_member_t structure.
 142  *
 143  * The dynamic contents of an event (reference count and flags) are
 144  * protected by its cte_lock, while the contents of the embedded
 145  * ct_member_t structures are protected by the locks of the queues they
 146  * are linked into.  A ct_listener_t's contents are also protected by
 147  * its event queue's ctq_lock.
 148  *
 149  * Resource controls
 150  * -----------------
 151  *
 152  * Control:      project.max-contracts (rc_project_contract)
 153  * Description:  Maximum number of contracts allowed in a project.
 154  *
 155  *   When a contract is created, the project's allocation is tested and
 156  *   (assuming success) increased.  When the last reference to a
 157  *   contract is released, the creating project's allocation is
 158  *   decreased.
 159  */
 160 
 161 #include <sys/mutex.h>
 162 #include <sys/debug.h>
 163 #include <sys/types.h>
 164 #include <sys/param.h>
 165 #include <sys/kmem.h>
 166 #include <sys/thread.h>
 167 #include <sys/id_space.h>
 168 #include <sys/avl.h>
 169 #include <sys/list.h>
 170 #include <sys/sysmacros.h>
 171 #include <sys/proc.h>
 172 #include <sys/ctfs.h>
 173 #include <sys/contract_impl.h>
 174 #include <sys/contract/process_impl.h>
 175 #include <sys/dditypes.h>
 176 #include <sys/contract/device_impl.h>
 177 #include <sys/systm.h>
 178 #include <sys/atomic.h>
 179 #include <sys/cmn_err.h>
 180 #include <sys/model.h>
 181 #include <sys/policy.h>
 182 #include <sys/zone.h>
 183 #include <sys/task.h>
 184 #include <sys/ddi.h>
 185 #include <sys/sunddi.h>
 186 
 187 extern rctl_hndl_t rc_project_contract;
 188 
 189 static id_space_t       *contract_ids;
 190 static avl_tree_t       contract_avl;
 191 static kmutex_t         contract_lock;
 192 
 193 int                     ct_ntypes = CTT_MAXTYPE;
 194 static ct_type_t        *ct_types_static[CTT_MAXTYPE];
 195 ct_type_t               **ct_types = ct_types_static;
 196 int                     ct_debug;
 197 
 198 static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int);
 199 static void cte_queue_destroy(ct_equeue_t *);
 200 static void cte_queue_drain(ct_equeue_t *, int);
 201 static void cte_trim(ct_equeue_t *, contract_t *);
 202 static void cte_copy(ct_equeue_t *, ct_equeue_t *);
 203 
 204 /*
 205  * contract_compar
 206  *
 207  * A contract comparator which sorts on contract ID.
 208  */
 209 int
 210 contract_compar(const void *x, const void *y)
 211 {
 212         const contract_t *ct1 = x;
 213         const contract_t *ct2 = y;
 214 
 215         if (ct1->ct_id < ct2->ct_id)
 216                 return (-1);
 217         if (ct1->ct_id > ct2->ct_id)
 218                 return (1);
 219         return (0);
 220 }
 221 
 222 /*
 223  * contract_init
 224  *
 225  * Initializes the contract subsystem, the specific contract types, and
 226  * process 0.
 227  */
 228 void
 229 contract_init(void)
 230 {
 231         /*
 232          * Initialize contract subsystem.
 233          */
 234         contract_ids = id_space_create("contracts", 1, INT_MAX);
 235         avl_create(&contract_avl, contract_compar, sizeof (contract_t),
 236             offsetof(contract_t, ct_ctavl));
 237         mutex_init(&contract_lock, NULL, MUTEX_DEFAULT, NULL);
 238 
 239         /*
 240          * Initialize contract types.
 241          */
 242         contract_process_init();
 243         contract_device_init();
 244 
 245         /*
 246          * Initialize p0/lwp0 contract state.
 247          */
 248         avl_create(&p0.p_ct_held, contract_compar, sizeof (contract_t),
 249             offsetof(contract_t, ct_ctlist));
 250 }
 251 
 252 /*
 253  * contract_dtor
 254  *
 255  * Performs basic destruction of the common portions of a contract.
 256  * Called from the failure path of contract_ctor and from
 257  * contract_rele.
 258  */
 259 static void
 260 contract_dtor(contract_t *ct)
 261 {
 262         cte_queue_destroy(&ct->ct_events);
 263         list_destroy(&ct->ct_vnodes);
 264         mutex_destroy(&ct->ct_reflock);
 265         mutex_destroy(&ct->ct_lock);
 266         mutex_destroy(&ct->ct_evtlock);
 267 }
 268 
 269 /*
 270  * contract_ctor
 271  *
 272  * Called by a contract type to initialize a contract.  Fails if the
 273  * max-contract resource control would have been exceeded.  After a
 274  * successful call to contract_ctor, the contract is unlocked and
 275  * visible in all namespaces; any type-specific initialization should
 276  * be completed before calling contract_ctor.  Returns 0 on success.
 277  *
 278  * Because not all callers can tolerate failure, a 0 value for canfail
 279  * instructs contract_ctor to ignore the project.max-contracts resource
 280  * control.  Obviously, this "out" should only be employed by callers
 281  * who are sufficiently constrained in other ways (e.g. newproc).
 282  */
 283 int
 284 contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
 285     ctflags_t flags, proc_t *author, int canfail)
 286 {
 287         avl_index_t where;
 288         klwp_t *curlwp = ttolwp(curthread);
 289 
 290         ASSERT(author == curproc);
 291 
 292         mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL);
 293         mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL);
 294         mutex_init(&ct->ct_evtlock, NULL, MUTEX_DEFAULT, NULL);
 295         ct->ct_id = id_alloc(contract_ids);
 296 
 297         cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0);
 298         list_create(&ct->ct_vnodes, sizeof (contract_vnode_t),
 299             offsetof(contract_vnode_t, ctv_node));
 300 
 301         /*
 302          * Instance data
 303          */
 304         ct->ct_ref = 2;              /* one for the holder, one for "latest" */
 305         ct->ct_cuid = crgetuid(CRED());
 306         ct->ct_type = type;
 307         ct->ct_data = data;
 308         gethrestime(&ct->ct_ctime);
 309         ct->ct_state = CTS_OWNED;
 310         ct->ct_flags = flags;
 311         ct->ct_regent = author->p_ct_process ?
 312             &author->p_ct_process->conp_contract : NULL;
 313         ct->ct_ev_info = tmpl->ctmpl_ev_info;
 314         ct->ct_ev_crit = tmpl->ctmpl_ev_crit;
 315         ct->ct_cookie = tmpl->ctmpl_cookie;
 316         ct->ct_owner = author;
 317         ct->ct_ntime.ctm_total = -1;
 318         ct->ct_qtime.ctm_total = -1;
 319         ct->ct_nevent = NULL;
 320 
 321         /*
 322          * Test project.max-contracts.
 323          */
 324         mutex_enter(&author->p_lock);
 325         mutex_enter(&contract_lock);
 326         if (canfail && rctl_test(rc_project_contract,
 327             author->p_task->tk_proj->kpj_rctls, author, 1,
 328             RCA_SAFE) & RCT_DENY) {
 329                 id_free(contract_ids, ct->ct_id);
 330                 mutex_exit(&contract_lock);
 331                 mutex_exit(&author->p_lock);
 332                 ct->ct_events.ctq_flags |= CTQ_DEAD;
 333                 contract_dtor(ct);
 334                 return (1);
 335         }
 336         ct->ct_proj = author->p_task->tk_proj;
 337         ct->ct_proj->kpj_data.kpd_contract++;
 338         (void) project_hold(ct->ct_proj);
 339         mutex_exit(&contract_lock);
 340 
 341         /*
 342          * Insert into holder's avl of contracts.
 343          * We use an avl not because order is important, but because
 344          * readdir of /proc/contracts requires we be able to use a
 345          * scalar as an index into the process's list of contracts
 346          */
 347         ct->ct_zoneid = author->p_zone->zone_id;
 348         ct->ct_czuniqid = ct->ct_mzuniqid = author->p_zone->zone_uniqid;
 349         VERIFY(avl_find(&author->p_ct_held, ct, &where) == NULL);
 350         avl_insert(&author->p_ct_held, ct, where);
 351         mutex_exit(&author->p_lock);
 352 
 353         /*
 354          * Insert into global contract AVL
 355          */
 356         mutex_enter(&contract_lock);
 357         VERIFY(avl_find(&contract_avl, ct, &where) == NULL);
 358         avl_insert(&contract_avl, ct, where);
 359         mutex_exit(&contract_lock);
 360 
 361         /*
 362          * Insert into type AVL
 363          */
 364         mutex_enter(&type->ct_type_lock);
 365         VERIFY(avl_find(&type->ct_type_avl, ct, &where) == NULL);
 366         avl_insert(&type->ct_type_avl, ct, where);
 367         type->ct_type_timestruc = ct->ct_ctime;
 368         mutex_exit(&type->ct_type_lock);
 369 
 370         if (curlwp->lwp_ct_latest[type->ct_type_index])
 371                 contract_rele(curlwp->lwp_ct_latest[type->ct_type_index]);
 372         curlwp->lwp_ct_latest[type->ct_type_index] = ct;
 373 
 374         return (0);
 375 }
 376 
 377 /*
 378  * contract_rele
 379  *
 380  * Releases a reference to a contract.  If the caller had the last
 381  * reference, the contract is removed from all namespaces, its
 382  * allocation against the max-contracts resource control is released,
 383  * and the contract type's free entry point is invoked for any
 384  * type-specific deconstruction and to (presumably) free the object.
 385  */
 386 void
 387 contract_rele(contract_t *ct)
 388 {
 389         uint64_t nref;
 390 
 391         mutex_enter(&ct->ct_reflock);
 392         ASSERT(ct->ct_ref > 0);
 393         nref = --ct->ct_ref;
 394         mutex_exit(&ct->ct_reflock);
 395         if (nref == 0) {
 396                 /*
 397                  * ct_owner is cleared when it drops its reference.
 398                  */
 399                 ASSERT(ct->ct_owner == NULL);
 400                 ASSERT(ct->ct_evcnt == 0);
 401 
 402                 /*
 403                  * Remove from global contract AVL
 404                  */
 405                 mutex_enter(&contract_lock);
 406                 avl_remove(&contract_avl, ct);
 407                 mutex_exit(&contract_lock);
 408 
 409                 /*
 410                  * Remove from type AVL
 411                  */
 412                 mutex_enter(&ct->ct_type->ct_type_lock);
 413                 avl_remove(&ct->ct_type->ct_type_avl, ct);
 414                 mutex_exit(&ct->ct_type->ct_type_lock);
 415 
 416                 /*
 417                  * Release the contract's ID
 418                  */
 419                 id_free(contract_ids, ct->ct_id);
 420 
 421                 /*
 422                  * Release project hold
 423                  */
 424                 mutex_enter(&contract_lock);
 425                 ct->ct_proj->kpj_data.kpd_contract--;
 426                 project_rele(ct->ct_proj);
 427                 mutex_exit(&contract_lock);
 428 
 429                 /*
 430                  * Free the contract
 431                  */
 432                 contract_dtor(ct);
 433                 ct->ct_type->ct_type_ops->contop_free(ct);
 434         }
 435 }
 436 
 437 /*
 438  * contract_hold
 439  *
 440  * Adds a reference to a contract
 441  */
 442 void
 443 contract_hold(contract_t *ct)
 444 {
 445         mutex_enter(&ct->ct_reflock);
 446         ASSERT(ct->ct_ref < UINT64_MAX);
 447         ct->ct_ref++;
 448         mutex_exit(&ct->ct_reflock);
 449 }
 450 
 451 /*
 452  * contract_getzuniqid
 453  *
 454  * Get a contract's zone unique ID.  Needed because 64-bit reads and
 455  * writes aren't atomic on x86.  Since there are contexts where we are
 456  * unable to take ct_lock, we instead use ct_reflock; in actuality any
 457  * lock would do.
 458  */
 459 uint64_t
 460 contract_getzuniqid(contract_t *ct)
 461 {
 462         uint64_t zuniqid;
 463 
 464         mutex_enter(&ct->ct_reflock);
 465         zuniqid = ct->ct_mzuniqid;
 466         mutex_exit(&ct->ct_reflock);
 467 
 468         return (zuniqid);
 469 }
 470 
 471 /*
 472  * contract_setzuniqid
 473  *
 474  * Sets a contract's zone unique ID.   See contract_getzuniqid.
 475  */
 476 void
 477 contract_setzuniqid(contract_t *ct, uint64_t zuniqid)
 478 {
 479         mutex_enter(&ct->ct_reflock);
 480         ct->ct_mzuniqid = zuniqid;
 481         mutex_exit(&ct->ct_reflock);
 482 }
 483 
 484 /*
 485  * contract_abandon
 486  *
 487  * Abandons the specified contract.  If "explicit" is clear, the
 488  * contract was implicitly abandoned (by process exit) and should be
 489  * inherited if its terms allow it and its owner was a member of a
 490  * regent contract.  Otherwise, the contract type's abandon entry point
 491  * is invoked to either destroy or orphan the contract.
 492  */
 493 int
 494 contract_abandon(contract_t *ct, proc_t *p, int explicit)
 495 {
 496         ct_equeue_t *q = NULL;
 497         contract_t *parent = &p->p_ct_process->conp_contract;
 498         int inherit = 0;
 499 
 500         VERIFY(p == curproc);
 501 
 502         mutex_enter(&ct->ct_lock);
 503 
 504         /*
 505          * Multiple contract locks are taken contract -> subcontract.
 506          * Check if the contract will be inherited so we can acquire
 507          * all the necessary locks before making sensitive changes.
 508          */
 509         if (!explicit && (ct->ct_flags & CTF_INHERIT) &&
 510             contract_process_accept(parent)) {
 511                 mutex_exit(&ct->ct_lock);
 512                 mutex_enter(&parent->ct_lock);
 513                 mutex_enter(&ct->ct_lock);
 514                 inherit = 1;
 515         }
 516 
 517         if (ct->ct_owner != p) {
 518                 mutex_exit(&ct->ct_lock);
 519                 if (inherit)
 520                         mutex_exit(&parent->ct_lock);
 521                 return (EINVAL);
 522         }
 523 
 524         mutex_enter(&p->p_lock);
 525         if (explicit)
 526                 avl_remove(&p->p_ct_held, ct);
 527         ct->ct_owner = NULL;
 528         mutex_exit(&p->p_lock);
 529 
 530         /*
 531          * Since we can't call cte_trim with the contract lock held,
 532          * we grab the queue pointer here.
 533          */
 534         if (p->p_ct_equeue)
 535                 q = p->p_ct_equeue[ct->ct_type->ct_type_index];
 536 
 537         /*
 538          * contop_abandon may destroy the contract so we rely on it to
 539          * drop ct_lock.  We retain a reference on the contract so that
 540          * the cte_trim which follows functions properly.  Even though
 541          * cte_trim doesn't dereference the contract pointer, it is
 542          * still necessary to retain a reference to the contract so
 543          * that we don't trim events which are sent by a subsequently
 544          * allocated contract infortuitously located at the same address.
 545          */
 546         contract_hold(ct);
 547 
 548         if (inherit) {
 549                 ct->ct_state = CTS_INHERITED;
 550                 VERIFY(ct->ct_regent == parent);
 551                 contract_process_take(parent, ct);
 552 
 553                 /*
 554                  * We are handing off the process's reference to the
 555                  * parent contract.  For this reason, the order in
 556                  * which we drop the contract locks is also important.
 557                  */
 558                 mutex_exit(&ct->ct_lock);
 559                 mutex_exit(&parent->ct_lock);
 560         } else {
 561                 ct->ct_regent = NULL;
 562                 ct->ct_type->ct_type_ops->contop_abandon(ct);
 563         }
 564 
 565         /*
 566          * ct_lock has been dropped; we can safely trim the event
 567          * queue now.
 568          */
 569         if (q) {
 570                 mutex_enter(&q->ctq_lock);
 571                 cte_trim(q, ct);
 572                 mutex_exit(&q->ctq_lock);
 573         }
 574 
 575         contract_rele(ct);
 576 
 577         return (0);
 578 }
 579 
 580 int
 581 contract_newct(contract_t *ct)
 582 {
 583         return (ct->ct_type->ct_type_ops->contop_newct(ct));
 584 }
 585 
 586 /*
 587  * contract_adopt
 588  *
 589  * Adopts a contract.  After a successful call to this routine, the
 590  * previously inherited contract will belong to the calling process,
 591  * and its events will have been appended to its new owner's process
 592  * bundle queue.
 593  */
 594 int
 595 contract_adopt(contract_t *ct, proc_t *p)
 596 {
 597         avl_index_t where;
 598         ct_equeue_t *q;
 599         contract_t *parent;
 600 
 601         ASSERT(p == curproc);
 602 
 603         /*
 604          * Ensure the process has an event queue.  Checked by ASSERTs
 605          * below.
 606          */
 607         (void) contract_type_pbundle(ct->ct_type, p);
 608 
 609         mutex_enter(&ct->ct_lock);
 610         parent = ct->ct_regent;
 611         if (ct->ct_state != CTS_INHERITED ||
 612             &p->p_ct_process->conp_contract != parent ||
 613             p->p_zone->zone_uniqid != ct->ct_czuniqid) {
 614                 mutex_exit(&ct->ct_lock);
 615                 return (EINVAL);
 616         }
 617 
 618         /*
 619          * Multiple contract locks are taken contract -> subcontract.
 620          */
 621         mutex_exit(&ct->ct_lock);
 622         mutex_enter(&parent->ct_lock);
 623         mutex_enter(&ct->ct_lock);
 624 
 625         /*
 626          * It is possible that the contract was adopted by someone else
 627          * while its lock was dropped.  It isn't possible for the
 628          * contract to have been inherited by a different regent
 629          * contract.
 630          */
 631         if (ct->ct_state != CTS_INHERITED) {
 632                 mutex_exit(&parent->ct_lock);
 633                 mutex_exit(&ct->ct_lock);
 634                 return (EBUSY);
 635         }
 636         ASSERT(ct->ct_regent == parent);
 637 
 638         ct->ct_state = CTS_OWNED;
 639 
 640         contract_process_adopt(ct, p);
 641 
 642         mutex_enter(&p->p_lock);
 643         ct->ct_owner = p;
 644         VERIFY(avl_find(&p->p_ct_held, ct, &where) == NULL);
 645         avl_insert(&p->p_ct_held, ct, where);
 646         mutex_exit(&p->p_lock);
 647 
 648         ASSERT(ct->ct_owner->p_ct_equeue);
 649         ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
 650         q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
 651         cte_copy(&ct->ct_events, q);
 652         mutex_exit(&ct->ct_lock);
 653 
 654         return (0);
 655 }
 656 
 657 /*
 658  * contract_ack
 659  *
 660  * Acknowledges receipt of a critical event.
 661  */
 662 int
 663 contract_ack(contract_t *ct, uint64_t evid, int ack)
 664 {
 665         ct_kevent_t *ev;
 666         list_t *queue = &ct->ct_events.ctq_events;
 667         int error = ESRCH;
 668         int nego = 0;
 669         uint_t evtype;
 670 
 671         ASSERT(ack == CT_ACK || ack == CT_NACK);
 672 
 673         mutex_enter(&ct->ct_lock);
 674         mutex_enter(&ct->ct_events.ctq_lock);
 675         /*
 676          * We are probably ACKing something near the head of the queue.
 677          */
 678         for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
 679                 if (ev->cte_id == evid) {
 680                         if (ev->cte_flags & CTE_NEG)
 681                                 nego = 1;
 682                         else if (ack == CT_NACK)
 683                                 break;
 684                         if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
 685                                 ev->cte_flags |= CTE_ACK;
 686                                 ct->ct_evcnt--;
 687                                 evtype = ev->cte_type;
 688                                 error = 0;
 689                         }
 690                         break;
 691                 }
 692         }
 693         mutex_exit(&ct->ct_events.ctq_lock);
 694         mutex_exit(&ct->ct_lock);
 695 
 696         /*
 697          * Not all critical events are negotiation events, however
 698          * every negotiation event is a critical event. NEGEND events
 699          * are critical events but are not negotiation events
 700          */
 701         if (error || !nego)
 702                 return (error);
 703 
 704         if (ack == CT_ACK)
 705                 error = ct->ct_type->ct_type_ops->contop_ack(ct, evtype, evid);
 706         else
 707                 error = ct->ct_type->ct_type_ops->contop_nack(ct, evtype, evid);
 708 
 709         return (error);
 710 }
 711 
 712 /*ARGSUSED*/
 713 int
 714 contract_ack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
 715 {
 716         cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
 717             ct->ct_id);
 718         return (ENOSYS);
 719 }
 720 
 721 /*ARGSUSED*/
 722 int
 723 contract_qack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
 724 {
 725         cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
 726             ct->ct_id);
 727         return (ENOSYS);
 728 }
 729 
 730 /*ARGSUSED*/
 731 int
 732 contract_qack_notsup(contract_t *ct, uint_t evtype, uint64_t evid)
 733 {
 734         return (ERANGE);
 735 }
 736 
 737 /*
 738  * contract_qack
 739  *
 740  * Asks that negotiations be extended by another time quantum
 741  */
 742 int
 743 contract_qack(contract_t *ct, uint64_t evid)
 744 {
 745         ct_kevent_t *ev;
 746         list_t *queue = &ct->ct_events.ctq_events;
 747         int nego = 0;
 748         uint_t evtype;
 749 
 750         mutex_enter(&ct->ct_lock);
 751         mutex_enter(&ct->ct_events.ctq_lock);
 752 
 753         for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
 754                 if (ev->cte_id == evid) {
 755                         if ((ev->cte_flags & (CTE_NEG | CTE_ACK)) == CTE_NEG) {
 756                                 evtype = ev->cte_type;
 757                                 nego = 1;
 758                         }
 759                         break;
 760                 }
 761         }
 762         mutex_exit(&ct->ct_events.ctq_lock);
 763         mutex_exit(&ct->ct_lock);
 764 
 765         /*
 766          * Only a negotiated event (which is by definition also a critical
 767          * event) which has not yet been acknowledged can provide
 768          * time quanta to a negotiating owner process.
 769          */
 770         if (!nego)
 771                 return (ESRCH);
 772 
 773         return (ct->ct_type->ct_type_ops->contop_qack(ct, evtype, evid));
 774 }
 775 
 776 /*
 777  * contract_orphan
 778  *
 779  * Icky-poo.  This is a process-contract special, used to ACK all
 780  * critical messages when a contract is orphaned.
 781  */
 782 void
 783 contract_orphan(contract_t *ct)
 784 {
 785         ct_kevent_t *ev;
 786         list_t *queue = &ct->ct_events.ctq_events;
 787 
 788         ASSERT(MUTEX_HELD(&ct->ct_lock));
 789         ASSERT(ct->ct_state != CTS_ORPHAN);
 790 
 791         mutex_enter(&ct->ct_events.ctq_lock);
 792         ct->ct_state = CTS_ORPHAN;
 793         for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
 794                 if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
 795                         ev->cte_flags |= CTE_ACK;
 796                         ct->ct_evcnt--;
 797                 }
 798         }
 799         mutex_exit(&ct->ct_events.ctq_lock);
 800 
 801         ASSERT(ct->ct_evcnt == 0);
 802 }
 803 
 804 /*
 805  * contract_destroy
 806  *
 807  * Explicit contract destruction.  Called when contract is empty.
 808  * The contract will actually stick around until all of its events are
 809  * removed from the bundle and and process bundle queues, and all fds
 810  * which refer to it are closed.  See contract_dtor if you are looking
 811  * for what destroys the contract structure.
 812  */
 813 void
 814 contract_destroy(contract_t *ct)
 815 {
 816         ASSERT(MUTEX_HELD(&ct->ct_lock));
 817         ASSERT(ct->ct_state != CTS_DEAD);
 818         ASSERT(ct->ct_owner == NULL);
 819 
 820         ct->ct_state = CTS_DEAD;
 821         cte_queue_drain(&ct->ct_events, 1);
 822         mutex_exit(&ct->ct_lock);
 823         mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
 824         cte_trim(&ct->ct_type->ct_type_events, ct);
 825         mutex_exit(&ct->ct_type->ct_type_events.ctq_lock);
 826         mutex_enter(&ct->ct_lock);
 827         ct->ct_type->ct_type_ops->contop_destroy(ct);
 828         mutex_exit(&ct->ct_lock);
 829         contract_rele(ct);
 830 }
 831 
 832 /*
 833  * contract_vnode_get
 834  *
 835  * Obtains the contract directory vnode for this contract, if there is
 836  * one.  The caller must VN_RELE the vnode when they are through using
 837  * it.
 838  */
 839 vnode_t *
 840 contract_vnode_get(contract_t *ct, vfs_t *vfsp)
 841 {
 842         contract_vnode_t *ctv;
 843         vnode_t *vp = NULL;
 844 
 845         mutex_enter(&ct->ct_lock);
 846         for (ctv = list_head(&ct->ct_vnodes); ctv != NULL;
 847             ctv = list_next(&ct->ct_vnodes, ctv))
 848                 if (ctv->ctv_vnode->v_vfsp == vfsp) {
 849                         vp = ctv->ctv_vnode;
 850                         VN_HOLD(vp);
 851                         break;
 852                 }
 853         mutex_exit(&ct->ct_lock);
 854         return (vp);
 855 }
 856 
 857 /*
 858  * contract_vnode_set
 859  *
 860  * Sets the contract directory vnode for this contract.  We don't hold
 861  * a reference on the vnode because we don't want to prevent it from
 862  * being freed.  The vnode's inactive entry point will take care of
 863  * notifying us when it should be removed.
 864  */
 865 void
 866 contract_vnode_set(contract_t *ct, contract_vnode_t *ctv, vnode_t *vnode)
 867 {
 868         mutex_enter(&ct->ct_lock);
 869         ctv->ctv_vnode = vnode;
 870         list_insert_head(&ct->ct_vnodes, ctv);
 871         mutex_exit(&ct->ct_lock);
 872 }
 873 
 874 /*
 875  * contract_vnode_clear
 876  *
 877  * Removes this vnode as the contract directory vnode for this
 878  * contract.  Called from a contract directory's inactive entry point,
 879  * this may return 0 indicating that the vnode gained another reference
 880  * because of a simultaneous call to contract_vnode_get.
 881  */
 882 int
 883 contract_vnode_clear(contract_t *ct, contract_vnode_t *ctv)
 884 {
 885         vnode_t *vp = ctv->ctv_vnode;
 886         int result;
 887 
 888         mutex_enter(&ct->ct_lock);
 889         mutex_enter(&vp->v_lock);
 890         if (vp->v_count == 1) {
 891                 list_remove(&ct->ct_vnodes, ctv);
 892                 result = 1;
 893         } else {
 894                 vp->v_count--;
 895                 result = 0;
 896         }
 897         mutex_exit(&vp->v_lock);
 898         mutex_exit(&ct->ct_lock);
 899 
 900         return (result);
 901 }
 902 
 903 /*
 904  * contract_exit
 905  *
 906  * Abandons all contracts held by process p, and drains process p's
 907  * bundle queues.  Called on process exit.
 908  */
 909 void
 910 contract_exit(proc_t *p)
 911 {
 912         contract_t *ct;
 913         void *cookie = NULL;
 914         int i;
 915 
 916         ASSERT(p == curproc);
 917 
 918         /*
 919          * Abandon held contracts.  contract_abandon knows enough not
 920          * to remove the contract from the list a second time.  We are
 921          * exiting, so no locks are needed here.  But because
 922          * contract_abandon will take p_lock, we need to make sure we
 923          * aren't holding it.
 924          */
 925         ASSERT(MUTEX_NOT_HELD(&p->p_lock));
 926         while ((ct = avl_destroy_nodes(&p->p_ct_held, &cookie)) != NULL)
 927                 VERIFY(contract_abandon(ct, p, 0) == 0);
 928 
 929         /*
 930          * Drain pbundles.  Because a process bundle queue could have
 931          * been passed to another process, they may not be freed right
 932          * away.
 933          */
 934         if (p->p_ct_equeue) {
 935                 for (i = 0; i < CTT_MAXTYPE; i++)
 936                         if (p->p_ct_equeue[i])
 937                                 cte_queue_drain(p->p_ct_equeue[i], 0);
 938                 kmem_free(p->p_ct_equeue, CTT_MAXTYPE * sizeof (ct_equeue_t *));
 939         }
 940 }
 941 
 942 static int
 943 get_time_left(struct ct_time *t)
 944 {
 945         clock_t ticks_elapsed;
 946         int secs_elapsed;
 947 
 948         if (t->ctm_total == -1)
 949                 return (-1);
 950 
 951         ticks_elapsed = ddi_get_lbolt() - t->ctm_start;
 952         secs_elapsed = t->ctm_total - (drv_hztousec(ticks_elapsed)/MICROSEC);
 953         return (secs_elapsed > 0 ? secs_elapsed : 0);
 954 }
 955 
 956 /*
 957  * contract_status_common
 958  *
 959  * Populates a ct_status structure.  Used by contract types in their
 960  * status entry points and ctfs when only common information is
 961  * requested.
 962  */
 963 void
 964 contract_status_common(contract_t *ct, zone_t *zone, void *status,
 965     model_t model)
 966 {
 967         STRUCT_HANDLE(ct_status, lstatus);
 968 
 969         STRUCT_SET_HANDLE(lstatus, model, status);
 970         ASSERT(MUTEX_HELD(&ct->ct_lock));
 971         if (zone->zone_uniqid == GLOBAL_ZONEUNIQID ||
 972             zone->zone_uniqid == ct->ct_czuniqid) {
 973                 zone_t *czone;
 974                 zoneid_t zoneid = -1;
 975 
 976                 /*
 977                  * Contracts don't have holds on the zones they were
 978                  * created by.  If the contract's zone no longer
 979                  * exists, we say its zoneid is -1.
 980                  */
 981                 if (zone->zone_uniqid == ct->ct_czuniqid ||
 982                     ct->ct_czuniqid == GLOBAL_ZONEUNIQID) {
 983                         zoneid = ct->ct_zoneid;
 984                 } else if ((czone = zone_find_by_id(ct->ct_zoneid)) != NULL) {
 985                         if (czone->zone_uniqid == ct->ct_mzuniqid)
 986                                 zoneid = ct->ct_zoneid;
 987                         zone_rele(czone);
 988                 }
 989 
 990                 STRUCT_FSET(lstatus, ctst_zoneid, zoneid);
 991                 STRUCT_FSET(lstatus, ctst_holder,
 992                     (ct->ct_state == CTS_OWNED) ? ct->ct_owner->p_pid :
 993                     (ct->ct_state == CTS_INHERITED) ? ct->ct_regent->ct_id : 0);
 994                 STRUCT_FSET(lstatus, ctst_state, ct->ct_state);
 995         } else {
 996                 /*
 997                  * We are looking at a contract which was created by a
 998                  * process outside of our zone.  We provide fake zone,
 999                  * holder, and state information.
1000                  */
1001 
1002                 STRUCT_FSET(lstatus, ctst_zoneid, zone->zone_id);
1003                 /*
1004                  * Since "zone" can't disappear until the calling ctfs
1005                  * is unmounted, zone_zsched must be valid.
1006                  */
1007                 STRUCT_FSET(lstatus, ctst_holder, (ct->ct_state < CTS_ORPHAN) ?
1008                     zone->zone_zsched->p_pid : 0);
1009                 STRUCT_FSET(lstatus, ctst_state, (ct->ct_state < CTS_ORPHAN) ?
1010                     CTS_OWNED : ct->ct_state);
1011         }
1012         STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt);
1013         STRUCT_FSET(lstatus, ctst_ntime, get_time_left(&ct->ct_ntime));
1014         STRUCT_FSET(lstatus, ctst_qtime, get_time_left(&ct->ct_qtime));
1015         STRUCT_FSET(lstatus, ctst_nevid,
1016             ct->ct_nevent ? ct->ct_nevent->cte_id : 0);
1017         STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit);
1018         STRUCT_FSET(lstatus, ctst_informative, ct->ct_ev_info);
1019         STRUCT_FSET(lstatus, ctst_cookie, ct->ct_cookie);
1020         STRUCT_FSET(lstatus, ctst_type, ct->ct_type->ct_type_index);
1021         STRUCT_FSET(lstatus, ctst_id, ct->ct_id);
1022 }
1023 
1024 /*
1025  * contract_checkcred
1026  *
1027  * Determines if the specified contract is owned by a process with the
1028  * same effective uid as the specified credential.  The caller must
1029  * ensure that the uid spaces are the same.  Returns 1 on success.
1030  */
1031 static int
1032 contract_checkcred(contract_t *ct, const cred_t *cr)
1033 {
1034         proc_t *p;
1035         int fail = 1;
1036 
1037         mutex_enter(&ct->ct_lock);
1038         if ((p = ct->ct_owner) != NULL) {
1039                 mutex_enter(&p->p_crlock);
1040                 fail = crgetuid(cr) != crgetuid(p->p_cred);
1041                 mutex_exit(&p->p_crlock);
1042         }
1043         mutex_exit(&ct->ct_lock);
1044 
1045         return (!fail);
1046 }
1047 
1048 /*
1049  * contract_owned
1050  *
1051  * Determines if the specified credential can view an event generated
1052  * by the specified contract.  If locked is set, the contract's ct_lock
1053  * is held and the caller will need to do additional work to determine
1054  * if they truly can see the event.  Returns 1 on success.
1055  */
1056 int
1057 contract_owned(contract_t *ct, const cred_t *cr, int locked)
1058 {
1059         int owner, cmatch, zmatch;
1060         uint64_t zuniqid, mzuniqid;
1061         uid_t euid;
1062 
1063         ASSERT(locked || MUTEX_NOT_HELD(&ct->ct_lock));
1064 
1065         zuniqid = curproc->p_zone->zone_uniqid;
1066         mzuniqid = contract_getzuniqid(ct);
1067         euid = crgetuid(cr);
1068 
1069         /*
1070          * owner: we own the contract
1071          * cmatch: we are in the creator's (and holder's) zone and our
1072          *   uid matches the creator's or holder's
1073          * zmatch: we are in the effective zone of a contract created
1074          *   in the global zone, and our uid matches that of the
1075          *   virtualized holder's (zsched/kcred)
1076          */
1077         owner = (ct->ct_owner == curproc);
1078         cmatch = (zuniqid == ct->ct_czuniqid) &&
1079             ((ct->ct_cuid == euid) || (!locked && contract_checkcred(ct, cr)));
1080         zmatch = (ct->ct_czuniqid != mzuniqid) && (zuniqid == mzuniqid) &&
1081             (crgetuid(kcred) == euid);
1082 
1083         return (owner || cmatch || zmatch);
1084 }
1085 
1086 
1087 /*
1088  * contract_type_init
1089  *
1090  * Called by contract types to register themselves with the contracts
1091  * framework.
1092  */
1093 ct_type_t *
1094 contract_type_init(ct_typeid_t type, const char *name, contops_t *ops,
1095     ct_f_default_t *dfault)
1096 {
1097         ct_type_t *result;
1098 
1099         ASSERT(type < CTT_MAXTYPE);
1100 
1101         result = kmem_alloc(sizeof (ct_type_t), KM_SLEEP);
1102 
1103         mutex_init(&result->ct_type_lock, NULL, MUTEX_DEFAULT, NULL);
1104         avl_create(&result->ct_type_avl, contract_compar, sizeof (contract_t),
1105             offsetof(contract_t, ct_cttavl));
1106         cte_queue_create(&result->ct_type_events, CTEL_BUNDLE, 20, 0);
1107         result->ct_type_name = name;
1108         result->ct_type_ops = ops;
1109         result->ct_type_default = dfault;
1110         result->ct_type_evid = 0;
1111         gethrestime(&result->ct_type_timestruc);
1112         result->ct_type_index = type;
1113 
1114         ct_types[type] = result;
1115 
1116         return (result);
1117 }
1118 
1119 /*
1120  * contract_type_count
1121  *
1122  * Obtains the number of contracts of a particular type.
1123  */
1124 int
1125 contract_type_count(ct_type_t *type)
1126 {
1127         ulong_t count;
1128 
1129         mutex_enter(&type->ct_type_lock);
1130         count = avl_numnodes(&type->ct_type_avl);
1131         mutex_exit(&type->ct_type_lock);
1132 
1133         return (count);
1134 }
1135 
1136 /*
1137  * contract_type_max
1138  *
1139  * Obtains the maximum contract id of of a particular type.
1140  */
1141 ctid_t
1142 contract_type_max(ct_type_t *type)
1143 {
1144         contract_t *ct;
1145         ctid_t res;
1146 
1147         mutex_enter(&type->ct_type_lock);
1148         ct = avl_last(&type->ct_type_avl);
1149         res = ct ? ct->ct_id : -1;
1150         mutex_exit(&type->ct_type_lock);
1151 
1152         return (res);
1153 }
1154 
1155 /*
1156  * contract_max
1157  *
1158  * Obtains the maximum contract id.
1159  */
1160 ctid_t
1161 contract_max(void)
1162 {
1163         contract_t *ct;
1164         ctid_t res;
1165 
1166         mutex_enter(&contract_lock);
1167         ct = avl_last(&contract_avl);
1168         res = ct ? ct->ct_id : -1;
1169         mutex_exit(&contract_lock);
1170 
1171         return (res);
1172 }
1173 
1174 /*
1175  * contract_lookup_common
1176  *
1177  * Common code for contract_lookup and contract_type_lookup.  Takes a
1178  * pointer to an AVL tree to search in.  Should be called with the
1179  * appropriate tree-protecting lock held (unfortunately unassertable).
1180  */
1181 static ctid_t
1182 contract_lookup_common(avl_tree_t *tree, uint64_t zuniqid, ctid_t current)
1183 {
1184         contract_t template, *ct;
1185         avl_index_t where;
1186         ctid_t res;
1187 
1188         template.ct_id = current;
1189         ct = avl_find(tree, &template, &where);
1190         if (ct == NULL)
1191                 ct = avl_nearest(tree, where, AVL_AFTER);
1192         if (zuniqid != GLOBAL_ZONEUNIQID)
1193                 while (ct && (contract_getzuniqid(ct) != zuniqid))
1194                         ct = AVL_NEXT(tree, ct);
1195         res = ct ? ct->ct_id : -1;
1196 
1197         return (res);
1198 }
1199 
1200 /*
1201  * contract_type_lookup
1202  *
1203  * Returns the next type contract after the specified id, visible from
1204  * the specified zone.
1205  */
1206 ctid_t
1207 contract_type_lookup(ct_type_t *type, uint64_t zuniqid, ctid_t current)
1208 {
1209         ctid_t res;
1210 
1211         mutex_enter(&type->ct_type_lock);
1212         res = contract_lookup_common(&type->ct_type_avl, zuniqid, current);
1213         mutex_exit(&type->ct_type_lock);
1214 
1215         return (res);
1216 }
1217 
1218 /*
1219  * contract_lookup
1220  *
1221  * Returns the next contract after the specified id, visible from the
1222  * specified zone.
1223  */
1224 ctid_t
1225 contract_lookup(uint64_t zuniqid, ctid_t current)
1226 {
1227         ctid_t res;
1228 
1229         mutex_enter(&contract_lock);
1230         res = contract_lookup_common(&contract_avl, zuniqid, current);
1231         mutex_exit(&contract_lock);
1232 
1233         return (res);
1234 }
1235 
1236 /*
1237  * contract_plookup
1238  *
1239  * Returns the next contract held by process p after the specified id,
1240  * visible from the specified zone.  Made complicated by the fact that
1241  * contracts visible in a zone but held by processes outside of the
1242  * zone need to appear as being held by zsched to zone members.
1243  */
1244 ctid_t
1245 contract_plookup(proc_t *p, ctid_t current, uint64_t zuniqid)
1246 {
1247         contract_t template, *ct;
1248         avl_index_t where;
1249         ctid_t res;
1250 
1251         template.ct_id = current;
1252         if (zuniqid != GLOBAL_ZONEUNIQID &&
1253             (p->p_flag & (SSYS|SZONETOP)) == (SSYS|SZONETOP)) {
1254                 /* This is inelegant. */
1255                 mutex_enter(&contract_lock);
1256                 ct = avl_find(&contract_avl, &template, &where);
1257                 if (ct == NULL)
1258                         ct = avl_nearest(&contract_avl, where, AVL_AFTER);
1259                 while (ct && !(ct->ct_state < CTS_ORPHAN &&
1260                     contract_getzuniqid(ct) == zuniqid &&
1261                     ct->ct_czuniqid == GLOBAL_ZONEUNIQID))
1262                         ct = AVL_NEXT(&contract_avl, ct);
1263                 res = ct ? ct->ct_id : -1;
1264                 mutex_exit(&contract_lock);
1265         } else {
1266                 mutex_enter(&p->p_lock);
1267                 ct = avl_find(&p->p_ct_held, &template, &where);
1268                 if (ct == NULL)
1269                         ct = avl_nearest(&p->p_ct_held, where, AVL_AFTER);
1270                 res = ct ? ct->ct_id : -1;
1271                 mutex_exit(&p->p_lock);
1272         }
1273 
1274         return (res);
1275 }
1276 
1277 /*
1278  * contract_ptr_common
1279  *
1280  * Common code for contract_ptr and contract_type_ptr.  Takes a pointer
1281  * to an AVL tree to search in.  Should be called with the appropriate
1282  * tree-protecting lock held (unfortunately unassertable).
1283  */
1284 static contract_t *
1285 contract_ptr_common(avl_tree_t *tree, ctid_t id, uint64_t zuniqid)
1286 {
1287         contract_t template, *ct;
1288 
1289         template.ct_id = id;
1290         ct = avl_find(tree, &template, NULL);
1291         if (ct == NULL || (zuniqid != GLOBAL_ZONEUNIQID &&
1292             contract_getzuniqid(ct) != zuniqid)) {
1293                 return (NULL);
1294         }
1295 
1296         /*
1297          * Check to see if a thread is in the window in contract_rele
1298          * between dropping the reference count and removing the
1299          * contract from the type AVL.
1300          */
1301         mutex_enter(&ct->ct_reflock);
1302         if (ct->ct_ref) {
1303                 ct->ct_ref++;
1304                 mutex_exit(&ct->ct_reflock);
1305         } else {
1306                 mutex_exit(&ct->ct_reflock);
1307                 ct = NULL;
1308         }
1309 
1310         return (ct);
1311 }
1312 
1313 /*
1314  * contract_type_ptr
1315  *
1316  * Returns a pointer to the contract with the specified id.  The
1317  * contract is held, so the caller needs to release the reference when
1318  * it is through with the contract.
1319  */
1320 contract_t *
1321 contract_type_ptr(ct_type_t *type, ctid_t id, uint64_t zuniqid)
1322 {
1323         contract_t *ct;
1324 
1325         mutex_enter(&type->ct_type_lock);
1326         ct = contract_ptr_common(&type->ct_type_avl, id, zuniqid);
1327         mutex_exit(&type->ct_type_lock);
1328 
1329         return (ct);
1330 }
1331 
1332 /*
1333  * contract_ptr
1334  *
1335  * Returns a pointer to the contract with the specified id.  The
1336  * contract is held, so the caller needs to release the reference when
1337  * it is through with the contract.
1338  */
1339 contract_t *
1340 contract_ptr(ctid_t id, uint64_t zuniqid)
1341 {
1342         contract_t *ct;
1343 
1344         mutex_enter(&contract_lock);
1345         ct = contract_ptr_common(&contract_avl, id, zuniqid);
1346         mutex_exit(&contract_lock);
1347 
1348         return (ct);
1349 }
1350 
1351 /*
1352  * contract_type_time
1353  *
1354  * Obtains the last time a contract of a particular type was created.
1355  */
1356 void
1357 contract_type_time(ct_type_t *type, timestruc_t *time)
1358 {
1359         mutex_enter(&type->ct_type_lock);
1360         *time = type->ct_type_timestruc;
1361         mutex_exit(&type->ct_type_lock);
1362 }
1363 
1364 /*
1365  * contract_type_bundle
1366  *
1367  * Obtains a type's bundle queue.
1368  */
1369 ct_equeue_t *
1370 contract_type_bundle(ct_type_t *type)
1371 {
1372         return (&type->ct_type_events);
1373 }
1374 
1375 /*
1376  * contract_type_pbundle
1377  *
1378  * Obtain's a process's bundle queue.  If one doesn't exist, one is
1379  * created.  Often used simply to ensure that a bundle queue is
1380  * allocated.
1381  */
1382 ct_equeue_t *
1383 contract_type_pbundle(ct_type_t *type, proc_t *pp)
1384 {
1385         /*
1386          * If there isn't an array of bundle queues, allocate one.
1387          */
1388         if (pp->p_ct_equeue == NULL) {
1389                 size_t size = CTT_MAXTYPE * sizeof (ct_equeue_t *);
1390                 ct_equeue_t **qa = kmem_zalloc(size, KM_SLEEP);
1391 
1392                 mutex_enter(&pp->p_lock);
1393                 if (pp->p_ct_equeue)
1394                         kmem_free(qa, size);
1395                 else
1396                         pp->p_ct_equeue = qa;
1397                 mutex_exit(&pp->p_lock);
1398         }
1399 
1400         /*
1401          * If there isn't a bundle queue of the required type, allocate
1402          * one.
1403          */
1404         if (pp->p_ct_equeue[type->ct_type_index] == NULL) {
1405                 ct_equeue_t *q = kmem_zalloc(sizeof (ct_equeue_t), KM_SLEEP);
1406                 cte_queue_create(q, CTEL_PBUNDLE, 20, 1);
1407 
1408                 mutex_enter(&pp->p_lock);
1409                 if (pp->p_ct_equeue[type->ct_type_index])
1410                         cte_queue_drain(q, 0);
1411                 else
1412                         pp->p_ct_equeue[type->ct_type_index] = q;
1413                 mutex_exit(&pp->p_lock);
1414         }
1415 
1416         return (pp->p_ct_equeue[type->ct_type_index]);
1417 }
1418 
1419 /*
1420  * ctparam_copyin
1421  *
1422  * copyin a ct_param_t for CT_TSET or CT_TGET commands.
1423  * If ctparam_copyout() is not called after ctparam_copyin(), then
1424  * the caller must kmem_free() the buffer pointed by kparam->ctpm_kbuf.
1425  *
1426  * The copyin/out of ct_param_t is not done in ctmpl_set() and ctmpl_get()
1427  * because prctioctl() calls ctmpl_set() and ctmpl_get() while holding a
1428  * process lock.
1429  */
1430 int
1431 ctparam_copyin(const void *uaddr, ct_kparam_t *kparam, int flag, int cmd)
1432 {
1433         uint32_t size;
1434         void *ubuf;
1435         ct_param_t *param = &kparam->param;
1436         STRUCT_DECL(ct_param, uarg);
1437 
1438         STRUCT_INIT(uarg, flag);
1439         if (copyin(uaddr, STRUCT_BUF(uarg), STRUCT_SIZE(uarg)))
1440                 return (EFAULT);
1441         size = STRUCT_FGET(uarg, ctpm_size);
1442         ubuf = STRUCT_FGETP(uarg, ctpm_value);
1443 
1444         if (size > CT_PARAM_MAX_SIZE || size == 0)
1445                 return (EINVAL);
1446 
1447         kparam->ctpm_kbuf = kmem_alloc(size, KM_SLEEP);
1448         if (cmd == CT_TSET) {
1449                 if (copyin(ubuf, kparam->ctpm_kbuf, size)) {
1450                         kmem_free(kparam->ctpm_kbuf, size);
1451                         return (EFAULT);
1452                 }
1453         }
1454         param->ctpm_id = STRUCT_FGET(uarg, ctpm_id);
1455         param->ctpm_size = size;
1456         param->ctpm_value = ubuf;
1457         kparam->ret_size = 0;
1458 
1459         return (0);
1460 }
1461 
1462 /*
1463  * ctparam_copyout
1464  *
1465  * copyout a ct_kparam_t and frees the buffer pointed by the member
1466  * ctpm_kbuf of ct_kparam_t
1467  */
1468 int
1469 ctparam_copyout(ct_kparam_t *kparam, void *uaddr, int flag)
1470 {
1471         int r = 0;
1472         ct_param_t *param = &kparam->param;
1473         STRUCT_DECL(ct_param, uarg);
1474 
1475         STRUCT_INIT(uarg, flag);
1476 
1477         STRUCT_FSET(uarg, ctpm_id, param->ctpm_id);
1478         STRUCT_FSET(uarg, ctpm_size, kparam->ret_size);
1479         STRUCT_FSETP(uarg, ctpm_value, param->ctpm_value);
1480         if (copyout(STRUCT_BUF(uarg), uaddr, STRUCT_SIZE(uarg))) {
1481                 r = EFAULT;
1482                 goto error;
1483         }
1484         if (copyout(kparam->ctpm_kbuf, param->ctpm_value,
1485             MIN(kparam->ret_size, param->ctpm_size))) {
1486                 r = EFAULT;
1487         }
1488 
1489 error:
1490         kmem_free(kparam->ctpm_kbuf, param->ctpm_size);
1491 
1492         return (r);
1493 }
1494 
1495 /*
1496  * ctmpl_free
1497  *
1498  * Frees a template.
1499  */
1500 void
1501 ctmpl_free(ct_template_t *template)
1502 {
1503         mutex_destroy(&template->ctmpl_lock);
1504         template->ctmpl_ops->ctop_free(template);
1505 }
1506 
1507 /*
1508  * ctmpl_dup
1509  *
1510  * Creates a copy of a template.
1511  */
1512 ct_template_t *
1513 ctmpl_dup(ct_template_t *template)
1514 {
1515         ct_template_t *new;
1516 
1517         if (template == NULL)
1518                 return (NULL);
1519 
1520         new = template->ctmpl_ops->ctop_dup(template);
1521         /*
1522          * ctmpl_lock was taken by ctop_dup's call to ctmpl_copy and
1523          * should have remain held until now.
1524          */
1525         mutex_exit(&template->ctmpl_lock);
1526 
1527         return (new);
1528 }
1529 
1530 /*
1531  * ctmpl_set
1532  *
1533  * Sets the requested terms of a template.
1534  */
1535 int
1536 ctmpl_set(ct_template_t *template, ct_kparam_t *kparam, const cred_t *cr)
1537 {
1538         int result = 0;
1539         ct_param_t *param = &kparam->param;
1540         uint64_t param_value;
1541 
1542         if (param->ctpm_id == CTP_COOKIE ||
1543             param->ctpm_id == CTP_EV_INFO ||
1544             param->ctpm_id == CTP_EV_CRITICAL) {
1545                 if (param->ctpm_size < sizeof (uint64_t)) {
1546                         return (EINVAL);
1547                 } else {
1548                         param_value = *(uint64_t *)kparam->ctpm_kbuf;
1549                 }
1550         }
1551 
1552         mutex_enter(&template->ctmpl_lock);
1553         switch (param->ctpm_id) {
1554         case CTP_COOKIE:
1555                 template->ctmpl_cookie = param_value;
1556                 break;
1557         case CTP_EV_INFO:
1558                 if (param_value & ~(uint64_t)template->ctmpl_ops->allevents)
1559                         result = EINVAL;
1560                 else
1561                         template->ctmpl_ev_info = param_value;
1562                 break;
1563         case CTP_EV_CRITICAL:
1564                 if (param_value & ~(uint64_t)template->ctmpl_ops->allevents) {
1565                         result = EINVAL;
1566                         break;
1567                 } else if ((~template->ctmpl_ev_crit & param_value) == 0) {
1568                         /*
1569                          * Assume that a pure reduction of the critical
1570                          * set is allowed by the contract type.
1571                          */
1572                         template->ctmpl_ev_crit = param_value;
1573                         break;
1574                 }
1575                 /*
1576                  * There may be restrictions on what we can make
1577                  * critical, so we defer to the judgement of the
1578                  * contract type.
1579                  */
1580                 /* FALLTHROUGH */
1581         default:
1582                 result = template->ctmpl_ops->ctop_set(template, kparam, cr);
1583         }
1584         mutex_exit(&template->ctmpl_lock);
1585 
1586         return (result);
1587 }
1588 
1589 /*
1590  * ctmpl_get
1591  *
1592  * Obtains the requested terms from a template.
1593  *
1594  * If the term requested is a variable-sized term and the buffer
1595  * provided is too small for the data, we truncate the data and return
1596  * the buffer size necessary to fit the term in kparam->ret_size. If the
1597  * term requested is fix-sized (uint64_t) and the buffer provided is too
1598  * small, we return EINVAL.  This should never happen if you're using
1599  * libcontract(3LIB), only if you call ioctl with a hand constructed
1600  * ct_param_t argument.
1601  *
1602  * Currently, only contract specific parameters have variable-sized
1603  * parameters.
1604  */
1605 int
1606 ctmpl_get(ct_template_t *template, ct_kparam_t *kparam)
1607 {
1608         int result = 0;
1609         ct_param_t *param = &kparam->param;
1610         uint64_t *param_value;
1611 
1612         if (param->ctpm_id == CTP_COOKIE ||
1613             param->ctpm_id == CTP_EV_INFO ||
1614             param->ctpm_id == CTP_EV_CRITICAL) {
1615                 if (param->ctpm_size < sizeof (uint64_t)) {
1616                         return (EINVAL);
1617                 } else {
1618                         param_value = kparam->ctpm_kbuf;
1619                         kparam->ret_size = sizeof (uint64_t);
1620                 }
1621         }
1622 
1623         mutex_enter(&template->ctmpl_lock);
1624         switch (param->ctpm_id) {
1625         case CTP_COOKIE:
1626                 *param_value = template->ctmpl_cookie;
1627                 break;
1628         case CTP_EV_INFO:
1629                 *param_value = template->ctmpl_ev_info;
1630                 break;
1631         case CTP_EV_CRITICAL:
1632                 *param_value = template->ctmpl_ev_crit;
1633                 break;
1634         default:
1635                 result = template->ctmpl_ops->ctop_get(template, kparam);
1636         }
1637         mutex_exit(&template->ctmpl_lock);
1638 
1639         return (result);
1640 }
1641 
1642 /*
1643  * ctmpl_makecurrent
1644  *
1645  * Used by ctmpl_activate and ctmpl_clear to set the current thread's
1646  * active template.  Frees the old active template, if there was one.
1647  */
1648 static void
1649 ctmpl_makecurrent(ct_template_t *template, ct_template_t *new)
1650 {
1651         klwp_t *curlwp = ttolwp(curthread);
1652         proc_t *p = curproc;
1653         ct_template_t *old;
1654 
1655         mutex_enter(&p->p_lock);
1656         old = curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index];
1657         curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index] = new;
1658         mutex_exit(&p->p_lock);
1659 
1660         if (old)
1661                 ctmpl_free(old);
1662 }
1663 
1664 /*
1665  * ctmpl_activate
1666  *
1667  * Copy the specified template as the current thread's activate
1668  * template of that type.
1669  */
1670 void
1671 ctmpl_activate(ct_template_t *template)
1672 {
1673         ctmpl_makecurrent(template, ctmpl_dup(template));
1674 }
1675 
1676 /*
1677  * ctmpl_clear
1678  *
1679  * Clears the current thread's activate template of the same type as
1680  * the specified template.
1681  */
1682 void
1683 ctmpl_clear(ct_template_t *template)
1684 {
1685         ctmpl_makecurrent(template, NULL);
1686 }
1687 
1688 /*
1689  * ctmpl_create
1690  *
1691  * Creates a new contract using the specified template.
1692  */
1693 int
1694 ctmpl_create(ct_template_t *template, ctid_t *ctidp)
1695 {
1696         return (template->ctmpl_ops->ctop_create(template, ctidp));
1697 }
1698 
1699 /*
1700  * ctmpl_init
1701  *
1702  * Initializes the common portion of a new contract template.
1703  */
1704 void
1705 ctmpl_init(ct_template_t *new, ctmplops_t *ops, ct_type_t *type, void *data)
1706 {
1707         mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1708         new->ctmpl_ops = ops;
1709         new->ctmpl_type = type;
1710         new->ctmpl_data = data;
1711         new->ctmpl_ev_info = new->ctmpl_ev_crit = 0;
1712         new->ctmpl_cookie = 0;
1713 }
1714 
1715 /*
1716  * ctmpl_copy
1717  *
1718  * Copies the common portions of a contract template.  Intended for use
1719  * by a contract type's ctop_dup template op.  Returns with the old
1720  * template's lock held, which will should remain held until the
1721  * template op returns (it is dropped by ctmpl_dup).
1722  */
1723 void
1724 ctmpl_copy(ct_template_t *new, ct_template_t *old)
1725 {
1726         mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1727         mutex_enter(&old->ctmpl_lock);
1728         new->ctmpl_ops = old->ctmpl_ops;
1729         new->ctmpl_type = old->ctmpl_type;
1730         new->ctmpl_ev_crit = old->ctmpl_ev_crit;
1731         new->ctmpl_ev_info = old->ctmpl_ev_info;
1732         new->ctmpl_cookie = old->ctmpl_cookie;
1733 }
1734 
1735 /*
1736  * ctmpl_create_inval
1737  *
1738  * Returns EINVAL.  Provided for the convenience of those contract
1739  * types which don't support ct_tmpl_create(3contract) and would
1740  * otherwise need to create their own stub for the ctop_create template
1741  * op.
1742  */
1743 /*ARGSUSED*/
1744 int
1745 ctmpl_create_inval(ct_template_t *template, ctid_t *ctidp)
1746 {
1747         return (EINVAL);
1748 }
1749 
1750 
1751 /*
1752  * cte_queue_create
1753  *
1754  * Initializes a queue of a particular type.  If dynamic is set, the
1755  * queue is to be freed when its last listener is removed after being
1756  * drained.
1757  */
1758 static void
1759 cte_queue_create(ct_equeue_t *q, ct_listnum_t list, int maxinf, int dynamic)
1760 {
1761         mutex_init(&q->ctq_lock, NULL, MUTEX_DEFAULT, NULL);
1762         q->ctq_listno = list;
1763         list_create(&q->ctq_events, sizeof (ct_kevent_t),
1764             offsetof(ct_kevent_t, cte_nodes[list].ctm_node));
1765         list_create(&q->ctq_listeners, sizeof (ct_listener_t),
1766             offsetof(ct_listener_t, ctl_allnode));
1767         list_create(&q->ctq_tail, sizeof (ct_listener_t),
1768             offsetof(ct_listener_t, ctl_tailnode));
1769         gethrestime(&q->ctq_atime);
1770         q->ctq_nlisteners = 0;
1771         q->ctq_nreliable = 0;
1772         q->ctq_ninf = 0;
1773         q->ctq_max = maxinf;
1774 
1775         /*
1776          * Bundle queues and contract queues are embedded in other
1777          * structures and are implicitly referenced counted by virtue
1778          * of their vnodes' indirect hold on their contracts.  Process
1779          * bundle queues are dynamically allocated and may persist
1780          * after the death of the process, so they must be explicitly
1781          * reference counted.
1782          */
1783         q->ctq_flags = dynamic ? CTQ_REFFED : 0;
1784 }
1785 
1786 /*
1787  * cte_queue_destroy
1788  *
1789  * Destroys the specified queue.  The queue is freed if referenced
1790  * counted.
1791  */
1792 static void
1793 cte_queue_destroy(ct_equeue_t *q)
1794 {
1795         ASSERT(q->ctq_flags & CTQ_DEAD);
1796         ASSERT(q->ctq_nlisteners == 0);
1797         ASSERT(q->ctq_nreliable == 0);
1798         list_destroy(&q->ctq_events);
1799         list_destroy(&q->ctq_listeners);
1800         list_destroy(&q->ctq_tail);
1801         mutex_destroy(&q->ctq_lock);
1802         if (q->ctq_flags & CTQ_REFFED)
1803                 kmem_free(q, sizeof (ct_equeue_t));
1804 }
1805 
1806 /*
1807  * cte_hold
1808  *
1809  * Takes a hold on the specified event.
1810  */
1811 static void
1812 cte_hold(ct_kevent_t *e)
1813 {
1814         mutex_enter(&e->cte_lock);
1815         ASSERT(e->cte_refs > 0);
1816         e->cte_refs++;
1817         mutex_exit(&e->cte_lock);
1818 }
1819 
1820 /*
1821  * cte_rele
1822  *
1823  * Releases a hold on the specified event.  If the caller had the last
1824  * reference, frees the event and releases its hold on the contract
1825  * that generated it.
1826  */
1827 static void
1828 cte_rele(ct_kevent_t *e)
1829 {
1830         mutex_enter(&e->cte_lock);
1831         ASSERT(e->cte_refs > 0);
1832         if (--e->cte_refs) {
1833                 mutex_exit(&e->cte_lock);
1834                 return;
1835         }
1836 
1837         contract_rele(e->cte_contract);
1838 
1839         mutex_destroy(&e->cte_lock);
1840         nvlist_free(e->cte_data);
1841         nvlist_free(e->cte_gdata);
1842         kmem_free(e, sizeof (ct_kevent_t));
1843 }
1844 
1845 /*
1846  * cte_qrele
1847  *
1848  * Remove this listener's hold on the specified event, removing and
1849  * releasing the queue's hold on the event if appropriate.
1850  */
1851 static void
1852 cte_qrele(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1853 {
1854         ct_member_t *member = &e->cte_nodes[q->ctq_listno];
1855 
1856         ASSERT(MUTEX_HELD(&q->ctq_lock));
1857 
1858         if (l->ctl_flags & CTLF_RELIABLE)
1859                 member->ctm_nreliable--;
1860         if ((--member->ctm_refs == 0) && member->ctm_trimmed) {
1861                 member->ctm_trimmed = 0;
1862                 list_remove(&q->ctq_events, e);
1863                 cte_rele(e);
1864         }
1865 }
1866 
1867 /*
1868  * cte_qmove
1869  *
1870  * Move this listener to the specified event in the queue.
1871  */
1872 static ct_kevent_t *
1873 cte_qmove(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1874 {
1875         ct_kevent_t *olde;
1876 
1877         ASSERT(MUTEX_HELD(&q->ctq_lock));
1878         ASSERT(l->ctl_equeue == q);
1879 
1880         if ((olde = l->ctl_position) == NULL)
1881                 list_remove(&q->ctq_tail, l);
1882 
1883         while (e != NULL && e->cte_nodes[q->ctq_listno].ctm_trimmed)
1884                 e = list_next(&q->ctq_events, e);
1885 
1886         if (e != NULL) {
1887                 e->cte_nodes[q->ctq_listno].ctm_refs++;
1888                 if (l->ctl_flags & CTLF_RELIABLE)
1889                         e->cte_nodes[q->ctq_listno].ctm_nreliable++;
1890         } else {
1891                 list_insert_tail(&q->ctq_tail, l);
1892         }
1893 
1894         l->ctl_position = e;
1895         if (olde)
1896                 cte_qrele(q, l, olde);
1897 
1898         return (e);
1899 }
1900 
1901 /*
1902  * cte_checkcred
1903  *
1904  * Determines if the specified event's contract is owned by a process
1905  * with the same effective uid as the specified credential.  Called
1906  * after a failed call to contract_owned with locked set.  Because it
1907  * drops the queue lock, its caller (cte_qreadable) needs to make sure
1908  * we're still in the same place after we return.  Returns 1 on
1909  * success.
1910  */
1911 static int
1912 cte_checkcred(ct_equeue_t *q, ct_kevent_t *e, const cred_t *cr)
1913 {
1914         int result;
1915         contract_t *ct = e->cte_contract;
1916 
1917         cte_hold(e);
1918         mutex_exit(&q->ctq_lock);
1919         result = curproc->p_zone->zone_uniqid == ct->ct_czuniqid &&
1920             contract_checkcred(ct, cr);
1921         mutex_enter(&q->ctq_lock);
1922         cte_rele(e);
1923 
1924         return (result);
1925 }
1926 
1927 /*
1928  * cte_qreadable
1929  *
1930  * Ensures that the listener is pointing to a valid event that the
1931  * caller has the credentials to read.  Returns 0 if we can read the
1932  * event we're pointing to.
1933  */
1934 static int
1935 cte_qreadable(ct_equeue_t *q, ct_listener_t *l, const cred_t *cr,
1936     uint64_t zuniqid, int crit)
1937 {
1938         ct_kevent_t *e, *next;
1939         contract_t *ct;
1940 
1941         ASSERT(MUTEX_HELD(&q->ctq_lock));
1942         ASSERT(l->ctl_equeue == q);
1943 
1944         if (l->ctl_flags & CTLF_COPYOUT)
1945                 return (1);
1946 
1947         next = l->ctl_position;
1948         while (e = cte_qmove(q, l, next)) {
1949                 ct = e->cte_contract;
1950                 /*
1951                  * Check obvious things first.  If we are looking for a
1952                  * critical message, is this one?  If we aren't in the
1953                  * global zone, is this message meant for us?
1954                  */
1955                 if ((crit && (e->cte_flags & (CTE_INFO | CTE_ACK))) ||
1956                     (cr != NULL && zuniqid != GLOBAL_ZONEUNIQID &&
1957                     zuniqid != contract_getzuniqid(ct))) {
1958 
1959                         next = list_next(&q->ctq_events, e);
1960 
1961                 /*
1962                  * Next, see if our effective uid equals that of owner
1963                  * or author of the contract.  Since we are holding the
1964                  * queue lock, contract_owned can't always check if we
1965                  * have the same effective uid as the contract's
1966                  * owner.  If it comes to that, it fails and we take
1967                  * the slow(er) path.
1968                  */
1969                 } else if (cr != NULL && !contract_owned(ct, cr, B_TRUE)) {
1970 
1971                         /*
1972                          * At this point we either don't have any claim
1973                          * to this contract or we match the effective
1974                          * uid of the owner but couldn't tell.  We
1975                          * first test for a NULL holder so that events
1976                          * from orphans and inherited contracts avoid
1977                          * the penalty phase.
1978                          */
1979                         if (e->cte_contract->ct_owner == NULL &&
1980                             !secpolicy_contract_observer_choice(cr))
1981                                 next = list_next(&q->ctq_events, e);
1982 
1983                         /*
1984                          * cte_checkcred will juggle locks to see if we
1985                          * have the same uid as the event's contract's
1986                          * current owner.  If it succeeds, we have to
1987                          * make sure we are in the same point in the
1988                          * queue.
1989                          */
1990                         else if (cte_checkcred(q, e, cr) &&
1991                             l->ctl_position == e)
1992                                 break;
1993 
1994                         /*
1995                          * cte_checkcred failed; see if we're in the
1996                          * same place.
1997                          */
1998                         else if (l->ctl_position == e)
1999                                 if (secpolicy_contract_observer_choice(cr))
2000                                         break;
2001                                 else
2002                                         next = list_next(&q->ctq_events, e);
2003 
2004                         /*
2005                          * cte_checkcred failed, and our position was
2006                          * changed.  Start from there.
2007                          */
2008                         else
2009                                 next = l->ctl_position;
2010                 } else {
2011                         break;
2012                 }
2013         }
2014 
2015         /*
2016          * We check for CTLF_COPYOUT again in case we dropped the queue
2017          * lock in cte_checkcred.
2018          */
2019         return ((l->ctl_flags & CTLF_COPYOUT) || (l->ctl_position == NULL));
2020 }
2021 
2022 /*
2023  * cte_qwakeup
2024  *
2025  * Wakes up any waiting listeners and points them at the specified event.
2026  */
2027 static void
2028 cte_qwakeup(ct_equeue_t *q, ct_kevent_t *e)
2029 {
2030         ct_listener_t *l;
2031 
2032         ASSERT(MUTEX_HELD(&q->ctq_lock));
2033 
2034         while (l = list_head(&q->ctq_tail)) {
2035                 list_remove(&q->ctq_tail, l);
2036                 e->cte_nodes[q->ctq_listno].ctm_refs++;
2037                 if (l->ctl_flags & CTLF_RELIABLE)
2038                         e->cte_nodes[q->ctq_listno].ctm_nreliable++;
2039                 l->ctl_position = e;
2040                 cv_signal(&l->ctl_cv);
2041                 pollwakeup(&l->ctl_pollhead, POLLIN);
2042         }
2043 }
2044 
2045 /*
2046  * cte_copy
2047  *
2048  * Copies events from the specified contract event queue to the
2049  * end of the specified process bundle queue.  Only called from
2050  * contract_adopt.
2051  *
2052  * We copy to the end of the target queue instead of mixing the events
2053  * in their proper order because otherwise the act of adopting a
2054  * contract would require a process to reset all process bundle
2055  * listeners it needed to see the new events.  This would, in turn,
2056  * require the process to keep track of which preexisting events had
2057  * already been processed.
2058  */
2059 static void
2060 cte_copy(ct_equeue_t *q, ct_equeue_t *newq)
2061 {
2062         ct_kevent_t *e, *first = NULL;
2063 
2064         VERIFY(q->ctq_listno == CTEL_CONTRACT);
2065         VERIFY(newq->ctq_listno == CTEL_PBUNDLE);
2066 
2067         mutex_enter(&q->ctq_lock);
2068         mutex_enter(&newq->ctq_lock);
2069 
2070         /*
2071          * For now, only copy critical events.
2072          */
2073         for (e = list_head(&q->ctq_events); e != NULL;
2074             e = list_next(&q->ctq_events, e)) {
2075                 if ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
2076                         if (first == NULL)
2077                                 first = e;
2078                         /*
2079                          * It is possible for adoption to race with an owner's
2080                          * cte_publish_all(); we must only enqueue events that
2081                          * have not already been enqueued.
2082                          */
2083                         if (!list_link_active((list_node_t *)
2084                             ((uintptr_t)e + newq->ctq_events.list_offset))) {
2085                                 list_insert_tail(&newq->ctq_events, e);
2086                                 cte_hold(e);
2087                         }
2088                 }
2089         }
2090 
2091         mutex_exit(&q->ctq_lock);
2092 
2093         if (first)
2094                 cte_qwakeup(newq, first);
2095 
2096         mutex_exit(&newq->ctq_lock);
2097 }
2098 
2099 /*
2100  * cte_trim
2101  *
2102  * Trims unneeded events from an event queue.  Algorithm works as
2103  * follows:
2104  *
2105  *   Removes all informative and acknowledged critical events until the
2106  *   first referenced event is found.
2107  *
2108  *   If a contract is specified, removes all events (regardless of
2109  *   acknowledgement) generated by that contract until the first event
2110  *   referenced by a reliable listener is found.  Reference events are
2111  *   removed by marking them "trimmed".  Such events will be removed
2112  *   when the last reference is dropped and will be skipped by future
2113  *   listeners.
2114  *
2115  * This is pretty basic.  Ideally this should remove from the middle of
2116  * the list (i.e. beyond the first referenced event), and even
2117  * referenced events.
2118  */
2119 static void
2120 cte_trim(ct_equeue_t *q, contract_t *ct)
2121 {
2122         ct_kevent_t *e, *next;
2123         int flags, stopper;
2124         int start = 1;
2125 
2126         VERIFY(MUTEX_HELD(&q->ctq_lock));
2127 
2128         for (e = list_head(&q->ctq_events); e != NULL; e = next) {
2129                 next = list_next(&q->ctq_events, e);
2130                 flags = e->cte_flags;
2131                 stopper = (q->ctq_listno != CTEL_PBUNDLE) &&
2132                     (e->cte_nodes[q->ctq_listno].ctm_nreliable > 0);
2133                 if (e->cte_nodes[q->ctq_listno].ctm_refs == 0) {
2134                         if ((start && (flags & (CTE_INFO | CTE_ACK))) ||
2135                             (e->cte_contract == ct)) {
2136                                 /*
2137                                  * Toss informative and ACKed critical messages.
2138                                  */
2139                                 list_remove(&q->ctq_events, e);
2140                                 cte_rele(e);
2141                         }
2142                 } else if ((e->cte_contract == ct) && !stopper) {
2143                         ASSERT(q->ctq_nlisteners != 0);
2144                         e->cte_nodes[q->ctq_listno].ctm_trimmed = 1;
2145                 } else if (ct && !stopper) {
2146                         start = 0;
2147                 } else {
2148                         /*
2149                          * Don't free messages past the first reader.
2150                          */
2151                         break;
2152                 }
2153         }
2154 }
2155 
2156 /*
2157  * cte_queue_drain
2158  *
2159  * Drain all events from the specified queue, and mark it dead.  If
2160  * "ack" is set, acknowledge any critical events we find along the
2161  * way.
2162  */
2163 static void
2164 cte_queue_drain(ct_equeue_t *q, int ack)
2165 {
2166         ct_kevent_t *e, *next;
2167         ct_listener_t *l;
2168 
2169         mutex_enter(&q->ctq_lock);
2170 
2171         for (e = list_head(&q->ctq_events); e != NULL; e = next) {
2172                 next = list_next(&q->ctq_events, e);
2173                 if (ack && ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0)) {
2174                         /*
2175                          * Make sure critical messages are eventually
2176                          * removed from the bundle queues.
2177                          */
2178                         mutex_enter(&e->cte_lock);
2179                         e->cte_flags |= CTE_ACK;
2180                         mutex_exit(&e->cte_lock);
2181                         ASSERT(MUTEX_HELD(&e->cte_contract->ct_lock));
2182                         e->cte_contract->ct_evcnt--;
2183                 }
2184                 list_remove(&q->ctq_events, e);
2185                 e->cte_nodes[q->ctq_listno].ctm_refs = 0;
2186                 e->cte_nodes[q->ctq_listno].ctm_nreliable = 0;
2187                 e->cte_nodes[q->ctq_listno].ctm_trimmed = 0;
2188                 cte_rele(e);
2189         }
2190 
2191         /*
2192          * This is necessary only because of CTEL_PBUNDLE listeners;
2193          * the events they point to can move from one pbundle to
2194          * another.  Fortunately, this only happens if the contract is
2195          * inherited, which (in turn) only happens if the process
2196          * exits, which means it's an all-or-nothing deal.  If this
2197          * wasn't the case, we would instead need to keep track of
2198          * listeners on a per-event basis, not just a per-queue basis.
2199          * This would have the side benefit of letting us clean up
2200          * trimmed events sooner (i.e. immediately), but would
2201          * unfortunately make events even bigger than they already
2202          * are.
2203          */
2204         for (l = list_head(&q->ctq_listeners); l;
2205             l = list_next(&q->ctq_listeners, l)) {
2206                 l->ctl_flags |= CTLF_DEAD;
2207                 if (l->ctl_position) {
2208                         l->ctl_position = NULL;
2209                         list_insert_tail(&q->ctq_tail, l);
2210                 }
2211                 cv_broadcast(&l->ctl_cv);
2212         }
2213 
2214         /*
2215          * Disallow events.
2216          */
2217         q->ctq_flags |= CTQ_DEAD;
2218 
2219         /*
2220          * If we represent the last reference to a reference counted
2221          * process bundle queue, free it.
2222          */
2223         if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_nlisteners == 0))
2224                 cte_queue_destroy(q);
2225         else
2226                 mutex_exit(&q->ctq_lock);
2227 }
2228 
2229 /*
2230  * cte_publish
2231  *
2232  * Publishes an event to a specific queue.  Only called by
2233  * cte_publish_all.
2234  */
2235 static void
2236 cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp, boolean_t mayexist)
2237 {
2238         ASSERT(MUTEX_HELD(&q->ctq_lock));
2239 
2240         q->ctq_atime = *tsp;
2241 
2242         /*
2243          * If this event may already exist on this queue, check to see if it
2244          * is already there and return if so.
2245          */
2246         if (mayexist && list_link_active((list_node_t *)((uintptr_t)e +
2247             q->ctq_events.list_offset))) {
2248                 mutex_exit(&q->ctq_lock);
2249                 cte_rele(e);
2250                 return;
2251         }
2252 
2253         /*
2254          * Don't publish if the event is informative and there aren't
2255          * any listeners, or if the queue has been shut down.
2256          */
2257         if (((q->ctq_nlisteners == 0) && (e->cte_flags & (CTE_INFO|CTE_ACK))) ||
2258             (q->ctq_flags & CTQ_DEAD)) {
2259                 mutex_exit(&q->ctq_lock);
2260                 cte_rele(e);
2261                 return;
2262         }
2263 
2264         /*
2265          * Enqueue event
2266          */
2267         VERIFY(!list_link_active((list_node_t *)
2268             ((uintptr_t)e + q->ctq_events.list_offset)));
2269         list_insert_tail(&q->ctq_events, e);
2270 
2271         /*
2272          * Check for waiting listeners
2273          */
2274         cte_qwakeup(q, e);
2275 
2276         /*
2277          * Trim unnecessary events from the queue.
2278          */
2279         cte_trim(q, NULL);
2280         mutex_exit(&q->ctq_lock);
2281 }
2282 
2283 /*
2284  * cte_publish_all
2285  *
2286  * Publish an event to all necessary event queues.  The event, e, must
2287  * be zallocated by the caller, and the event's flags and type must be
2288  * set.  The rest of the event's fields are initialized here.
2289  */
2290 uint64_t
2291 cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
2292 {
2293         ct_equeue_t *q;
2294         timespec_t ts;
2295         uint64_t evid;
2296         ct_kevent_t *negev;
2297         int negend;
2298 
2299         e->cte_contract = ct;
2300         e->cte_data = data;
2301         e->cte_gdata = gdata;
2302         e->cte_refs = 3;
2303         evid = e->cte_id = atomic_inc_64_nv(&ct->ct_type->ct_type_evid);
2304         contract_hold(ct);
2305 
2306         /*
2307          * For a negotiation event we set the ct->ct_nevent field of the
2308          * contract for the duration of the negotiation
2309          */
2310         negend = 0;
2311         if (e->cte_flags & CTE_NEG) {
2312                 cte_hold(e);
2313                 ct->ct_nevent = e;
2314         } else if (e->cte_type == CT_EV_NEGEND) {
2315                 negend = 1;
2316         }
2317 
2318         gethrestime(&ts);
2319 
2320         /*
2321          * ct_evtlock simply (and only) ensures that two events sent
2322          * from the same contract are delivered to all queues in the
2323          * same order.
2324          */
2325         mutex_enter(&ct->ct_evtlock);
2326 
2327         /*
2328          * CTEL_CONTRACT - First deliver to the contract queue, acking
2329          * the event if the contract has been orphaned.
2330          */
2331         mutex_enter(&ct->ct_lock);
2332         mutex_enter(&ct->ct_events.ctq_lock);
2333         if ((e->cte_flags & CTE_INFO) == 0) {
2334                 if (ct->ct_state >= CTS_ORPHAN)
2335                         e->cte_flags |= CTE_ACK;
2336                 else
2337                         ct->ct_evcnt++;
2338         }
2339         mutex_exit(&ct->ct_lock);
2340         cte_publish(&ct->ct_events, e, &ts, B_FALSE);
2341 
2342         /*
2343          * CTEL_BUNDLE - Next deliver to the contract type's bundle
2344          * queue.
2345          */
2346         mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
2347         cte_publish(&ct->ct_type->ct_type_events, e, &ts, B_FALSE);
2348 
2349         /*
2350          * CTEL_PBUNDLE - Finally, if the contract has an owner,
2351          * deliver to the owner's process bundle queue.
2352          */
2353         mutex_enter(&ct->ct_lock);
2354         if (ct->ct_owner) {
2355                 /*
2356                  * proc_exit doesn't free event queues until it has
2357                  * abandoned all contracts.
2358                  */
2359                 ASSERT(ct->ct_owner->p_ct_equeue);
2360                 ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
2361                 q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
2362                 mutex_enter(&q->ctq_lock);
2363                 mutex_exit(&ct->ct_lock);
2364 
2365                 /*
2366                  * It is possible for this code to race with adoption; we
2367                  * publish the event indicating that the event may already
2368                  * be enqueued because adoption beat us to it (in which case
2369                  * cte_pubish() does nothing).
2370                  */
2371                 cte_publish(q, e, &ts, B_TRUE);
2372         } else {
2373                 mutex_exit(&ct->ct_lock);
2374                 cte_rele(e);
2375         }
2376 
2377         if (negend) {
2378                 mutex_enter(&ct->ct_lock);
2379                 negev = ct->ct_nevent;
2380                 ct->ct_nevent = NULL;
2381                 cte_rele(negev);
2382                 mutex_exit(&ct->ct_lock);
2383         }
2384 
2385         mutex_exit(&ct->ct_evtlock);
2386 
2387         return (evid);
2388 }
2389 
2390 /*
2391  * cte_add_listener
2392  *
2393  * Add a new listener to an event queue.
2394  */
2395 void
2396 cte_add_listener(ct_equeue_t *q, ct_listener_t *l)
2397 {
2398         cv_init(&l->ctl_cv, NULL, CV_DEFAULT, NULL);
2399         l->ctl_equeue = q;
2400         l->ctl_position = NULL;
2401         l->ctl_flags = 0;
2402 
2403         mutex_enter(&q->ctq_lock);
2404         list_insert_head(&q->ctq_tail, l);
2405         list_insert_head(&q->ctq_listeners, l);
2406         q->ctq_nlisteners++;
2407         mutex_exit(&q->ctq_lock);
2408 }
2409 
2410 /*
2411  * cte_remove_listener
2412  *
2413  * Remove a listener from an event queue.  No other queue activities
2414  * (e.g. cte_get event) may be in progress at this endpoint when this
2415  * is called.
2416  */
2417 void
2418 cte_remove_listener(ct_listener_t *l)
2419 {
2420         ct_equeue_t *q = l->ctl_equeue;
2421         ct_kevent_t *e;
2422 
2423         mutex_enter(&q->ctq_lock);
2424 
2425         ASSERT((l->ctl_flags & (CTLF_COPYOUT|CTLF_RESET)) == 0);
2426 
2427         if ((e = l->ctl_position) != NULL)
2428                 cte_qrele(q, l, e);
2429         else
2430                 list_remove(&q->ctq_tail, l);
2431         l->ctl_position = NULL;
2432 
2433         q->ctq_nlisteners--;
2434         list_remove(&q->ctq_listeners, l);
2435 
2436         if (l->ctl_flags & CTLF_RELIABLE)
2437                 q->ctq_nreliable--;
2438 
2439         /*
2440          * If we are a the last listener of a dead reference counted
2441          * queue (i.e. a process bundle) we free it.  Otherwise we just
2442          * trim any events which may have been kept around for our
2443          * benefit.
2444          */
2445         if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_flags & CTQ_DEAD) &&
2446             (q->ctq_nlisteners == 0)) {
2447                 cte_queue_destroy(q);
2448         } else {
2449                 cte_trim(q, NULL);
2450                 mutex_exit(&q->ctq_lock);
2451         }
2452 }
2453 
2454 /*
2455  * cte_reset_listener
2456  *
2457  * Moves a listener's queue pointer to the beginning of the queue.
2458  */
2459 void
2460 cte_reset_listener(ct_listener_t *l)
2461 {
2462         ct_equeue_t *q = l->ctl_equeue;
2463 
2464         mutex_enter(&q->ctq_lock);
2465 
2466         /*
2467          * We allow an asynchronous reset because it doesn't make a
2468          * whole lot of sense to make reset block or fail.  We already
2469          * have most of the mechanism needed thanks to queue trimming,
2470          * so implementing it isn't a big deal.
2471          */
2472         if (l->ctl_flags & CTLF_COPYOUT)
2473                 l->ctl_flags |= CTLF_RESET;
2474 
2475         (void) cte_qmove(q, l, list_head(&q->ctq_events));
2476 
2477         /*
2478          * Inform blocked readers.
2479          */
2480         cv_broadcast(&l->ctl_cv);
2481         pollwakeup(&l->ctl_pollhead, POLLIN);
2482         mutex_exit(&q->ctq_lock);
2483 }
2484 
2485 /*
2486  * cte_next_event
2487  *
2488  * Moves the event pointer for the specified listener to the next event
2489  * on the queue.  To avoid races, this movement only occurs if the
2490  * specified event id matches that of the current event.  This is used
2491  * primarily to skip events that have been read but whose extended data
2492  * haven't been copied out.
2493  */
2494 int
2495 cte_next_event(ct_listener_t *l, uint64_t id)
2496 {
2497         ct_equeue_t *q = l->ctl_equeue;
2498         ct_kevent_t *old;
2499 
2500         mutex_enter(&q->ctq_lock);
2501 
2502         if (l->ctl_flags & CTLF_COPYOUT)
2503                 l->ctl_flags |= CTLF_RESET;
2504 
2505         if (((old = l->ctl_position) != NULL) && (old->cte_id == id))
2506                 (void) cte_qmove(q, l, list_next(&q->ctq_events, old));
2507 
2508         mutex_exit(&q->ctq_lock);
2509 
2510         return (0);
2511 }
2512 
2513 /*
2514  * cte_get_event
2515  *
2516  * Reads an event from an event endpoint.  If "nonblock" is clear, we
2517  * block until a suitable event is ready.  If "crit" is set, we only
2518  * read critical events.  Note that while "cr" is the caller's cred,
2519  * "zuniqid" is the unique id of the zone the calling contract
2520  * filesystem was mounted in.
2521  */
2522 int
2523 cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr,
2524     uint64_t zuniqid, int crit)
2525 {
2526         ct_equeue_t *q = l->ctl_equeue;
2527         ct_kevent_t *temp;
2528         int result = 0;
2529         int partial = 0;
2530         size_t size, gsize, len;
2531         model_t mdl = get_udatamodel();
2532         STRUCT_DECL(ct_event, ev);
2533         STRUCT_INIT(ev, mdl);
2534 
2535         /*
2536          * cte_qreadable checks for CTLF_COPYOUT as well as ensures
2537          * that there exists, and we are pointing to, an appropriate
2538          * event.  It may temporarily drop ctq_lock, but that doesn't
2539          * really matter to us.
2540          */
2541         mutex_enter(&q->ctq_lock);
2542         while (cte_qreadable(q, l, cr, zuniqid, crit)) {
2543                 if (nonblock) {
2544                         result = EAGAIN;
2545                         goto error;
2546                 }
2547                 if (q->ctq_flags & CTQ_DEAD) {
2548                         result = EIDRM;
2549                         goto error;
2550                 }
2551                 result = cv_wait_sig(&l->ctl_cv, &q->ctq_lock);
2552                 if (result == 0) {
2553                         result = EINTR;
2554                         goto error;
2555                 }
2556         }
2557         temp = l->ctl_position;
2558         cte_hold(temp);
2559         l->ctl_flags |= CTLF_COPYOUT;
2560         mutex_exit(&q->ctq_lock);
2561 
2562         /*
2563          * We now have an event.  Copy in the user event structure to
2564          * see how much space we have to work with.
2565          */
2566         result = copyin(uaddr, STRUCT_BUF(ev), STRUCT_SIZE(ev));
2567         if (result)
2568                 goto copyerr;
2569 
2570         /*
2571          * Determine what data we have and what the user should be
2572          * allowed to see.
2573          */
2574         size = gsize = 0;
2575         if (temp->cte_data) {
2576                 VERIFY(nvlist_size(temp->cte_data, &size,
2577                     NV_ENCODE_NATIVE) == 0);
2578                 ASSERT(size != 0);
2579         }
2580         if (zuniqid == GLOBAL_ZONEUNIQID && temp->cte_gdata) {
2581                 VERIFY(nvlist_size(temp->cte_gdata, &gsize,
2582                     NV_ENCODE_NATIVE) == 0);
2583                 ASSERT(gsize != 0);
2584         }
2585 
2586         /*
2587          * If we have enough space, copy out the extended event data.
2588          */
2589         len = size + gsize;
2590         if (len) {
2591                 if (STRUCT_FGET(ev, ctev_nbytes) >= len) {
2592                         char *buf = kmem_alloc(len, KM_SLEEP);
2593 
2594                         if (size)
2595                                 VERIFY(nvlist_pack(temp->cte_data, &buf, &size,
2596                                     NV_ENCODE_NATIVE, KM_SLEEP) == 0);
2597                         if (gsize) {
2598                                 char *tmp = buf + size;
2599 
2600                                 VERIFY(nvlist_pack(temp->cte_gdata, &tmp,
2601                                     &gsize, NV_ENCODE_NATIVE, KM_SLEEP) == 0);
2602                         }
2603 
2604                         /* This shouldn't have changed */
2605                         ASSERT(size + gsize == len);
2606                         result = copyout(buf, STRUCT_FGETP(ev, ctev_buffer),
2607                             len);
2608                         kmem_free(buf, len);
2609                         if (result)
2610                                 goto copyerr;
2611                 } else {
2612                         partial = 1;
2613                 }
2614         }
2615 
2616         /*
2617          * Copy out the common event data.
2618          */
2619         STRUCT_FSET(ev, ctev_id, temp->cte_contract->ct_id);
2620         STRUCT_FSET(ev, ctev_evid, temp->cte_id);
2621         STRUCT_FSET(ev, ctev_cttype,
2622             temp->cte_contract->ct_type->ct_type_index);
2623         STRUCT_FSET(ev, ctev_flags, temp->cte_flags &
2624             (CTE_ACK|CTE_INFO|CTE_NEG));
2625         STRUCT_FSET(ev, ctev_type, temp->cte_type);
2626         STRUCT_FSET(ev, ctev_nbytes, len);
2627         STRUCT_FSET(ev, ctev_goffset, size);
2628         result = copyout(STRUCT_BUF(ev), uaddr, STRUCT_SIZE(ev));
2629 
2630 copyerr:
2631         /*
2632          * Only move our location in the queue if all copyouts were
2633          * successful, the caller provided enough space for the entire
2634          * event, and our endpoint wasn't reset or otherwise moved by
2635          * another thread.
2636          */
2637         mutex_enter(&q->ctq_lock);
2638         if (result)
2639                 result = EFAULT;
2640         else if (!partial && ((l->ctl_flags & CTLF_RESET) == 0) &&
2641             (l->ctl_position == temp))
2642                 (void) cte_qmove(q, l, list_next(&q->ctq_events, temp));
2643         l->ctl_flags &= ~(CTLF_COPYOUT|CTLF_RESET);
2644         /*
2645          * Signal any readers blocked on our CTLF_COPYOUT.
2646          */
2647         cv_signal(&l->ctl_cv);
2648         cte_rele(temp);
2649 
2650 error:
2651         mutex_exit(&q->ctq_lock);
2652         return (result);
2653 }
2654 
2655 /*
2656  * cte_set_reliable
2657  *
2658  * Requests that events be reliably delivered to an event endpoint.
2659  * Unread informative and acknowledged critical events will not be
2660  * removed from the queue until this listener reads or skips them.
2661  * Because a listener could maliciously request reliable delivery and
2662  * then do nothing, this requires that PRIV_CONTRACT_EVENT be in the
2663  * caller's effective set.
2664  */
2665 int
2666 cte_set_reliable(ct_listener_t *l, const cred_t *cr)
2667 {
2668         ct_equeue_t *q = l->ctl_equeue;
2669         int error;
2670 
2671         if ((error = secpolicy_contract_event(cr)) != 0)
2672                 return (error);
2673 
2674         mutex_enter(&q->ctq_lock);
2675         if ((l->ctl_flags & CTLF_RELIABLE) == 0) {
2676                 l->ctl_flags |= CTLF_RELIABLE;
2677                 q->ctq_nreliable++;
2678                 if (l->ctl_position != NULL)
2679                         l->ctl_position->cte_nodes[q->ctq_listno].
2680                             ctm_nreliable++;
2681         }
2682         mutex_exit(&q->ctq_lock);
2683 
2684         return (0);
2685 }