1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/mutex.h>
  26 #include <sys/debug.h>
  27 #include <sys/types.h>
  28 #include <sys/param.h>
  29 #include <sys/kmem.h>
  30 #include <sys/thread.h>
  31 #include <sys/id_space.h>
  32 #include <sys/avl.h>
  33 #include <sys/list.h>
  34 #include <sys/sysmacros.h>
  35 #include <sys/proc.h>
  36 #include <sys/contract.h>
  37 #include <sys/contract_impl.h>
  38 #include <sys/contract/device.h>
  39 #include <sys/contract/device_impl.h>
  40 #include <sys/cmn_err.h>
  41 #include <sys/nvpair.h>
  42 #include <sys/policy.h>
  43 #include <sys/ddi_impldefs.h>
  44 #include <sys/ddi_implfuncs.h>
  45 #include <sys/systm.h>
  46 #include <sys/stat.h>
  47 #include <sys/sunddi.h>
  48 #include <sys/esunddi.h>
  49 #include <sys/ddi.h>
  50 #include <sys/fs/dv_node.h>
  51 #include <sys/sunndi.h>
  52 #undef ct_lock  /* needed because clnt.h defines ct_lock as a macro */
  53 
  54 /*
  55  * Device Contracts
  56  * -----------------
  57  * This file contains the core code for the device contracts framework.
  58  * A device contract is an agreement or a contract between a process and
  59  * the kernel regarding the state of the device. A device contract may be
  60  * created when a relationship is formed between a device and a process
  61  * i.e. at open(2) time, or it may be created at some point after the device
  62  * has been opened. A device contract once formed may be broken by either party.
  63  * A device contract can be broken by the process by an explicit abandon of the
  64  * contract or by an implicit abandon when the process exits. A device contract
  65  * can be broken by the kernel either asynchronously (without negotiation) or
  66  * synchronously (with negotiation). Exactly which happens depends on the device
  67  * state transition. The following state diagram shows the transitions between
  68  * device states. Only device state transitions currently supported by device
  69  * contracts are shown.
  70  *
  71  *                              <-- A -->
  72  *                       /-----------------> DEGRADED
  73  *                       |                      |
  74  *                       |                      |
  75  *                       |                      | S
  76  *                       |                      | |
  77  *                       |                      | v
  78  *                       v       S -->          v
  79  *                      ONLINE ------------> OFFLINE
  80  *
  81  *
  82  * In the figure above, the arrows indicate the direction of transition. The
  83  * letter S refers to transitions which are inherently synchronous i.e.
  84  * require negotiation and the letter A indicates transitions which are
  85  * asynchronous i.e. are done without contract negotiations. A good example
  86  * of a synchronous transition is the ONLINE -> OFFLINE transition. This
  87  * transition cannot happen as long as there are consumers which have the
  88  * device open. Thus some form of negotiation needs to happen between the
  89  * consumers and the kernel to ensure that consumers either close devices
  90  * or disallow the move to OFFLINE. Certain other transitions such as
  91  * ONLINE --> DEGRADED for example, are inherently asynchronous i.e.
  92  * non-negotiable. A device that suffers a fault that degrades its
  93  * capabilities will become degraded irrespective of what consumers it has,
  94  * so a negotiation in this case is pointless.
  95  *
  96  * The following device states are currently defined for device contracts:
  97  *
  98  *      CT_DEV_EV_ONLINE
  99  *              The device is online and functioning normally
 100  *      CT_DEV_EV_DEGRADED
 101  *              The device is online but is functioning in a degraded capacity
 102  *      CT_DEV_EV_OFFLINE
 103  *              The device is offline and is no longer configured
 104  *
 105  * A typical consumer of device contracts starts out with a contract
 106  * template and adds terms to that template. These include the
 107  * "acceptable set" (A-set) term, which is a bitset of device states which
 108  * are guaranteed by the contract. If the device moves out of a state in
 109  * the A-set, the contract is broken. The breaking of the contract can
 110  * be asynchronous in which case a critical contract event is sent to the
 111  * contract holder but no negotiations take place. If the breaking of the
 112  * contract is synchronous, negotiations are opened between the affected
 113  * consumer and the kernel. The kernel does this by sending a critical
 114  * event to the consumer with the CTE_NEG flag set indicating that this
 115  * is a negotiation event. The consumer can accept this change by sending
 116  * an ACK message to the kernel. Alternatively, if it has the necessary
 117  * privileges, it can send a NACK message to the kernel which will block
 118  * the device state change. To NACK a negotiable event, a process must
 119  * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
 120  *
 121  * Other terms include the "minor path" term, specified explicitly if the
 122  * contract is not being created at open(2) time or specified implicitly
 123  * if the contract is being created at open time via an activated template.
 124  *
 125  * A contract event is sent on any state change to which the contract
 126  * owner has subscribed via the informative or critical event sets. Only
 127  * critical events are guaranteed to be delivered. Since all device state
 128  * changes are controlled by the kernel and cannot be arbitrarily generated
 129  * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
 130  * need to be asserted in a process's effective set to designate an event as
 131  * critical. To ensure privacy, a process must either have the same effective
 132  * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
 133  * asserted in its effective set in order to observe device contract events
 134  * off the device contract type specific endpoint.
 135  *
 136  * Yet another term available with device contracts is the "non-negotiable"
 137  * term. This term is used to pre-specify a NACK to any contract negotiation.
 138  * This term is ignored for asynchronous state changes. For example, a
 139  * process may have the A-set {ONLINE|DEGRADED} and make the contract
 140  * non-negotiable. In this case, the device contract framework assumes a
 141  * NACK for any transition to OFFLINE and blocks the offline. If the A-set
 142  * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE
 143  * are NACKed but transitions to DEGRADE succeed.
 144  *
 145  * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract)
 146  * happens just before the I/O framework attempts to offline a device
 147  * (i.e. detach a device and set the offline flag so that it cannot be
 148  * reattached). A device contract holder is expected to either NACK the offline
 149  * (if privileged) or release the device and allow the offline to proceed.
 150  *
 151  * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract)
 152  * is generated just before the I/O framework transitions the device state
 153  * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology).
 154  *
 155  * The contract holder is expected to ACK or NACK a negotiation event
 156  * within a certain period of time. If the ACK/NACK is not received
 157  * within the timeout period, the device contract framework will behave
 158  * as if the contract does not exist and will proceed with the event.
 159  *
 160  * Unlike a process contract a device contract does not need to exist
 161  * once it is abandoned, since it does not define a fault boundary. It
 162  * merely represents an agreement between a process and the kernel
 163  * regarding the state of the device. Once the process has abandoned
 164  * the contract (either implicitly via a process exit or explicitly)
 165  * the kernel has no reason to retain the contract. As a result
 166  * device contracts are neither inheritable nor need to exist in an
 167  * orphan state.
 168  *
 169  * A device unlike a process may exist in multiple contracts and has
 170  * a "life" outside a device contract. A device unlike a process
 171  * may exist without an associated contract. Unlike a process contract
 172  * a device contract may be formed after a binding relationship is
 173  * formed between a process and a device.
 174  *
 175  *      IMPLEMENTATION NOTES
 176  *      ====================
 177  * DATA STRUCTURES
 178  * ----------------
 179  *      The heart of the device contracts implementation is the device contract
 180  *      private cont_device_t (or ctd for short) data structure. It encapsulates
 181  *      the generic contract_t data structure and has a number of private
 182  *      fields.
 183  *      These include:
 184  *              cond_minor: The minor device that is the subject of the contract
 185  *              cond_aset:  The bitset of states which are guaranteed by the
 186  *                         contract
 187  *              cond_noneg: If set, indicates that the result of negotiation has
 188  *                          been predefined to be a NACK
 189  *      In addition, there are other device identifiers such as the devinfo node,
 190  *      dev_t and spec_type of the minor node. There are also a few fields that
 191  *      are used during negotiation to maintain state. See
 192  *              uts/common/sys/contract/device_impl.h
 193  *      for details.
 194  *      The ctd structure represents the device private part of a contract of
 195  *      type "device"
 196  *
 197  *      Another data structure used by device contracts is ctmpl_device. It is
 198  *      the device contracts private part of the contract template structure. It
 199  *      encapsulates the generic template structure "ct_template_t" and includes
 200  *      the following device contract specific fields
 201  *              ctd_aset:   The bitset of states that should be guaranteed by a
 202  *                          contract
 203  *              ctd_noneg:  If set, indicates that contract should NACK a
 204  *                          negotiation
 205  *              ctd_minor:  The devfs_path (without the /devices prefix) of the
 206  *                          minor node that is the subject of the contract.
 207  *
 208  * ALGORITHMS
 209  * ---------
 210  * There are three sets of routines in this file
 211  *      Template related routines
 212  *      -------------------------
 213  *      These routines provide support for template related operations initiated
 214  *      via the generic template operations. These include routines that dup
 215  *      a template, free it, and set various terms in the template
 216  *      (such as the minor node path, the acceptable state set (or A-set)
 217  *      and the non-negotiable term) as well as a routine to query the
 218  *      device specific portion of the template for the abovementioned terms.
 219  *      There is also a routine to create (ctmpl_device_create) that is used to
 220  *      create a contract from a template. This routine calls (after initial
 221  *      setup) the common function used to create a device contract
 222  *      (contract_device_create).
 223  *
 224  *      core device contract implementation
 225  *      ----------------------------------
 226  *      These routines support the generic contract framework to provide
 227  *      functionality that allows contracts to be created, managed and
 228  *      destroyed. The contract_device_create() routine is a routine used
 229  *      to create a contract from a template (either via an explicit create
 230  *      operation on a template or implicitly via an open with an
 231  *      activated template.). The contract_device_free() routine assists
 232  *      in freeing the device contract specific parts. There are routines
 233  *      used to abandon (contract_device_abandon) a device contract as well
 234  *      as a routine to destroy (which despite its name does not destroy,
 235  *      it only moves a contract to a dead state) a contract.
 236  *      There is also a routine to return status information about a
 237  *      contract - the level of detail depends on what is requested by the
 238  *      user. A value of CTD_FIXED only returns fixed length fields such
 239  *      as the A-set, state of device and value of the "noneg" term. If
 240  *      CTD_ALL is specified, the minor node path is returned as well.
 241  *
 242  *      In addition there are interfaces (contract_device_ack/nack) which
 243  *      are used to support negotiation between userland processes and
 244  *      device contracts. These interfaces record the acknowledgement
 245  *      or lack thereof for negotiation events and help determine if the
 246  *      negotiated event should occur.
 247  *
 248  *      "backend routines"
 249  *      -----------------
 250  *      The backend routines form the interface between the I/O framework
 251  *      and the device contract subsystem. These routines, allow the I/O
 252  *      framework to call into the device contract subsystem to notify it of
 253  *      impending changes to a device state as well as to inform of the
 254  *      final disposition of such attempted state changes. Routines in this
 255  *      class include contract_device_offline() that indicates an attempt to
 256  *      offline a device, contract_device_degrade() that indicates that
 257  *      a device is moving to the degraded state and contract_device_negend()
 258  *      that is used by the I/O framework to inform the contracts subsystem of
 259  *      the final disposition of an attempted operation.
 260  *
 261  *      SUMMARY
 262  *      -------
 263  *      A contract starts its life as a template. A process allocates a device
 264  *      contract template and sets various terms:
 265  *              The A-set
 266  *              The device minor node
 267  *              Critical and informative events
 268  *              The noneg i.e. no negotiation term
 269  *      Setting of these terms in the template is done via the
 270  *      ctmpl_device_set() entry point in this file. A process can query a
 271  *      template to determine the terms already set in the template - this is
 272  *      facilitated by the ctmpl_device_get() routine.
 273  *
 274  *      Once all the appropriate terms are set, the contract is instantiated via
 275  *      one of two methods
 276  *      - via an explicit create operation - this is facilitated by the
 277  *        ctmpl_device_create() entry point
 278  *      - synchronously with the open(2) system call - this is achieved via the
 279  *        contract_device_open() routine.
 280  *      The core work for both these above functions is done by
 281  *      contract_device_create()
 282  *
 283  *      A contract once created can be queried for its status. Support for
 284  *      status info is provided by both the common contracts framework and by
 285  *      the "device" contract type. If the level of detail requested is
 286  *      CTD_COMMON, only the common contract framework data is used. Higher
 287  *      levels of detail result in calls to contract_device_status() to supply
 288  *      device contract type specific status information.
 289  *
 290  *      A contract once created may be abandoned either explicitly or implicitly.
 291  *      In either case, the contract_device_abandon() function is invoked. This
 292  *      function merely calls contract_destroy() which moves the contract to
 293  *      the DEAD state. The device contract portion of destroy processing is
 294  *      provided by contract_device_destroy() which merely disassociates the
 295  *      contract from its device devinfo node. A contract in the DEAD state is
 296  *      not freed. It hangs around until all references to the contract are
 297  *      gone. When that happens, the contract is finally deallocated. The
 298  *      device contract specific portion of the free is done by
 299  *      contract_device_free() which finally frees the device contract specific
 300  *      data structure (cont_device_t).
 301  *
 302  *      When a device undergoes a state change, the I/O framework calls the
 303  *      corresponding device contract entry point. For example, when a device
 304  *      is about to go OFFLINE, the routine contract_device_offline() is
 305  *      invoked. Similarly if a device moves to DEGRADED state, the routine
 306  *      contract_device_degrade() function is called. These functions call the
 307  *      core routine contract_device_publish(). This function determines via
 308  *      the function is_sync_neg() whether an event is a synchronous (i.e.
 309  *      negotiable) event or not. In the former case contract_device_publish()
 310  *      publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs
 311  *      and/or NACKs from contract holders. In the latter case, it simply
 312  *      publishes the event and does not wait. In the negotiation case, ACKs or
 313  *      NACKs from userland consumers results in contract_device_ack_nack()
 314  *      being called where the result of the negotiation is recorded in the
 315  *      contract data structure. Once all outstanding contract owners have
 316  *      responded, the device contract code in wait_for_acks() determines the
 317  *      final result of the negotiation. A single NACK overrides all other ACKs
 318  *      If there is no NACK, then a single ACK will result in an overall ACK
 319  *      result. If there are no ACKs or NACKs, then the result CT_NONE is
 320  *      returned back to the I/O framework. Once the event is permitted or
 321  *      blocked, the I/O framework proceeds or aborts the state change. The
 322  *      I/O framework then calls contract_device_negend() with a result code
 323  *      indicating final disposition of the event. This call releases the
 324  *      barrier and other state associated with the previous negotiation,
 325  *      which permits the next event (if any) to come into the device contract
 326  *      framework.
 327  *
 328  *      Finally, a device that has outstanding contracts may be removed from
 329  *      the system which results in its devinfo node being freed. The devinfo
 330  *      free routine in the I/O framework, calls into the device contract
 331  *      function - contract_device_remove_dip(). This routine, disassociates
 332  *      the dip from all contracts associated with the devinfo node being freed,
 333  *      allowing the devinfo node to be freed.
 334  *
 335  * LOCKING
 336  * ---------
 337  *      There are four sets of data that need to be protected by locks
 338  *
 339  *      i) device contract specific portion of the contract template - This data
 340  *      is protected by the template lock ctmpl_lock.
 341  *
 342  *      ii) device contract specific portion of the contract - This data is
 343  *      protected by the contract lock ct_lock
 344  *
 345  *      iii) The linked list of contracts hanging off a devinfo node - This
 346  *      list is protected by the per-devinfo node lock devi_ct_lock
 347  *
 348  *      iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv
 349  *      and devi_ct_count that controls state changes to a dip
 350  *
 351  *      The template lock is independent in that none of the other locks in this
 352  *      file may be taken while holding the template lock (and vice versa).
 353  *
 354  *      The remaining three locks have the following lock order
 355  *
 356  *      devi_ct_lock  -> ct_count barrier ->  ct_lock
 357  *
 358  */
 359 
 360 static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev,
 361     int spec_type, proc_t *owner, int *errorp);
 362 
 363 /* barrier routines */
 364 static void ct_barrier_acquire(dev_info_t *dip);
 365 static void ct_barrier_release(dev_info_t *dip);
 366 static int ct_barrier_held(dev_info_t *dip);
 367 static int ct_barrier_empty(dev_info_t *dip);
 368 static void ct_barrier_wait_for_release(dev_info_t *dip);
 369 static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs);
 370 static void ct_barrier_decr(dev_info_t *dip);
 371 static void ct_barrier_incr(dev_info_t *dip);
 372 
 373 ct_type_t *device_type;
 374 
 375 /*
 376  * Macro predicates for determining when events should be sent and how.
 377  */
 378 #define EVSENDP(ctd, flag) \
 379         ((ctd->cond_contract.ct_ev_info | ctd->cond_contract.ct_ev_crit) & flag)
 380 
 381 #define EVINFOP(ctd, flag) \
 382         ((ctd->cond_contract.ct_ev_crit & flag) == 0)
 383 
 384 /*
 385  * State transition table showing which transitions are synchronous and which
 386  * are not.
 387  */
 388 struct ct_dev_negtable {
 389         uint_t  st_old;
 390         uint_t  st_new;
 391         uint_t  st_neg;
 392 } ct_dev_negtable[] = {
 393         {CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE,   1},
 394         {CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED,  0},
 395         {CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE,  0},
 396         {CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1},
 397         {0}
 398 };
 399 
 400 /*
 401  * Device contract template implementation
 402  */
 403 
 404 /*
 405  * ctmpl_device_dup
 406  *
 407  * The device contract template dup entry point.
 408  * This simply copies all the fields (generic as well as device contract
 409  * specific) fields of the original.
 410  */
 411 static struct ct_template *
 412 ctmpl_device_dup(struct ct_template *template)
 413 {
 414         ctmpl_device_t *new;
 415         ctmpl_device_t *old = template->ctmpl_data;
 416         char *buf;
 417         char *minor;
 418 
 419         new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
 420         buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 421 
 422         /*
 423          * copy generic fields.
 424          * ctmpl_copy returns with old template lock held
 425          */
 426         ctmpl_copy(&new->ctd_ctmpl, template);
 427 
 428         new->ctd_ctmpl.ctmpl_data = new;
 429         new->ctd_aset = old->ctd_aset;
 430         new->ctd_minor = NULL;
 431         new->ctd_noneg = old->ctd_noneg;
 432 
 433         if (old->ctd_minor) {
 434                 ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN);
 435                 bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1);
 436         } else {
 437                 kmem_free(buf, MAXPATHLEN);
 438                 buf = NULL;
 439         }
 440 
 441         mutex_exit(&template->ctmpl_lock);
 442         if (buf) {
 443                 minor = i_ddi_strdup(buf, KM_SLEEP);
 444                 kmem_free(buf, MAXPATHLEN);
 445                 buf = NULL;
 446         } else {
 447                 minor = NULL;
 448         }
 449         mutex_enter(&template->ctmpl_lock);
 450 
 451         if (minor) {
 452                 new->ctd_minor = minor;
 453         }
 454 
 455         ASSERT(buf == NULL);
 456         return (&new->ctd_ctmpl);
 457 }
 458 
 459 /*
 460  * ctmpl_device_free
 461  *
 462  * The device contract template free entry point.  Just
 463  * frees the template.
 464  */
 465 static void
 466 ctmpl_device_free(struct ct_template *template)
 467 {
 468         ctmpl_device_t *dtmpl = template->ctmpl_data;
 469 
 470         if (dtmpl->ctd_minor)
 471                 kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);
 472 
 473         kmem_free(dtmpl, sizeof (ctmpl_device_t));
 474 }
 475 
 476 /*
 477  * SAFE_EV is the set of events which a non-privileged process is
 478  * allowed to make critical. An unprivileged device contract owner has
 479  * no control over when a device changes state, so all device events
 480  * can be in the critical set.
 481  *
 482  * EXCESS tells us if "value", a critical event set, requires
 483  * additional privilege. For device contracts EXCESS currently
 484  * evaluates to 0.
 485  */
 486 #define SAFE_EV         (CT_DEV_ALLEVENT)
 487 #define EXCESS(value)   ((value) & ~SAFE_EV)
 488 
 489 
 490 /*
 491  * ctmpl_device_set
 492  *
 493  * The device contract template set entry point. Sets various terms in the
 494  * template. The non-negotiable  term can only be set if the process has
 495  * the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
 496  */
 497 static int
 498 ctmpl_device_set(struct ct_template *tmpl, ct_kparam_t *kparam,
 499     const cred_t *cr)
 500 {
 501         ctmpl_device_t *dtmpl = tmpl->ctmpl_data;
 502         ct_param_t *param = &kparam->param;
 503         int error;
 504         dev_info_t *dip;
 505         int spec_type;
 506         uint64_t param_value;
 507         char *str_value;
 508 
 509         ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock));
 510 
 511         if (param->ctpm_id == CTDP_MINOR) {
 512                 str_value = (char *)kparam->ctpm_kbuf;
 513                 str_value[param->ctpm_size - 1] = '\0';
 514         } else {
 515                 if (param->ctpm_size < sizeof (uint64_t))
 516                         return (EINVAL);
 517                 param_value = *(uint64_t *)kparam->ctpm_kbuf;
 518         }
 519 
 520         switch (param->ctpm_id) {
 521         case CTDP_ACCEPT:
 522                 if (param_value & ~CT_DEV_ALLEVENT)
 523                         return (EINVAL);
 524                 if (param_value == 0)
 525                         return (EINVAL);
 526                 if (param_value == CT_DEV_ALLEVENT)
 527                         return (EINVAL);
 528 
 529                 dtmpl->ctd_aset = param_value;
 530                 break;
 531         case CTDP_NONEG:
 532                 if (param_value != CTDP_NONEG_SET &&
 533                     param_value != CTDP_NONEG_CLEAR)
 534                         return (EINVAL);
 535 
 536                 /*
 537                  * only privileged processes can designate a contract
 538                  * non-negotiatble.
 539                  */
 540                 if (param_value == CTDP_NONEG_SET &&
 541                     (error = secpolicy_sys_devices(cr)) != 0) {
 542                         return (error);
 543                 }
 544 
 545                 dtmpl->ctd_noneg = param_value;
 546                 break;
 547 
 548         case CTDP_MINOR:
 549                 if (*str_value != '/' ||
 550                     strncmp(str_value, "/devices/",
 551                     strlen("/devices/")) == 0 ||
 552                     strstr(str_value, "../devices/") != NULL ||
 553                     strchr(str_value, ':') == NULL) {
 554                         return (EINVAL);
 555                 }
 556 
 557                 spec_type = 0;
 558                 dip = NULL;
 559                 if (resolve_pathname(str_value, &dip, NULL, &spec_type) != 0) {
 560                         return (ERANGE);
 561                 }
 562                 ddi_release_devi(dip);
 563 
 564                 if (spec_type != S_IFCHR && spec_type != S_IFBLK) {
 565                         return (EINVAL);
 566                 }
 567 
 568                 if (dtmpl->ctd_minor != NULL) {
 569                         kmem_free(dtmpl->ctd_minor,
 570                             strlen(dtmpl->ctd_minor) + 1);
 571                 }
 572                 dtmpl->ctd_minor = i_ddi_strdup(str_value, KM_SLEEP);
 573                 break;
 574         case CTP_EV_CRITICAL:
 575                 /*
 576                  * Currently for device contracts, any event
 577                  * may be added to the critical set. We retain the
 578                  * following code however for future enhancements.
 579                  */
 580                 if (EXCESS(param_value) &&
 581                     (error = secpolicy_contract_event(cr)) != 0)
 582                         return (error);
 583                 tmpl->ctmpl_ev_crit = param_value;
 584                 break;
 585         default:
 586                 return (EINVAL);
 587         }
 588 
 589         return (0);
 590 }
 591 
 592 /*
 593  * ctmpl_device_get
 594  *
 595  * The device contract template get entry point.  Simply fetches and
 596  * returns the value of the requested term.
 597  */
 598 static int
 599 ctmpl_device_get(struct ct_template *template, ct_kparam_t *kparam)
 600 {
 601         ctmpl_device_t *dtmpl = template->ctmpl_data;
 602         ct_param_t *param = &kparam->param;
 603         uint64_t *param_value = kparam->ctpm_kbuf;
 604 
 605         ASSERT(MUTEX_HELD(&template->ctmpl_lock));
 606 
 607         if (param->ctpm_id == CTDP_ACCEPT ||
 608             param->ctpm_id == CTDP_NONEG) {
 609                 if (param->ctpm_size < sizeof (uint64_t))
 610                         return (EINVAL);
 611                 kparam->ret_size = sizeof (uint64_t);
 612         }
 613 
 614         switch (param->ctpm_id) {
 615         case CTDP_ACCEPT:
 616                 *param_value = dtmpl->ctd_aset;
 617                 break;
 618         case CTDP_NONEG:
 619                 *param_value = dtmpl->ctd_noneg;
 620                 break;
 621         case CTDP_MINOR:
 622                 if (dtmpl->ctd_minor) {
 623                         kparam->ret_size = strlcpy((char *)kparam->ctpm_kbuf,
 624                             dtmpl->ctd_minor, param->ctpm_size);
 625                         kparam->ret_size++;
 626                 } else {
 627                         return (ENOENT);
 628                 }
 629                 break;
 630         default:
 631                 return (EINVAL);
 632         }
 633 
 634         return (0);
 635 }
 636 
 637 /*
 638  * Device contract type specific portion of creating a contract using
 639  * a specified template
 640  */
 641 /*ARGSUSED*/
 642 int
 643 ctmpl_device_create(ct_template_t *template, ctid_t *ctidp)
 644 {
 645         ctmpl_device_t *dtmpl;
 646         char *buf;
 647         dev_t dev;
 648         int spec_type;
 649         int error;
 650         cont_device_t *ctd;
 651 
 652         if (ctidp == NULL)
 653                 return (EINVAL);
 654 
 655         buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 656 
 657         dtmpl = template->ctmpl_data;
 658 
 659         mutex_enter(&template->ctmpl_lock);
 660         if (dtmpl->ctd_minor == NULL) {
 661                 /* incomplete template */
 662                 mutex_exit(&template->ctmpl_lock);
 663                 kmem_free(buf, MAXPATHLEN);
 664                 return (EINVAL);
 665         } else {
 666                 ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
 667                 bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1);
 668         }
 669         mutex_exit(&template->ctmpl_lock);
 670 
 671         spec_type = 0;
 672         dev = NODEV;
 673         if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 ||
 674             dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE ||
 675             (spec_type != S_IFCHR && spec_type != S_IFBLK)) {
 676                 CT_DEBUG((CE_WARN,
 677                     "tmpl_create: failed to find device: %s", buf));
 678                 kmem_free(buf, MAXPATHLEN);
 679                 return (ERANGE);
 680         }
 681         kmem_free(buf, MAXPATHLEN);
 682 
 683         ctd = contract_device_create(template->ctmpl_data,
 684             dev, spec_type, curproc, &error);
 685 
 686         if (ctd == NULL) {
 687                 CT_DEBUG((CE_WARN, "Failed to create device contract for "
 688                     "process (%d) with device (devt = %lu, spec_type = %s)",
 689                     curproc->p_pid, dev,
 690                     spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK"));
 691                 return (error);
 692         }
 693 
 694         mutex_enter(&ctd->cond_contract.ct_lock);
 695         *ctidp = ctd->cond_contract.ct_id;
 696         mutex_exit(&ctd->cond_contract.ct_lock);
 697 
 698         return (0);
 699 }
 700 
/*
 * Device contract specific template entry points
 *
 * This operations vector is what contract_device_default() passes to
 * ctmpl_init() for every device contract template it builds.  The
 * initializer is positional; the comment beside each entry names the
 * slot it fills.  The final entry is the mask of all device contract
 * event types (CT_DEV_ALLEVENT).
 */
static ctmplops_t ctmpl_device_ops = {
        ctmpl_device_dup,               /* ctop_dup */
        ctmpl_device_free,              /* ctop_free */
        ctmpl_device_set,               /* ctop_set */
        ctmpl_device_get,               /* ctop_get */
        ctmpl_device_create,            /* ctop_create */
        CT_DEV_ALLEVENT                 /* all device events bitmask */
};
 712 
 713 
 714 /*
 715  * Device contract implementation
 716  */
 717 
 718 /*
 719  * contract_device_default
 720  *
 721  * The device contract default template entry point.  Creates a
 722  * device contract template with a default A-set and no "noneg" ,
 723  * with informative degrade events and critical offline events.
 724  * There is no default minor path.
 725  */
 726 static ct_template_t *
 727 contract_device_default(void)
 728 {
 729         ctmpl_device_t *new;
 730 
 731         new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
 732         ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new);
 733 
 734         new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED;
 735         new->ctd_noneg = 0;
 736         new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED;
 737         new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE;
 738 
 739         return (&new->ctd_ctmpl);
 740 }
 741 
 742 /*
 743  * contract_device_free
 744  *
 745  * Destroys the device contract specific portion of a contract and
 746  * frees the contract.
 747  */
 748 static void
 749 contract_device_free(contract_t *ct)
 750 {
 751         cont_device_t *ctd = ct->ct_data;
 752 
 753         ASSERT(ctd->cond_minor);
 754         ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
 755         kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1);
 756 
 757         ASSERT(ctd->cond_devt != DDI_DEV_T_ANY &&
 758             ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV);
 759 
 760         ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR);
 761 
 762         ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT));
 763         ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1);
 764 
 765         ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT));
 766         ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK)));
 767 
 768         ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0));
 769         ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0));
 770 
 771         ASSERT(!list_link_active(&ctd->cond_next));
 772 
 773         kmem_free(ctd, sizeof (cont_device_t));
 774 }
 775 
/*
 * contract_device_abandon
 *
 * The device contract abandon entry point.
 *
 * Must be called with the contract's ct_lock held.  The only action
 * taken is to hand the contract to contract_destroy().
 */
static void
contract_device_abandon(contract_t *ct)
{
        ASSERT(MUTEX_HELD(&ct->ct_lock));

        /*
         * device contracts cannot be inherited or orphaned.
         * Move the contract to the DEAD_STATE. It will be freed
         * once all references to it are gone.
         */
        contract_destroy(ct);
}
 793 
/*
 * contract_device_destroy
 *
 * The device contract destroy entry point.
 * Called from contract_destroy() to do any type specific destroy. Note
 * that destroy is a misnomer - this does not free the contract, it only
 * moves it to the dead state. A contract is actually freed via
 *      contract_rele() -> contract_dtor(), contop_free()
 *
 * Entered with ct_lock held and returns with ct_lock held, but the lock
 * is dropped and re-acquired along the way.  If the contract is still
 * linked to a devinfo node, it is removed from that node's contract list,
 * cond_dip is cleared and the hold taken for the dip linkage is released.
 */
static void
contract_device_destroy(contract_t *ct)
{
        cont_device_t   *ctd;
        dev_info_t      *dip;

        ASSERT(MUTEX_HELD(&ct->ct_lock));

        /*
         * Loop until either the contract turns out to be unlinked from its
         * dip or devi_ct_lock has been acquired without blocking.  ct_data
         * and cond_dip are re-read on every pass because ct_lock is dropped
         * while we sleep between attempts.
         */
        for (;;) {
                ctd = ct->ct_data;
                dip = ctd->cond_dip;
                if (dip == NULL) {
                        /*
                         * The dip has been removed, this is a dangling contract
                         * Check that dip linkages are NULL
                         */
                        ASSERT(!list_link_active(&ctd->cond_next));
                        CT_DEBUG((CE_NOTE, "contract_device_destroy:"
                            " contract has no devinfo node. contract ctid : %d",
                            ct->ct_id));
                        return;
                }

                /*
                 * The intended lock order is : devi_ct_lock -> ct_count
                 * barrier -> ct_lock.
                 * However we can't do this here as dropping the ct_lock allows
                 * a race condition with i_ddi_free_node()/
                 * contract_device_remove_dip() which may free off dip before
                 * we can take devi_ct_lock. So use mutex_tryenter to avoid
                 * dropping ct_lock until we have acquired devi_ct_lock.
                 */
                if (mutex_tryenter(&(DEVI(dip)->devi_ct_lock)) != 0)
                        break;
                /* lock is busy: back off without holding ct_lock, then retry */
                mutex_exit(&ct->ct_lock);
                delay(drv_usectohz(1000));
                mutex_enter(&ct->ct_lock);
        }
        /* devi_ct_lock is held from here on; drop ct_lock for the wait */
        mutex_exit(&ct->ct_lock);

        /*
         * Waiting for the barrier to be released is strictly speaking not
         * necessary. But it simplifies the implementation of
         * contract_device_publish() by establishing the invariant that
         * device contracts cannot go away during negotiation.
         */
        ct_barrier_wait_for_release(dip);
        mutex_enter(&ct->ct_lock);

        list_remove(&(DEVI(dip)->devi_ct), ctd);
        ctd->cond_dip = NULL; /* no longer linked to dip */
        contract_rele(ct);      /* remove hold for dip linkage */

        /*
         * Release devi_ct_lock, dropping ct_lock first and taking it again
         * afterwards so that we return with ct_lock held, as on entry.
         */
        mutex_exit(&ct->ct_lock);
        mutex_exit(&(DEVI(dip)->devi_ct_lock));
        mutex_enter(&ct->ct_lock);
}
 860 
 861 /*
 862  * contract_device_status
 863  *
 864  * The device contract status entry point. Called when level of "detail"
 865  * is either CTD_FIXED or CTD_ALL
 866  *
 867  */
 868 static void
 869 contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl,
 870     void *status, model_t model)
 871 {
 872         cont_device_t *ctd = ct->ct_data;
 873 
 874         ASSERT(detail == CTD_FIXED || detail == CTD_ALL);
 875 
 876         mutex_enter(&ct->ct_lock);
 877         contract_status_common(ct, zone, status, model);
 878 
 879         /*
 880          * There's no need to hold the contract lock while accessing static
 881          * data like aset or noneg. But since we need the lock to access other
 882          * data like state, we hold it anyway.
 883          */
 884         VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0);
 885         VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0);
 886         VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0);
 887 
 888         if (detail == CTD_FIXED) {
 889                 mutex_exit(&ct->ct_lock);
 890                 return;
 891         }
 892 
 893         ASSERT(ctd->cond_minor);
 894         VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0);
 895 
 896         mutex_exit(&ct->ct_lock);
 897 }
 898 
 899 /*
 900  * Converts a result integer into the corresponding string. Used for printing
 901  * messages
 902  */
 903 static char *
 904 result_str(uint_t result)
 905 {
 906         switch (result) {
 907         case CT_ACK:
 908                 return ("CT_ACK");
 909         case CT_NACK:
 910                 return ("CT_NACK");
 911         case CT_NONE:
 912                 return ("CT_NONE");
 913         default:
 914                 return ("UNKNOWN");
 915         }
 916 }
 917 
 918 /*
 919  * Converts a device state integer constant into the corresponding string.
 920  * Used to print messages.
 921  */
 922 static char *
 923 state_str(uint_t state)
 924 {
 925         switch (state) {
 926         case CT_DEV_EV_ONLINE:
 927                 return ("ONLINE");
 928         case CT_DEV_EV_DEGRADED:
 929                 return ("DEGRADED");
 930         case CT_DEV_EV_OFFLINE:
 931                 return ("OFFLINE");
 932         default:
 933                 return ("UNKNOWN");
 934         }
 935 }
 936 
 937 /*
 938  * Routine that determines if a particular CT_DEV_EV_? event corresponds to a
 939  * synchronous state change or not.
 940  */
 941 static int
 942 is_sync_neg(uint_t old, uint_t new)
 943 {
 944         int     i;
 945 
 946         ASSERT(old & CT_DEV_ALLEVENT);
 947         ASSERT(new & CT_DEV_ALLEVENT);
 948 
 949         if (old == new) {
 950                 CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s",
 951                     state_str(new)));
 952                 return (-2);
 953         }
 954 
 955         for (i = 0; ct_dev_negtable[i].st_new != 0; i++) {
 956                 if (old == ct_dev_negtable[i].st_old &&
 957                     new == ct_dev_negtable[i].st_new) {
 958                         return (ct_dev_negtable[i].st_neg);
 959                 }
 960         }
 961 
 962         CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: "
 963             "old = %s -> new = %s", state_str(old), state_str(new)));
 964 
 965         return (-1);
 966 }
 967 
 968 /*
 969  * Used to cleanup cached dv_nodes so that when a device is released by
 970  * a contract holder, its devinfo node can be successfully detached.
 971  */
 972 static int
 973 contract_device_dvclean(dev_info_t *dip)
 974 {
 975         char            *devnm;
 976         dev_info_t      *pdip;
 977         int             error;
 978 
 979         ASSERT(dip);
 980 
 981         /* pdip can be NULL if we have contracts against the root dip */
 982         pdip = ddi_get_parent(dip);
 983 
 984         if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) {
 985                 char            *path;
 986 
 987                 path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 988                 (void) ddi_pathname(dip, path);
 989                 CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, "
 990                     "device=%s", path));
 991                 kmem_free(path, MAXPATHLEN);
 992                 return (EDEADLOCK);
 993         }
 994 
 995         if (pdip) {
 996                 devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
 997                 (void) ddi_deviname(dip, devnm);
 998                 error = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
 999                 kmem_free(devnm, MAXNAMELEN + 1);
1000         } else {
1001                 error = devfs_clean(dip, NULL, DV_CLEAN_FORCE);
1002         }
1003 
1004         return (error);
1005 }
1006 
1007 /*
1008  * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland.
1009  * Results in the ACK or NACK being recorded on the dip for one particular
1010  * contract. The device contracts framework evaluates the ACK/NACKs for all
1011  * contracts against a device to determine if a particular device state change
1012  * should be allowed.
1013  */
1014 static int
1015 contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid,
1016     uint_t cmd)
1017 {
1018         cont_device_t *ctd = ct->ct_data;
1019         dev_info_t *dip;
1020         ctid_t  ctid;
1021         int error;
1022 
1023         ctid = ct->ct_id;
1024 
1025         CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid));
1026 
1027         mutex_enter(&ct->ct_lock);
1028         CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid));
1029 
1030         dip = ctd->cond_dip;
1031 
1032         ASSERT(ctd->cond_minor);
1033         ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
1034 
1035         /*
1036          * Negotiation only if new state is not in A-set
1037          */
1038         ASSERT(!(ctd->cond_aset & evtype));
1039 
1040         /*
1041          * Negotiation only if transition is synchronous
1042          */
1043         ASSERT(is_sync_neg(ctd->cond_state, evtype));
1044 
1045         /*
1046          * We shouldn't be negotiating if the "noneg" flag is set
1047          */
1048         ASSERT(!ctd->cond_noneg);
1049 
1050         if (dip)
1051                 ndi_hold_devi(dip);
1052 
1053         mutex_exit(&ct->ct_lock);
1054 
1055         /*
1056          * dv_clean only if !NACK and offline state change
1057          */
1058         if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) {
1059                 CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid));
1060                 error = contract_device_dvclean(dip);
1061                 if (error != 0) {
1062                         CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d",
1063                             ctid));
1064                         ddi_release_devi(dip);
1065                 }
1066         }
1067 
1068         mutex_enter(&ct->ct_lock);
1069 
1070         if (dip)
1071                 ddi_release_devi(dip);
1072 
1073         if (dip == NULL) {
1074                 if (ctd->cond_currev_id != evid) {
1075                         CT_DEBUG((CE_WARN, "%sACK for non-current event "
1076                             "(type=%s, id=%llu) on removed device",
1077                             cmd == CT_NACK ? "N" : "",
1078                             state_str(evtype), (unsigned long long)evid));
1079                         CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d",
1080                             ctid));
1081                 } else {
1082                         ASSERT(ctd->cond_currev_type == evtype);
1083                         CT_DEBUG((CE_WARN, "contract_ack: no such device: "
1084                             "ctid: %d", ctid));
1085                 }
1086                 error = (ct->ct_state == CTS_DEAD) ? ESRCH :
1087                     ((cmd == CT_NACK) ? ETIMEDOUT : 0);
1088                 mutex_exit(&ct->ct_lock);
1089                 return (error);
1090         }
1091 
1092         /*
1093          * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock
1094          */
1095         mutex_exit(&ct->ct_lock);
1096 
1097         mutex_enter(&DEVI(dip)->devi_ct_lock);
1098         mutex_enter(&ct->ct_lock);
1099         if (ctd->cond_currev_id != evid) {
1100                 char *buf;
1101                 mutex_exit(&ct->ct_lock);
1102                 mutex_exit(&DEVI(dip)->devi_ct_lock);
1103                 ndi_hold_devi(dip);
1104                 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1105                 (void) ddi_pathname(dip, buf);
1106                 ddi_release_devi(dip);
1107                 CT_DEBUG((CE_WARN, "%sACK for non-current event"
1108                     "(type=%s, id=%llu) on device %s",
1109                     cmd == CT_NACK ? "N" : "",
1110                     state_str(evtype), (unsigned long long)evid, buf));
1111                 kmem_free(buf, MAXPATHLEN);
1112                 CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d",
1113                     cmd == CT_NACK ? ETIMEDOUT : 0, ctid));
1114                 return (cmd == CT_ACK ? 0 : ETIMEDOUT);
1115         }
1116 
1117         ASSERT(ctd->cond_currev_type == evtype);
1118         ASSERT(cmd == CT_ACK || cmd == CT_NACK);
1119 
1120         CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d",
1121             cmd == CT_NACK ? "N" : "", ctid));
1122 
1123         ctd->cond_currev_ack = cmd;
1124         mutex_exit(&ct->ct_lock);
1125 
1126         ct_barrier_decr(dip);
1127         mutex_exit(&DEVI(dip)->devi_ct_lock);
1128 
1129         CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid));
1130 
1131         return (0);
1132 }
1133 
/*
 * contract_device_ack
 *
 * Invoked when a userland contract holder approves (i.e. ACKs) a state change
 *
 * Forwards to contract_device_ack_nack() with CT_ACK and returns its result.
 */
static int
contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid)
{
        return (contract_device_ack_nack(ct, evtype, evid, CT_ACK));
}
1142 
/*
 * contract_device_nack
 *
 * Invoked when a userland contract holder blocks (i.e. NACKs) a state change
 *
 * Forwards to contract_device_ack_nack() with CT_NACK and returns its result.
 */
static int
contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid)
{
        return (contract_device_ack_nack(ct, evtype, evid, CT_NACK));
}
1151 
/*
 * contract_device_newct
 *
 * Creates a new contract synchronously with the breaking of an existing
 * contract. Currently not supported: the argument is ignored and ENOTSUP
 * is always returned.
 */
/*ARGSUSED*/
static int
contract_device_newct(contract_t *ct)
{
        return (ENOTSUP);
}
1162 
/*
 * Core device contract implementation entry points
 *
 * This operations vector is registered for the device contract type by
 * contract_device_init().  The initializer is positional; the comment
 * beside each entry names the slot it fills.  The qack slot uses the
 * shared contract_qack_notsup() handler rather than a device specific
 * routine.
 */
static contops_t contract_device_ops = {
        contract_device_free,           /* contop_free */
        contract_device_abandon,        /* contop_abandon */
        contract_device_destroy,        /* contop_destroy */
        contract_device_status,         /* contop_status */
        contract_device_ack,            /* contop_ack */
        contract_device_nack,           /* contop_nack */
        contract_qack_notsup,           /* contop_qack */
        contract_device_newct           /* contop_newct */
};
1176 
/*
 * contract_device_init
 *
 * Initializes the device contract type.
 *
 * Registers the type through contract_type_init() as CTT_DEVICE with the
 * name "device", supplying contract_device_ops and the default template
 * constructor contract_device_default(), and saves the returned type
 * handle in "device_type" for use by the rest of this file.
 */
void
contract_device_init(void)
{
        device_type = contract_type_init(CTT_DEVICE, "device",
            &contract_device_ops, contract_device_default);
}
1188 
/*
 * contract_device_create
 *
 * create a device contract given template "tmpl" and the "owner" process.
 * May fail and return NULL if project.max-contracts would have been exceeded.
 *
 * Common device contract creation routine called for both open-time and
 * non-open time device contract creation
 *
 * Arguments:
 *      dtmpl           - device template; its ctd_minor must be set
 *      dev, spec_type  - the device the contract is against; must be a
 *                        real dev_t and S_IFCHR or S_IFBLK
 *      owner           - process that will own the contract
 *      errorp          - set to 0 on success, ERANGE if no devinfo node
 *                        can be found for the minor path, or EAGAIN if
 *                        contract_ctor() fails
 *
 * Returns the new device contract, already linked onto the devinfo node's
 * contract list, or NULL on failure.
 */
static cont_device_t *
contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type,
    proc_t *owner, int *errorp)
{
        cont_device_t *ctd;
        char *minor;
        char *path;
        dev_info_t *dip;

        ASSERT(dtmpl != NULL);
        ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE);
        ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK);
        ASSERT(errorp);

        *errorp = 0;

        path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

        /* copy the minor path out of the template under the template lock */
        mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
        ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
        bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1);
        mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);

        /* the hold obtained here is dropped again before returning */
        dip = e_ddi_hold_devi_by_path(path, 0);
        if (dip == NULL) {
                cmn_err(CE_WARN, "contract_create: Cannot find devinfo node "
                    "for device path (%s)", path);
                kmem_free(path, MAXPATHLEN);
                *errorp = ERANGE;
                return (NULL);
        }

        /*
         * Lock out any parallel contract negotiations
         */
        mutex_enter(&(DEVI(dip)->devi_ct_lock));
        ct_barrier_acquire(dip);
        mutex_exit(&(DEVI(dip)->devi_ct_lock));

        /* keep a copy of the path for the contract; the buffer is done */
        minor = i_ddi_strdup(path, KM_SLEEP);
        kmem_free(path, MAXPATHLEN);

        (void) contract_type_pbundle(device_type, owner);

        ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP);

        /*
         * Only we hold a reference to this contract. Safe to access
         * the fields without a ct_lock
         */
        ctd->cond_minor = minor;
        /*
         * It is safe to set the dip pointer in the contract
         * as the contract will always be destroyed before the dip
         * is released
         */
        ctd->cond_dip = dip;
        ctd->cond_devt = dev;
        ctd->cond_spec = spec_type;

        /*
         * Since we are able to lookup the device, it is either
         * online or degraded
         */
        ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ?
            CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE;

        /* the template lock is held across the copy of terms and the ctor */
        mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
        ctd->cond_aset = dtmpl->ctd_aset;
        ctd->cond_noneg = dtmpl->ctd_noneg;

        /*
         * contract_ctor() initializes the common portion of a contract
         * contract_dtor() destroys the common portion of a contract
         */
        if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl,
            ctd, 0, owner, B_TRUE)) {
                mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
                /*
                 * contract_device_free() destroys the type specific
                 * portion of a contract and frees the contract.
                 * The "minor" path and "cred" is a part of the type specific
                 * portion of the contract and will be freed by
                 * contract_device_free()
                 */
                contract_device_free(&ctd->cond_contract);

                /* release barrier */
                mutex_enter(&(DEVI(dip)->devi_ct_lock));
                ct_barrier_release(dip);
                mutex_exit(&(DEVI(dip)->devi_ct_lock));

                ddi_release_devi(dip);
                *errorp = EAGAIN;
                return (NULL);
        }
        mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);

        /* set up the negotiation and quantum timers of the new contract */
        mutex_enter(&ctd->cond_contract.ct_lock);
        ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME;
        ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME;
        ctd->cond_contract.ct_ntime.ctm_start = -1;
        ctd->cond_contract.ct_qtime.ctm_start = -1;
        mutex_exit(&ctd->cond_contract.ct_lock);

        /*
         * Insert device contract into list hanging off the dip
         * Bump up the ref-count on the contract to reflect this
         */
        contract_hold(&ctd->cond_contract);
        mutex_enter(&(DEVI(dip)->devi_ct_lock));
        list_insert_tail(&(DEVI(dip)->devi_ct), ctd);

        /* release barrier */
        ct_barrier_release(dip);
        mutex_exit(&(DEVI(dip)->devi_ct_lock));

        /* drop the hold taken by e_ddi_hold_devi_by_path() above */
        ddi_release_devi(dip);

        return (ctd);
}
1319 
1320 /*
1321  * Called when a device is successfully opened to create an open-time contract
1322  * i.e. synchronously with a device open.
1323  */
1324 int
1325 contract_device_open(dev_t dev, int spec_type, contract_t **ctpp)
1326 {
1327         ctmpl_device_t *dtmpl;
1328         ct_template_t  *tmpl;
1329         cont_device_t *ctd;
1330         char *path;
1331         klwp_t *lwp;
1332         int error;
1333 
1334         if (ctpp)
1335                 *ctpp = NULL;
1336 
1337         /*
1338          * Check if we are in user-context i.e. if we have an lwp
1339          */
1340         lwp = ttolwp(curthread);
1341         if (lwp == NULL) {
1342                 CT_DEBUG((CE_NOTE, "contract_open: Not user-context"));
1343                 return (0);
1344         }
1345 
1346         tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]);
1347         if (tmpl == NULL) {
1348                 return (0);
1349         }
1350         dtmpl = tmpl->ctmpl_data;
1351 
1352         /*
1353          * If the user set a minor path in the template before an open,
1354          * ignore it. We use the minor path of the actual minor opened.
1355          */
1356         mutex_enter(&tmpl->ctmpl_lock);
1357         if (dtmpl->ctd_minor != NULL) {
1358                 CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: "
1359                     "ignoring device minor path in active template: %s",
1360                     curproc->p_pid, dtmpl->ctd_minor));
1361                 /*
1362                  * This is a copy of the actual activated template.
1363                  * Safe to make changes such as freeing the minor
1364                  * path in the template.
1365                  */
1366                 kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);
1367                 dtmpl->ctd_minor = NULL;
1368         }
1369         mutex_exit(&tmpl->ctmpl_lock);
1370 
1371         path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1372 
1373         if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) {
1374                 CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive "
1375                     "minor path from dev_t,spec {%lu, %d} for process (%d)",
1376                     dev, spec_type, curproc->p_pid));
1377                 ctmpl_free(tmpl);
1378                 kmem_free(path, MAXPATHLEN);
1379                 return (1);
1380         }
1381 
1382         mutex_enter(&tmpl->ctmpl_lock);
1383         ASSERT(dtmpl->ctd_minor == NULL);
1384         dtmpl->ctd_minor = path;
1385         mutex_exit(&tmpl->ctmpl_lock);
1386 
1387         ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error);
1388 
1389         mutex_enter(&tmpl->ctmpl_lock);
1390         ASSERT(dtmpl->ctd_minor);
1391         dtmpl->ctd_minor = NULL;
1392         mutex_exit(&tmpl->ctmpl_lock);
1393         ctmpl_free(tmpl);
1394         kmem_free(path, MAXPATHLEN);
1395 
1396         if (ctd == NULL) {
1397                 cmn_err(CE_NOTE, "contract_device_open(): Failed to "
1398                     "create device contract for process (%d) holding "
1399                     "device (devt = %lu, spec_type = %d)",
1400                     curproc->p_pid, dev, spec_type);
1401                 return (1);
1402         }
1403 
1404         if (ctpp) {
1405                 mutex_enter(&ctd->cond_contract.ct_lock);
1406                 *ctpp = &ctd->cond_contract;
1407                 mutex_exit(&ctd->cond_contract.ct_lock);
1408         }
1409         return (0);
1410 }
1411 
1412 /*
1413  * Called during contract negotiation by the device contract framework to wait
1414  * for ACKs or NACKs from contract holders. If all responses are not received
1415  * before a specified timeout, this routine times out.
1416  */
1417 static uint_t
1418 wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype)
1419 {
1420         cont_device_t *ctd;
1421         int timed_out = 0;
1422         int result = CT_NONE;
1423         int ack;
1424         char *f = "wait_for_acks";
1425 
1426         ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
1427         ASSERT(dip);
1428         ASSERT(evtype & CT_DEV_ALLEVENT);
1429         ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
1430         ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
1431             (spec_type == S_IFBLK || spec_type == S_IFCHR));
1432 
1433         CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip));
1434 
1435         if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) {
1436                 /*
1437                  * some contract owner(s) didn't respond in time
1438                  */
1439                 CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip));
1440                 timed_out = 1;
1441         }
1442 
1443         ack = 0;
1444         for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
1445             ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
1446 
1447                 mutex_enter(&ctd->cond_contract.ct_lock);
1448 
1449                 ASSERT(ctd->cond_dip == dip);
1450 
1451                 if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
1452                         mutex_exit(&ctd->cond_contract.ct_lock);
1453                         continue;
1454                 }
1455                 if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
1456                         mutex_exit(&ctd->cond_contract.ct_lock);
1457                         continue;
1458                 }
1459 
1460                 /* skip if non-negotiable contract */
1461                 if (ctd->cond_noneg) {
1462                         mutex_exit(&ctd->cond_contract.ct_lock);
1463                         continue;
1464                 }
1465 
1466                 ASSERT(ctd->cond_currev_type == evtype);
1467                 if (ctd->cond_currev_ack == CT_NACK) {
1468                         CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p",
1469                             f, (void *)dip));
1470                         mutex_exit(&ctd->cond_contract.ct_lock);
1471                         return (CT_NACK);
1472                 } else if (ctd->cond_currev_ack == CT_ACK) {
1473                         ack = 1;
1474                         CT_DEBUG((CE_NOTE, "%s: found a ACK: %p",
1475                             f, (void *)dip));
1476                 }
1477                 mutex_exit(&ctd->cond_contract.ct_lock);
1478         }
1479 
1480         if (ack) {
1481                 result = CT_ACK;
1482                 CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip));
1483         } else if (timed_out) {
1484                 result = CT_NONE;
1485                 CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p",
1486                     f, (void *)dip));
1487         } else {
1488                 CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p",
1489                     f, (void *)dip));
1490         }
1491 
1492 
1493         return (result);
1494 }
1495 
1496 /*
1497  * Determines the current state of a device (i.e a devinfo node
1498  */
1499 static int
1500 get_state(dev_info_t *dip)
1501 {
1502         if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip))
1503                 return (CT_DEV_EV_OFFLINE);
1504         else if (DEVI_IS_DEVICE_DEGRADED(dip))
1505                 return (CT_DEV_EV_DEGRADED);
1506         else
1507                 return (CT_DEV_EV_ONLINE);
1508 }
1509 
1510 /*
1511  * Sets the current state of a device in a device contract
1512  */
1513 static void
1514 set_cond_state(dev_info_t *dip)
1515 {
1516         uint_t state = get_state(dip);
1517         cont_device_t *ctd;
1518 
1519         /* verify that barrier is held */
1520         ASSERT(ct_barrier_held(dip));
1521 
1522         for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
1523             ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
1524                 mutex_enter(&ctd->cond_contract.ct_lock);
1525                 ASSERT(ctd->cond_dip == dip);
1526                 ctd->cond_state = state;
1527                 mutex_exit(&ctd->cond_contract.ct_lock);
1528         }
1529 }
1530 
1531 /*
1532  * Core routine called by event-specific routines when an event occurs.
1533  * Determines if an event should be published, and if it is to be
1534  * published, whether a negotiation should take place. Also implements
1535  * NEGEND events which publish the final disposition of an event after
1536  * negotiations are complete.
1537  *
1538  * When an event occurs on a minor node, this routine walks the list of
1539  * contracts hanging off a devinfo node and for each contract on the affected
1540  * dip, evaluates the following cases
1541  *
1542  *      a. an event that is synchronous, breaks the contract and NONEG not set
1543  *              - bumps up the outstanding negotiation counts on the dip
1544  *              - marks the dip as undergoing negotiation (devi_ct_neg)
1545  *              - event of type CTE_NEG is published
1546  *      b. an event that is synchronous, breaks the contract and NONEG is set
1547  *              - sets the final result to CT_NACK, event is blocked
1548  *              - does not publish an event
1549  *      c. event is asynchronous and breaks the contract
1550  *              - publishes a critical event irrespective of whether the NONEG
1551  *                flag is set, since the contract will be broken and contract
1552  *                owner needs to be informed.
1553  *      d. No contract breakage but the owner has subscribed to the event
1554  *              - publishes the event irrespective of the NONEG event as the
1555  *                owner has explicitly subscribed to the event.
1556  *      e. NEGEND event
1557  *              - publishes a critical event. Should only be doing this if
1558  *                NONEG is not set.
1559  *      f. all other events
1560  *              - Since a contract is not broken and this event has not been
1561  *                subscribed to, this event does not need to be published for
1562  *                this contract.
1563  *
1564  *      Once an event is published, what happens next depends on the type of
1565  *      event:
1566  *
1567  *      a. NEGEND event
1568  *              - cleanup all state associated with the preceding negotiation
1569  *                and return CT_ACK to the caller of contract_device_publish()
1570  *      b. NACKed event
1571  *              - One or more contracts had the NONEG term, so the event was
1572  *                blocked. Return CT_NACK to the caller.
1573  *      c. Negotiated event
1574  *              - Call wait_for_acks() to wait for responses from contract
1575  *              holders. The end result is either CT_ACK (event is permitted),
1576  *              CT_NACK (event is blocked) or CT_NONE (no contract owner)
1577  *              responded. This result is returned back to the caller.
1578  *      d. All other events
1579  *              - If the event was asynchronous (i.e. not negotiated) or
1580  *              a contract was not broken return CT_ACK to the caller.
      *
      *      Arguments:
      *
      *      dip             - devinfo node on which the event occurred
      *      dev, spec_type  - select the contracts to consider. With
      *                        dev == DDI_DEV_T_ANY every contract on the dip
      *                        matches; otherwise only contracts whose
      *                        cond_devt and cond_spec equal dev and spec_type
      *                        are considered.
      *      evtype          - CT_EV_NEGEND, or the device event to publish
      *      tnvl            - template nvlist; when non-NULL it is duplicated
      *                        into each published event. It is freed before
      *                        returning on every path, so the caller must not
      *                        use it afterwards.
      *
      *      Returns CT_ACK, CT_NACK or CT_NONE as described above. CT_NONE is
      *      also returned when no contract on the dip matches dev/spec_type.
1581  */
1582 static uint_t
1583 contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type,
1584     uint_t evtype, nvlist_t *tnvl)
1585 {
1586         cont_device_t *ctd;
1587         uint_t result = CT_NONE;
1588         uint64_t evid = 0;
1589         uint64_t nevid = 0;
1590         char *path = NULL;
1591         int negend;
1592         int match;
1593         int sync = 0;
1594         contract_t *ct;
1595         ct_kevent_t *event;
1596         nvlist_t *nvl;
1597         int broken = 0;
1598 
1599         ASSERT(dip);
1600         ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
1601         ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
1602             (spec_type == S_IFBLK || spec_type == S_IFCHR));
1603         ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT));
1604 
1605         /* Is this a synchronous state change ? */
             /*
              * is_sync_neg() results of -2 and -1 are treated as no-ops:
              * DEVI_CT_NOP is set on the dip and we return at once (CT_ACK
              * for -2, CT_NONE for -1). A later NEGEND call that finds the
              * flag set clears it and returns CT_ACK without publishing
              * anything.
              */
1606         if (evtype != CT_EV_NEGEND) {
1607                 sync = is_sync_neg(get_state(dip), evtype);
1608                 /* NOP if unsupported transition */
1609                 if (sync == -2 || sync == -1) {
1610                         DEVI(dip)->devi_flags |= DEVI_CT_NOP;
1611                         result = (sync == -2) ? CT_ACK : CT_NONE;
1612                         goto out;
1613                 }
1614                 CT_DEBUG((CE_NOTE, "publish: is%s sync state change",
1615                     sync ? "" : " not"));
1616         } else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) {
1617                 DEVI(dip)->devi_flags &= ~DEVI_CT_NOP;
1618                 result = CT_ACK;
1619                 goto out;
1620         }
1621 
             /*
              * path is only consumed by the ASSERT in the contract loop
              * below, which checks that each contract's minor path starts
              * with this dip's path followed by ':'.
              */
1622         path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1623         (void) ddi_pathname(dip, path);
1624 
1625         mutex_enter(&(DEVI(dip)->devi_ct_lock));
1626 
1627         /*
1628          * Negotiation end - set the state of the device in the contract
1629          */
1630         if (evtype == CT_EV_NEGEND) {
1631                 CT_DEBUG((CE_NOTE, "publish: negend: setting cond state"));
1632                 set_cond_state(dip);
1633         }
1634 
1635         /*
1636          * If this device didn't go through negotiation, don't publish
1637          * a NEGEND event - simply release the barrier to allow other
1638          * device events in.
1639          */
1640         negend = 0;
1641         if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) {
1642                 CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier"));
1643                 ct_barrier_release(dip);
1644                 mutex_exit(&(DEVI(dip)->devi_ct_lock));
1645                 result = CT_ACK;
1646                 goto out;
1647         } else if (evtype == CT_EV_NEGEND) {
1648                 /*
1649                  * There are negotiated contract breakages that
1650                  * need a NEGEND event
1651                  */
1652                 ASSERT(ct_barrier_held(dip));
1653                 negend = 1;
1654                 CT_DEBUG((CE_NOTE, "publish: setting negend flag"));
1655         } else {
1656                 /*
1657                  * This is a new event, not a NEGEND event. Wait for previous
1658                  * contract events to complete.
1659                  */
1660                 ct_barrier_acquire(dip);
1661         }
1662 
1663 
1664         match = 0;
1665         for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
1666             ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
1667 
1668                 ctid_t ctid;
1669                 size_t len = strlen(path);
1670 
1671                 mutex_enter(&ctd->cond_contract.ct_lock);
1672 
1673                 ASSERT(ctd->cond_dip == dip);
1674                 ASSERT(ctd->cond_minor);
1675                 ASSERT(strncmp(ctd->cond_minor, path, len) == 0 &&
1676                     ctd->cond_minor[len] == ':');
1677 
                     /* Skip contracts that are not on the requested minor */
1678                 if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
1679                         mutex_exit(&ctd->cond_contract.ct_lock);
1680                         continue;
1681                 }
1682                 if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
1683                         mutex_exit(&ctd->cond_contract.ct_lock);
1684                         continue;
1685                 }
1686 
1687                 /* We have a matching contract */
1688                 match = 1;
1689                 ctid = ctd->cond_contract.ct_id;
1690                 CT_DEBUG((CE_NOTE, "publish: found matching contract: %d",
1691                     ctid));
1692 
1693                 /*
1694                  * There are 4 possible cases
1695                  * 1. A contract is broken (dev not in acceptable state) and
1696                  *    the state change is synchronous - start negotiation
1697                  *    by sending a CTE_NEG critical event.
1698                  * 2. A contract is broken and the state change is
1699                  *    asynchronous - just send a critical event and
1700                  *    break the contract.
1701                  * 3. Contract is not broken, but consumer has subscribed
1702                  *    to the event as a critical or informative event
1703                  *    - just send the appropriate event
1704                  * 4. contract waiting for negend event - just send the critical
1705                  *    NEGEND event.
1706                  */
1707                 broken = 0;
1708                 if (!negend && !(evtype & ctd->cond_aset)) {
1709                         broken = 1;
1710                         CT_DEBUG((CE_NOTE, "publish: Contract broken: %d",
1711                             ctid));
1712                 }
1713 
1714                 /*
1715                  * Don't send event if
1716                  *      - contract is not broken AND
1717                  *      - contract holder has not subscribed to this event AND
1718                  *      - contract not waiting for a NEGEND event
1719                  */
1720                 if (!broken && !EVSENDP(ctd, evtype) &&
1721                     !ctd->cond_neg) {
1722                         CT_DEBUG((CE_NOTE, "contract_device_publish(): "
1723                             "contract (%d): no publish reqd: event %d",
1724                             ctd->cond_contract.ct_id, evtype));
1725                         mutex_exit(&ctd->cond_contract.ct_lock);
1726                         continue;
1727                 }
1728 
1729                 /*
1730                  * Note: need to kmem_zalloc() the event so mutexes are
1731                  * initialized automatically
1732                  */
1733                 ct = &ctd->cond_contract;
1734                 event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
1735                 event->cte_type = evtype;
1736 
1737                 if (broken && sync) {
1738                         CT_DEBUG((CE_NOTE, "publish: broken + sync: "
1739                             "ctid: %d", ctid));
1740                         ASSERT(!negend);
1741                         ASSERT(ctd->cond_currev_id == 0);
1742                         ASSERT(ctd->cond_currev_type == 0);
1743                         ASSERT(ctd->cond_currev_ack == 0);
1744                         ASSERT(ctd->cond_neg == 0);
1745                         if (ctd->cond_noneg) {
1746                                 /* Nothing to publish. Event has been blocked */
1747                                 CT_DEBUG((CE_NOTE, "publish: sync and noneg:"
1748                                     "not publishing blocked ev: ctid: %d",
1749                                     ctid));
1750                                 result = CT_NACK;
1751                                 kmem_free(event, sizeof (ct_kevent_t));
1752                                 mutex_exit(&ctd->cond_contract.ct_lock);
1753                                 continue;
1754                         }
1755                         event->cte_flags = CTE_NEG; /* critical neg. event */
1756                         ctd->cond_currev_type = event->cte_type;
1757                         ct_barrier_incr(dip);
1758                         DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */
1759                         ctd->cond_neg = 1;
1760                 } else if (broken && !sync) {
1761                         CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d",
1762                             ctid));
1763                         ASSERT(!negend);
1764                         ASSERT(ctd->cond_currev_id == 0);
1765                         ASSERT(ctd->cond_currev_type == 0);
1766                         ASSERT(ctd->cond_currev_ack == 0);
1767                         ASSERT(ctd->cond_neg == 0);
1768                         event->cte_flags = 0; /* critical event */
1769                 } else if (EVSENDP(ctd, event->cte_type)) {
1770                         CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d",
1771                             ctid));
1772                         ASSERT(!negend);
1773                         ASSERT(ctd->cond_currev_id == 0);
1774                         ASSERT(ctd->cond_currev_type == 0);
1775                         ASSERT(ctd->cond_currev_ack == 0);
1776                         ASSERT(ctd->cond_neg == 0);
1777                         event->cte_flags = EVINFOP(ctd, event->cte_type) ?
1778                             CTE_INFO : 0;
1779                 } else if (ctd->cond_neg) {
1780                         CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid));
1781                         ASSERT(negend);
1782                         ASSERT(ctd->cond_noneg == 0);
1783                         nevid = ctd->cond_contract.ct_nevent ?
1784                             ctd->cond_contract.ct_nevent->cte_id : 0;
1785                         ASSERT(ctd->cond_currev_id == nevid);
1786                         event->cte_flags = 0;        /* NEGEND is always critical */
1787                         ctd->cond_currev_id = 0;
1788                         ctd->cond_currev_type = 0;
1789                         ctd->cond_currev_ack = 0;
1790                         ctd->cond_neg = 0;
1791                 } else {
1792                         CT_DEBUG((CE_NOTE, "publish: not publishing event for "
1793                             "ctid: %d, evtype: %d",
1794                             ctd->cond_contract.ct_id, event->cte_type));
1795                         ASSERT(!negend);
1796                         ASSERT(ctd->cond_currev_id == 0);
1797                         ASSERT(ctd->cond_currev_type == 0);
1798                         ASSERT(ctd->cond_currev_ack == 0);
1799                         ASSERT(ctd->cond_neg == 0);
1800                         kmem_free(event, sizeof (ct_kevent_t));
1801                         mutex_exit(&ctd->cond_contract.ct_lock);
1802                         continue;
1803                 }
1804 
                     /*
                      * Each published event gets its own copy of the template
                      * nvlist. For NEGEND the copy also carries the id of the
                      * negotiation event (CTS_NEVID), and CTS_NEWCT is rewritten:
                      * 0 if the caller passed 1, otherwise this contract's id.
                      */
1805                 nvl = NULL;
1806                 if (tnvl) {
1807                         VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0);
1808                         if (negend) {
1809                                 int32_t newct = 0;
1810                                 ASSERT(ctd->cond_noneg == 0);
1811                                 VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid)
1812                                     == 0);
1813                                 VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT,
1814                                     &newct) == 0);
1815                                 VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
1816                                     newct == 1 ? 0 :
1817                                     ctd->cond_contract.ct_id) == 0);
1818                                 CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d "
1819                                     "CTS_NEVID: %llu, CTS_NEWCT: %s",
1820                                     ctid, (unsigned long long)nevid,
1821                                     newct ? "success" : "failure"));
1822 
1823                         }
1824                 }
1825 
                     /* Negotiation starting: stamp both contract timers */
1826                 if (ctd->cond_neg) {
1827                         ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1);
1828                         ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1);
1829                         ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt();
1830                         ctd->cond_contract.ct_qtime.ctm_start =
1831                             ctd->cond_contract.ct_ntime.ctm_start;
1832                 }
1833 
1834                 /*
1835                  * by holding the dip's devi_ct_lock we ensure that
1836                  * all ACK/NACKs are held up until we have finished
1837                  * publishing to all contracts.
1838                  */
1839                 mutex_exit(&ctd->cond_contract.ct_lock);
1840                 evid = cte_publish_all(ct, event, nvl, NULL);
1841                 mutex_enter(&ctd->cond_contract.ct_lock);
1842 
1843                 if (ctd->cond_neg) {
1844                         ASSERT(!negend);
1845                         ASSERT(broken);
1846                         ASSERT(sync);
1847                         ASSERT(!ctd->cond_noneg);
1848                         CT_DEBUG((CE_NOTE, "publish: sync break, setting evid"
1849                             ": %d", ctid));
1850                         ctd->cond_currev_id = evid;
1851                 } else if (negend) {
1852                         ctd->cond_contract.ct_ntime.ctm_start = -1;
1853                         ctd->cond_contract.ct_qtime.ctm_start = -1;
1854                 }
1855                 mutex_exit(&ctd->cond_contract.ct_lock);
1856         }
1857 
1858         /*
1859          * If "negend" set counter back to initial state (-1) so that
1860          * other events can be published. Also clear the negotiation flag
1861          * on dip.
1862          *
1863          * 0 .. n are used for counting.
1864          * -1 indicates counter is available for use.
1865          */
1866         if (negend) {
1867                 /*
1868                  * devi_ct_count not necessarily 0. We may have
1869                  * timed out in which case, count will be non-zero.
1870                  */
1871                 ct_barrier_release(dip);
1872                 DEVI(dip)->devi_ct_neg = 0;
1873                 CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p",
1874                     (void *)dip));
1875         } else if (DEVI(dip)->devi_ct_neg) {
1876                 ASSERT(match);
1877                 ASSERT(!ct_barrier_empty(dip));
1878                 CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p",
1879                     DEVI(dip)->devi_ct_count, (void *)dip));
1880         } else {
1881                 /*
1882                  * for non-negotiated events or subscribed events or no
1883                  * matching contracts
1884                  */
1885                 ASSERT(ct_barrier_empty(dip));
1886                 ASSERT(DEVI(dip)->devi_ct_neg == 0);
1887                 CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: "
1888                     "dip=%p", (void *)dip));
1889 
1890                 /*
1891                  * only this function when called from contract_device_negend()
1892                  * can reset the counter to READY state i.e. -1. This function
1893                  * is so called for every event whether a NEGEND event is needed
1894                  * or not, but the negend event is only published if the event
1895                  * whose end they signal is a negotiated event for the contract.
1896                  */
1897         }
1898 
1899         if (!match) {
1900                 /* No matching contracts */
1901                 CT_DEBUG((CE_NOTE, "publish: No matching contract"));
1902                 result = CT_NONE;
1903         } else if (result == CT_NACK) {
1904                 /* a non-negotiable contract exists and this is a neg. event */
                     /*
                      * The wait_for_acks() return value is deliberately dropped
                      * here: the CT_NACK recorded in the loop above is what is
                      * returned to the caller.
                      */
1905                 CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract"));
1906                 (void) wait_for_acks(dip, dev, spec_type, evtype);
1907         } else if (DEVI(dip)->devi_ct_neg) {
1908                 /* one or more contracts going through negotiations */
1909                 CT_DEBUG((CE_NOTE, "publish: sync contract: waiting"));
1910                 result = wait_for_acks(dip, dev, spec_type, evtype);
1911         } else {
1912                 /* no negotiated contracts or no broken contracts or NEGEND */
1913                 CT_DEBUG((CE_NOTE, "publish: async/no-break/negend"));
1914                 result = CT_ACK;
1915         }
1916 
1917         /*
1918          * Release the lock only now so that the only point where we
1919          * drop the lock is in wait_for_acks(). This is so that we don't
1920          * miss cv_signal/cv_broadcast from contract holders
1921          */
1922         CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock"));
1923         mutex_exit(&(DEVI(dip)->devi_ct_lock));
1924 
1925 out:
             /*
              * Common exit: the caller's tnvl is consumed here on every path.
              * path is still NULL if we left before it was allocated.
              */
1926         if (tnvl)
1927                 nvlist_free(tnvl);
1928         if (path)
1929                 kmem_free(path, MAXPATHLEN);
1930 
1931 
1932         CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result)));
1933         return (result);
1934 }
1935 
1936 
1937 /*
1938  * contract_device_offline
1939  *
1940  * Event publishing routine called by I/O framework when a device is offlined.
1941  */
1942 ct_ack_t
1943 contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type)
1944 {
1945         nvlist_t *nvl;
1946         uint_t result;
1947         uint_t evtype;
1948 
1949         VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1950 
1951         evtype = CT_DEV_EV_OFFLINE;
1952         result = contract_device_publish(dip, dev, spec_type, evtype, nvl);
1953 
1954         /*
1955          * If a contract offline is NACKED, the framework expects us to call
1956          * NEGEND ourselves, since we know the final result
1957          */
1958         if (result == CT_NACK) {
1959                 contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE);
1960         }
1961 
1962         return (result);
1963 }
1964 
1965 /*
1966  * contract_device_degrade
1967  *
1968  * Event publishing routine called by I/O framework when a device
1969  * moves to degrade state.
1970  */
1971 /*ARGSUSED*/
1972 void
1973 contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type)
1974 {
1975         nvlist_t *nvl;
1976         uint_t evtype;
1977 
1978         VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1979 
1980         evtype = CT_DEV_EV_DEGRADED;
1981         (void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
1982 }
1983 
1984 /*
1985  * contract_device_undegrade
1986  *
1987  * Event publishing routine called by I/O framework when a device
1988  * moves from degraded state to online state.
1989  */
1990 /*ARGSUSED*/
1991 void
1992 contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type)
1993 {
1994         nvlist_t *nvl;
1995         uint_t evtype;
1996 
1997         VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1998 
1999         evtype = CT_DEV_EV_ONLINE;
2000         (void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
2001 }
2002 
2003 /*
2004  * For all contracts which have undergone a negotiation (because the device
2005  * moved out of the acceptable state for that contract and the state
2006  * change is synchronous i.e. requires negotiation) this routine publishes
2007  * a CT_EV_NEGEND event with the final disposition of the event.
2008  *
2009  * This event is always a critical event.
2010  */
2011 void
2012 contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result)
2013 {
2014         nvlist_t *nvl;
2015         uint_t evtype;
2016 
2017         ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE);
2018 
2019         CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, "
2020             "dip: %p", result, (void *)dip));
2021 
2022         VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2023         VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
2024             result == CT_EV_SUCCESS ? 1 : 0) == 0);
2025 
2026         evtype = CT_EV_NEGEND;
2027         (void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
2028 
2029         CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p",
2030             (void *)dip));
2031 }
2032 
2033 /*
2034  * Wrapper routine called by other subsystems (such as LDI) to start
2035  * negotiations when a synchronous device state change occurs.
2036  * Returns CT_ACK or CT_NACK.
2037  */
2038 ct_ack_t
2039 contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type,
2040     uint_t evtype)
2041 {
2042         int     result;
2043 
2044         ASSERT(dip);
2045         ASSERT(dev != NODEV);
2046         ASSERT(dev != DDI_DEV_T_ANY);
2047         ASSERT(dev != DDI_DEV_T_NONE);
2048         ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
2049 
2050         switch (evtype) {
2051         case CT_DEV_EV_OFFLINE:
2052                 result = contract_device_offline(dip, dev, spec_type);
2053                 break;
2054         default:
2055                 cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation "
2056                     "not supported: event (%d) for dev_t (%lu) and spec (%d), "
2057                     "dip (%p)", evtype, dev, spec_type, (void *)dip);
2058                 result = CT_NACK;
2059                 break;
2060         }
2061 
2062         return (result);
2063 }
2064 
2065 /*
2066  * A wrapper routine called by other subsystems (such as the LDI) to
2067  * finalize event processing for a state change event. For synchronous
2068  * state changes, this publishes NEGEND events. For asynchronous i.e.
2069  * non-negotiable events this publishes the event.
2070  */
2071 void
2072 contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type,
2073     uint_t evtype, int ct_result)
2074 {
2075         ASSERT(dip);
2076         ASSERT(dev != NODEV);
2077         ASSERT(dev != DDI_DEV_T_ANY);
2078         ASSERT(dev != DDI_DEV_T_NONE);
2079         ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
2080 
2081         switch (evtype) {
2082         case CT_DEV_EV_OFFLINE:
2083                 contract_device_negend(dip, dev, spec_type, ct_result);
2084                 break;
2085         case CT_DEV_EV_DEGRADED:
2086                 contract_device_degrade(dip, dev, spec_type);
2087                 contract_device_negend(dip, dev, spec_type, ct_result);
2088                 break;
2089         case CT_DEV_EV_ONLINE:
2090                 contract_device_undegrade(dip, dev, spec_type);
2091                 contract_device_negend(dip, dev, spec_type, ct_result);
2092                 break;
2093         default:
2094                 cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported "
2095                     "event (%d) for dev_t (%lu) and spec (%d), dip (%p)",
2096                     evtype, dev, spec_type, (void *)dip);
2097                 break;
2098         }
2099 }
2100 
2101 /*
2102  * Called by I/O framework when a devinfo node is freed to remove the
2103  * association between a devinfo node and its contracts.
2104  */
2105 void
2106 contract_device_remove_dip(dev_info_t *dip)
2107 {
2108         cont_device_t *ctd;
2109         cont_device_t *next;
2110         contract_t *ct;
2111 
2112         mutex_enter(&(DEVI(dip)->devi_ct_lock));
2113         ct_barrier_wait_for_release(dip);
2114 
2115         for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) {
2116                 next = list_next(&(DEVI(dip)->devi_ct), ctd);
2117                 list_remove(&(DEVI(dip)->devi_ct), ctd);
2118                 ct = &ctd->cond_contract;
2119                 /*
2120                  * Unlink the dip associated with this contract
2121                  */
2122                 mutex_enter(&ct->ct_lock);
2123                 ASSERT(ctd->cond_dip == dip);
2124                 ctd->cond_dip = NULL; /* no longer linked to dip */
2125                 contract_rele(ct);      /* remove hold for dip linkage */
2126                 CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: "
2127                     "ctid: %d", ct->ct_id));
2128                 mutex_exit(&ct->ct_lock);
2129         }
2130         ASSERT(list_is_empty(&(DEVI(dip)->devi_ct)));
2131         mutex_exit(&(DEVI(dip)->devi_ct_lock));
2132 }
2133 
2134 /*
2135  * Barrier related routines
2136  */
2137 static void
2138 ct_barrier_acquire(dev_info_t *dip)
2139 {
2140         ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2141         CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier"));
2142         while (DEVI(dip)->devi_ct_count != -1)
2143                 cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
2144         DEVI(dip)->devi_ct_count = 0;
2145         CT_DEBUG((CE_NOTE, "ct_barrier_acquire: thread owns barrier"));
2146 }
2147 
2148 static void
2149 ct_barrier_release(dev_info_t *dip)
2150 {
2151         ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2152         ASSERT(DEVI(dip)->devi_ct_count != -1);
2153         DEVI(dip)->devi_ct_count = -1;
2154         cv_broadcast(&(DEVI(dip)->devi_ct_cv));
2155         CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier"));
2156 }
2157 
2158 static int
2159 ct_barrier_held(dev_info_t *dip)
2160 {
2161         ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2162         return (DEVI(dip)->devi_ct_count != -1);
2163 }
2164 
2165 static int
2166 ct_barrier_empty(dev_info_t *dip)
2167 {
2168         ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2169         ASSERT(DEVI(dip)->devi_ct_count != -1);
2170         return (DEVI(dip)->devi_ct_count == 0);
2171 }
2172 
2173 static void
2174 ct_barrier_wait_for_release(dev_info_t *dip)
2175 {
2176         ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2177         while (DEVI(dip)->devi_ct_count != -1)
2178                 cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
2179 }
2180 
2181 static void
2182 ct_barrier_decr(dev_info_t *dip)
2183 {
2184         CT_DEBUG((CE_NOTE, "barrier_decr:  ct_count before decr: %d",
2185             DEVI(dip)->devi_ct_count));
2186 
2187         ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2188         ASSERT(DEVI(dip)->devi_ct_count > 0);
2189 
2190         DEVI(dip)->devi_ct_count--;
2191         if (DEVI(dip)->devi_ct_count == 0) {
2192                 cv_broadcast(&DEVI(dip)->devi_ct_cv);
2193                 CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast"));
2194         }
2195 }
2196 
2197 static void
2198 ct_barrier_incr(dev_info_t *dip)
2199 {
2200         ASSERT(ct_barrier_held(dip));
2201         DEVI(dip)->devi_ct_count++;
2202 }
2203 
2204 static int
2205 ct_barrier_wait_for_empty(dev_info_t *dip, int secs)
2206 {
2207         clock_t abstime;
2208 
2209         ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2210 
2211         abstime = ddi_get_lbolt() + drv_sectohz(secs);
2212         while (DEVI(dip)->devi_ct_count) {
2213                 if (cv_timedwait(&(DEVI(dip)->devi_ct_cv),
2214                     &(DEVI(dip)->devi_ct_lock), abstime) == -1) {
2215                         return (-1);
2216                 }
2217         }
2218         return (0);
2219 }