1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * FMD Case Subsystem
  28  *
  29  * Diagnosis engines are expected to group telemetry events related to the
  30  * diagnosis of a particular problem on the system into a set of cases.  The
  31  * diagnosis engine may have any number of cases open at a given point in time.
  32  * Some cases may eventually be *solved* by associating a suspect list of one
  33  * or more problems with the case, at which point fmd publishes a list.suspect
  34  * event for the case and it becomes visible to administrators and agents.
  35  *
  36  * Every case is named using a UUID, and is globally visible in the case hash.
  37  * Cases are reference-counted, except for the reference from the case hash
  38  * itself.  Consumers of case references include modules, which store active
  39  * cases on the mod_cases list, ASRUs in the resource cache, and the RPC code.
  40  *
  41  * Cases obey the following state machine.  In states UNSOLVED, SOLVED, and
  42  * CLOSE_WAIT, a case's module refers to the owning module (a diagnosis engine
  43  * or transport) and the case is referenced by the mod_cases list.  Once the
  44  * case reaches the CLOSED or REPAIRED states, it is deleted from the owner's
  45  * mod_cases and its module changes to refer to the root module (fmd.d_rmod).
  46  *
  47  *                      +------------+
  48  *           +----------|  UNSOLVED  |
  49  *           |          +------------+
  50  *           |                1 |
  51  *           |                  |
  52  *           |          +-------v----+
  53  *         2 |          |    SOLVED  |
  54  *           |          +------------+
  55  *           |              3 |  5 |
  56  *           +------------+   |    |
  57  *                        |   |    |
  58  *                      +-v---v----v-+
  59  *                      | CLOSE_WAIT |
  60  *                      +------------+
  61  *                        |   |    |
  62  *            +-----------+   |    +------------+
  63  *            |             4 |                 |
  64  *            v         +-----v------+          |
  65  *         discard      |   CLOSED   |        6 |
  66  *                      +------------+          |
  67  *                            |                 |
  68  *                            |    +------------+
  69  *                          7 |    |
  70  *                      +-----v----v-+
  71  *                      |  REPAIRED  |
  72  *                      +------------+
  73  *                            |
  74  *                          8 |
  75  *                      +-----v------+
  76  *                      |  RESOLVED  |
  77  *                      +------------+
  78  *                            |
  79  *                            v
  80  *                         discard
  81  *
  82  * The state machine changes are triggered by calls to fmd_case_transition()
  83  * from various locations inside of fmd, as described below:
  84  *
  85  * [1] Called by: fmd_case_solve()
  86  *       Actions: FMD_CF_SOLVED flag is set in ci_flags
  87  *                conviction policy is applied to suspect list
  88  *                suspects convicted are marked faulty (F) in R$
  89  *                list.suspect event logged and dispatched
  90  *
  91  * [2] Called by: fmd_case_close(), fmd_case_uuclose()
  92  *       Actions: diagnosis engine fmdo_close() entry point scheduled
  93  *                case discarded upon exit from CLOSE_WAIT
  94  *
  95  * [3] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose()
  96  *       Actions: FMD_CF_ISOLATED flag is set in ci_flags
  97  *                suspects convicted (F) are marked unusable (U) in R$
  98  *                diagnosis engine fmdo_close() entry point scheduled
  99  *                case transitions to CLOSED [4] upon exit from CLOSE_WAIT
 100  *
 101  * [4] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
 102  *       Actions: list.isolated event dispatched
 103  *                case deleted from module's list of open cases
 104  *
 105  * [5] Called by: fmd_case_repair(), fmd_case_update()
 106  *       Actions: FMD_CF_REPAIR flag is set in ci_flags
 107  *                diagnosis engine fmdo_close() entry point scheduled
 108  *                case transitions to REPAIRED [6] upon exit from CLOSE_WAIT
 109  *
 110  * [6] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
 111  *       Actions: suspects convicted are marked non faulty (!F) in R$
 112  *                list.repaired or list.updated event dispatched
 113  *
 114  * [7] Called by: fmd_case_repair(), fmd_case_update()
 115  *       Actions: FMD_CF_REPAIR flag is set in ci_flags
 116  *                suspects convicted are marked non faulty (!F) in R$
 117  *                list.repaired or list.updated event dispatched
 118  *
 119  * [8] Called by: fmd_case_uuresolve()
 120  *       Actions: list.resolved event dispatched
 121  *                case is discarded
 122  */
 123 
 124 #include <sys/fm/protocol.h>
 125 #include <uuid/uuid.h>
 126 #include <alloca.h>
 127 
 128 #include <fmd_alloc.h>
 129 #include <fmd_module.h>
 130 #include <fmd_error.h>
 131 #include <fmd_conf.h>
 132 #include <fmd_case.h>
 133 #include <fmd_string.h>
 134 #include <fmd_subr.h>
 135 #include <fmd_protocol.h>
 136 #include <fmd_event.h>
 137 #include <fmd_eventq.h>
 138 #include <fmd_dispq.h>
 139 #include <fmd_buf.h>
 140 #include <fmd_log.h>
 141 #include <fmd_asru.h>
 142 #include <fmd_fmri.h>
 143 #include <fmd_xprt.h>
 144 
 145 #include <fmd.h>
 146 
 147 static const char *const _fmd_case_snames[] = {
 148         "UNSOLVED",     /* FMD_CASE_UNSOLVED */
 149         "SOLVED",       /* FMD_CASE_SOLVED */
 150         "CLOSE_WAIT",   /* FMD_CASE_CLOSE_WAIT */
 151         "CLOSED",       /* FMD_CASE_CLOSED */
 152         "REPAIRED",     /* FMD_CASE_REPAIRED */
 153         "RESOLVED"      /* FMD_CASE_RESOLVED */
 154 };
 155 
 156 static fmd_case_impl_t *fmd_case_tryhold(fmd_case_impl_t *);
 157 
 158 fmd_case_hash_t *
 159 fmd_case_hash_create(void)
 160 {
 161         fmd_case_hash_t *chp = fmd_alloc(sizeof (fmd_case_hash_t), FMD_SLEEP);
 162 
 163         (void) pthread_rwlock_init(&chp->ch_lock, NULL);
 164         chp->ch_hashlen = fmd.d_str_buckets;
 165         chp->ch_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen, FMD_SLEEP);
 166         chp->ch_code_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen,
 167             FMD_SLEEP);
 168         chp->ch_count = 0;
 169 
 170         return (chp);
 171 }
 172 
 173 /*
 174  * Destroy the case hash.  Unlike most of our hash tables, no active references
 175  * are kept by the case hash itself; all references come from other subsystems.
 176  * The hash must be destroyed after all modules are unloaded; if anything was
 177  * present in the hash it would be by definition a reference count leak.
 178  */
 179 void
 180 fmd_case_hash_destroy(fmd_case_hash_t *chp)
 181 {
 182         fmd_free(chp->ch_hash, sizeof (void *) * chp->ch_hashlen);
 183         fmd_free(chp->ch_code_hash, sizeof (void *) * chp->ch_hashlen);
 184         fmd_free(chp, sizeof (fmd_case_hash_t));
 185 }
 186 
 187 /*
 188  * Take a snapshot of the case hash by placing an additional hold on each
 189  * member in an auxiliary array, and then call 'func' for each case.
 190  */
 191 void
 192 fmd_case_hash_apply(fmd_case_hash_t *chp,
 193     void (*func)(fmd_case_t *, void *), void *arg)
 194 {
 195         fmd_case_impl_t *cp, **cps, **cpp;
 196         uint_t cpc, i;
 197 
 198         (void) pthread_rwlock_rdlock(&chp->ch_lock);
 199 
 200         cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
 201         cpc = chp->ch_count;
 202 
 203         for (i = 0; i < chp->ch_hashlen; i++) {
 204                 for (cp = chp->ch_hash[i]; cp != NULL; cp = cp->ci_next)
 205                         *cpp++ = fmd_case_tryhold(cp);
 206         }
 207 
 208         ASSERT(cpp == cps + cpc);
 209         (void) pthread_rwlock_unlock(&chp->ch_lock);
 210 
 211         for (i = 0; i < cpc; i++) {
 212                 if (cps[i] != NULL) {
 213                         func((fmd_case_t *)cps[i], arg);
 214                         fmd_case_rele((fmd_case_t *)cps[i]);
 215                 }
 216         }
 217 
 218         fmd_free(cps, cpc * sizeof (fmd_case_t *));
 219 }
 220 
 221 static void
 222 fmd_case_code_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
 223 {
 224         uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
 225 
 226         cip->ci_code_next = chp->ch_code_hash[h];
 227         chp->ch_code_hash[h] = cip;
 228 }
 229 
 230 static void
 231 fmd_case_code_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
 232 {
 233         fmd_case_impl_t **pp, *cp;
 234 
 235         if (cip->ci_code) {
 236                 uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
 237 
 238                 pp = &chp->ch_code_hash[h];
 239                 for (cp = *pp; cp != NULL; cp = cp->ci_code_next) {
 240                         if (cp != cip)
 241                                 pp = &cp->ci_code_next;
 242                         else
 243                                 break;
 244                 }
 245                 if (cp != NULL) {
 246                         *pp = cp->ci_code_next;
 247                         cp->ci_code_next = NULL;
 248                 }
 249         }
 250 }
 251 
 252 /*
 253  * Look up the diagcode for this case and cache it in ci_code.  If no suspects
 254  * were defined for this case or if the lookup fails, the event dictionary or
 255  * module code is broken, and we set the event code to a precomputed default.
 256  */
 257 static const char *
 258 fmd_case_mkcode(fmd_case_t *cp)
 259 {
 260         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
 261         fmd_case_susp_t *cis;
 262         fmd_case_hash_t *chp = fmd.d_cases;
 263 
 264         char **keys, **keyp;
 265         const char *s;
 266 
 267         ASSERT(MUTEX_HELD(&cip->ci_lock));
 268         ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
 269 
 270         /*
 271          * delete any existing entry from code hash if it is on it
 272          */
 273         fmd_case_code_hash_delete(chp, cip);
 274 
 275         fmd_free(cip->ci_code, cip->ci_codelen);
 276         cip->ci_codelen = cip->ci_mod->mod_codelen;
 277         cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
 278         keys = keyp = alloca(sizeof (char *) * (cip->ci_nsuspects + 1));
 279 
 280         for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
 281                 if (nvlist_lookup_string(cis->cis_nvl, FM_CLASS, keyp) == 0)
 282                         keyp++;
 283         }
 284 
 285         *keyp = NULL; /* mark end of keys[] array for libdiagcode */
 286 
 287         if (cip->ci_nsuspects == 0 || fmd_module_dc_key2code(
 288             cip->ci_mod, keys, cip->ci_code, cip->ci_codelen) != 0) {
 289                 (void) fmd_conf_getprop(fmd.d_conf, "nodiagcode", &s);
 290                 fmd_free(cip->ci_code, cip->ci_codelen);
 291                 cip->ci_codelen = strlen(s) + 1;
 292                 cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
 293                 (void) strcpy(cip->ci_code, s);
 294         }
 295 
 296         /*
 297          * add into hash of solved cases
 298          */
 299         fmd_case_code_hash_insert(chp, cip);
 300 
 301         return (cip->ci_code);
 302 }
 303 
 304 typedef struct {
 305         int     *fcl_countp;
 306         int     fcl_maxcount;
 307         uint8_t *fcl_ba;
 308         nvlist_t **fcl_nva;
 309         int     *fcl_msgp;
 310 } fmd_case_lst_t;
 311 
 312 static void
 313 fmd_case_set_lst(fmd_asru_link_t *alp, void *arg)
 314 {
 315         fmd_case_lst_t *entryp = (fmd_case_lst_t *)arg;
 316         boolean_t b;
 317         int state;
 318 
 319         if (*entryp->fcl_countp >= entryp->fcl_maxcount)
 320                 return;
 321         if (nvlist_lookup_boolean_value(alp->al_event, FM_SUSPECT_MESSAGE,
 322             &b) == 0 && b == B_FALSE)
 323                 *entryp->fcl_msgp = B_FALSE;
 324         entryp->fcl_ba[*entryp->fcl_countp] = 0;
 325         state = fmd_asru_al_getstate(alp);
 326         if (state & FMD_ASRU_DEGRADED)
 327                 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_DEGRADED;
 328         if (state & FMD_ASRU_UNUSABLE)
 329                 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_UNUSABLE;
 330         if (state & FMD_ASRU_FAULTY)
 331                 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_FAULTY;
 332         if (!(state & FMD_ASRU_PRESENT))
 333                 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_NOT_PRESENT;
 334         if (alp->al_reason == FMD_ASRU_REPAIRED)
 335                 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPAIRED;
 336         else if (alp->al_reason == FMD_ASRU_REPLACED)
 337                 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPLACED;
 338         else if (alp->al_reason == FMD_ASRU_ACQUITTED)
 339                 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_ACQUITTED;
 340         entryp->fcl_nva[*entryp->fcl_countp] = alp->al_event;
 341         (*entryp->fcl_countp)++;
 342 }
 343 
 344 static void
 345 fmd_case_faulty(fmd_asru_link_t *alp, void *arg)
 346 {
 347         int *faultyp = (int *)arg;
 348 
 349         *faultyp |= (alp->al_flags & FMD_ASRU_FAULTY);
 350 }
 351 
 352 static void
 353 fmd_case_usable(fmd_asru_link_t *alp, void *arg)
 354 {
 355         int *usablep = (int *)arg;
 356 
 357         *usablep |= !(fmd_asru_al_getstate(alp) & FMD_ASRU_UNUSABLE);
 358 }
 359 
 360 static void
 361 fmd_case_not_faulty(fmd_asru_link_t *alp, void *arg)
 362 {
 363         int *not_faultyp = (int *)arg;
 364 
 365         *not_faultyp |= !(alp->al_flags & FMD_ASRU_FAULTY);
 366 }
 367 
 368 /*
 369  * Have we got any suspects with an asru that are still unusable and present?
 370  */
 371 static void
 372 fmd_case_unusable_and_present(fmd_asru_link_t *alp, void *arg)
 373 {
 374         int *rvalp = (int *)arg;
 375         int state;
 376         nvlist_t *asru;
 377 
 378         /*
 379          * if this a proxy case and this suspect doesn't have an local asru
 380          * then state is unknown so we must assume it may still be unusable.
 381          */
 382         if ((alp->al_flags & FMD_ASRU_PROXY) &&
 383             !(alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) {
 384                 *rvalp |= B_TRUE;
 385                 return;
 386         }
 387 
 388         state = fmd_asru_al_getstate(alp);
 389         if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) != 0)
 390                 return;
 391         *rvalp |= ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_PRESENT));
 392 }
 393 
 394 nvlist_t *
 395 fmd_case_mkevent(fmd_case_t *cp, const char *class)
 396 {
 397         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
 398         nvlist_t **nva, *nvl;
 399         uint8_t *ba;
 400         int msg = B_TRUE;
 401         const char *code;
 402         fmd_case_lst_t fcl;
 403         int count = 0;
 404 
 405         (void) pthread_mutex_lock(&cip->ci_lock);
 406         ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
 407 
 408         nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects);
 409         ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects);
 410 
 411         /*
 412          * For each suspect associated with the case, store its fault event
 413          * nvlist in 'nva'.  We also look to see if any of the suspect faults
 414          * have asked not to be messaged.  If any of them have made such a
 415          * request, propagate that attribute to the composite list.* event.
 416          * Finally, store each suspect's faulty status into the bitmap 'ba'.
 417          */
 418         fcl.fcl_countp = &count;
 419         fcl.fcl_maxcount = cip->ci_nsuspects;
 420         fcl.fcl_msgp = &msg;
 421         fcl.fcl_ba = ba;
 422         fcl.fcl_nva = nva;
 423         fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl);
 424 
 425         if (cip->ci_code == NULL)
 426                 (void) fmd_case_mkcode(cp);
 427         /*
 428          * For repair and updated event, we lookup diagcode from dict using key
 429          * "list.repaired" or "list.updated" or "list.resolved".
 430          */
 431         if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
 432                 (void) fmd_conf_getprop(fmd.d_conf, "repaircode", &code);
 433         else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0)
 434                 (void) fmd_conf_getprop(fmd.d_conf, "resolvecode", &code);
 435         else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0)
 436                 (void) fmd_conf_getprop(fmd.d_conf, "updatecode", &code);
 437         else
 438                 code = cip->ci_code;
 439 
 440         if (msg == B_FALSE)
 441                 cip->ci_flags |= FMD_CF_INVISIBLE;
 442 
 443         /*
 444          * Use the ci_diag_de if one has been saved (eg for an injected fault).
 445          * Otherwise use the authority for the current module.
 446          */
 447         nvl = fmd_protocol_list(class, cip->ci_diag_de == NULL ?
 448             cip->ci_mod->mod_fmri : cip->ci_diag_de, cip->ci_uuid, code, count,
 449             nva, ba, msg, &cip->ci_tv, cip->ci_injected);
 450 
 451         (void) pthread_mutex_unlock(&cip->ci_lock);
 452         return (nvl);
 453 }
 454 
 455 static int fmd_case_match_on_faulty_overlap = 1;
 456 static int fmd_case_match_on_acquit_overlap = 1;
 457 static int fmd_case_auto_acquit_isolated = 1;
 458 static int fmd_case_auto_acquit_non_acquitted = 1;
 459 static int fmd_case_too_recent = 10; /* time in seconds */
 460 
 461 static boolean_t
 462 fmd_case_compare_elem(nvlist_t *nvl, nvlist_t *xnvl, const char *elem)
 463 {
 464         nvlist_t *new_rsrc;
 465         nvlist_t *rsrc;
 466         char *new_name = NULL;
 467         char *name = NULL;
 468         ssize_t new_namelen;
 469         ssize_t namelen;
 470         int fmri_present = 1;
 471         int new_fmri_present = 1;
 472         int match = B_FALSE;
 473         fmd_topo_t *ftp = fmd_topo_hold();
 474 
 475         if (nvlist_lookup_nvlist(xnvl, elem, &rsrc) != 0)
 476                 fmri_present = 0;
 477         else {
 478                 if ((namelen = fmd_fmri_nvl2str(rsrc, NULL, 0)) == -1)
 479                         goto done;
 480                 name = fmd_alloc(namelen + 1, FMD_SLEEP);
 481                 if (fmd_fmri_nvl2str(rsrc, name, namelen + 1) == -1)
 482                         goto done;
 483         }
 484         if (nvlist_lookup_nvlist(nvl, elem, &new_rsrc) != 0)
 485                 new_fmri_present = 0;
 486         else {
 487                 if ((new_namelen = fmd_fmri_nvl2str(new_rsrc, NULL, 0)) == -1)
 488                         goto done;
 489                 new_name = fmd_alloc(new_namelen + 1, FMD_SLEEP);
 490                 if (fmd_fmri_nvl2str(new_rsrc, new_name, new_namelen + 1) == -1)
 491                         goto done;
 492         }
 493         match = (fmri_present == new_fmri_present &&
 494             (fmri_present == 0 ||
 495             topo_fmri_strcmp(ftp->ft_hdl, name, new_name)));
 496 done:
 497         if (name != NULL)
 498                 fmd_free(name, namelen + 1);
 499         if (new_name != NULL)
 500                 fmd_free(new_name, new_namelen + 1);
 501         fmd_topo_rele(ftp);
 502         return (match);
 503 }
 504 
 505 static int
 506 fmd_case_match_suspect(nvlist_t *nvl1, nvlist_t *nvl2)
 507 {
 508         char *class, *new_class;
 509 
 510         if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_ASRU))
 511                 return (0);
 512         if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_RESOURCE))
 513                 return (0);
 514         if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_FRU))
 515                 return (0);
 516         (void) nvlist_lookup_string(nvl2, FM_CLASS, &class);
 517         (void) nvlist_lookup_string(nvl1, FM_CLASS, &new_class);
 518         return (strcmp(class, new_class) == 0);
 519 }
 520 
 521 typedef struct {
 522         int     *fcms_countp;
 523         int     fcms_maxcount;
 524         fmd_case_impl_t *fcms_cip;
 525         uint8_t *fcms_new_susp_state;
 526         uint8_t *fcms_old_susp_state;
 527         uint8_t *fcms_old_match_state;
 528 } fcms_t;
 529 #define SUSPECT_STATE_FAULTY                            0x1
 530 #define SUSPECT_STATE_ISOLATED                          0x2
 531 #define SUSPECT_STATE_REMOVED                           0x4
 532 #define SUSPECT_STATE_ACQUITED                          0x8
 533 #define SUSPECT_STATE_REPAIRED                          0x10
 534 #define SUSPECT_STATE_REPLACED                          0x20
 535 #define SUSPECT_STATE_NO_MATCH                          0x1
 536 
 537 /*
 538  * This is called for each suspect in the old case. Compare it against each
 539  * suspect in the new case, setting fcms_old_susp_state and fcms_new_susp_state
 540  * as appropriate. fcms_new_susp_state will left as 0 if the suspect is not
 541  * found in the old case.
 542  */
 543 static void
 544 fmd_case_match_suspects(fmd_asru_link_t *alp, void *arg)
 545 {
 546         fcms_t *fcmsp = (fcms_t *)arg;
 547         fmd_case_impl_t *cip = fcmsp->fcms_cip;
 548         fmd_case_susp_t *cis;
 549         int i = 0;
 550         int state = fmd_asru_al_getstate(alp);
 551 
 552         if (*fcmsp->fcms_countp >= fcmsp->fcms_maxcount)
 553                 return;
 554 
 555         if (!(state & FMD_ASRU_PRESENT) || (!(state & FMD_ASRU_FAULTY) &&
 556             alp->al_reason == FMD_ASRU_REMOVED))
 557                 fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
 558                     SUSPECT_STATE_REMOVED;
 559         else if ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_FAULTY))
 560                 fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
 561                     SUSPECT_STATE_ISOLATED;
 562         else if (state & FMD_ASRU_FAULTY)
 563                 fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
 564                     SUSPECT_STATE_FAULTY;
 565         else if (alp->al_reason == FMD_ASRU_REPLACED)
 566                 fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
 567                     SUSPECT_STATE_REPLACED;
 568         else if (alp->al_reason == FMD_ASRU_ACQUITTED)
 569                 fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
 570                     SUSPECT_STATE_ACQUITED;
 571         else
 572                 fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
 573                     SUSPECT_STATE_REPAIRED;
 574 
 575         for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next, i++)
 576                 if (fmd_case_match_suspect(cis->cis_nvl, alp->al_event) == 1)
 577                         break;
 578         if (cis != NULL)
 579                 fcmsp->fcms_new_susp_state[i] =
 580                     fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp];
 581         else
 582                 fcmsp->fcms_old_match_state[*fcmsp->fcms_countp] |=
 583                     SUSPECT_STATE_NO_MATCH;
 584         (*fcmsp->fcms_countp)++;
 585 }
 586 
 587 typedef struct {
 588         int     *fca_do_update;
 589         fmd_case_impl_t *fca_cip;
 590 } fca_t;
 591 
 592 /*
 593  * Re-fault all acquitted suspects that are still present in the new list.
 594  */
 595 static void
 596 fmd_case_fault_acquitted_matching(fmd_asru_link_t *alp, void *arg)
 597 {
 598         fca_t *fcap = (fca_t *)arg;
 599         fmd_case_impl_t *cip = fcap->fca_cip;
 600         fmd_case_susp_t *cis;
 601         int state = fmd_asru_al_getstate(alp);
 602 
 603         if (!(state & FMD_ASRU_FAULTY) &&
 604             alp->al_reason == FMD_ASRU_ACQUITTED) {
 605                 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
 606                         if (fmd_case_match_suspect(cis->cis_nvl,
 607                             alp->al_event) == 1)
 608                                 break;
 609                 if (cis != NULL) {
 610                         (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
 611                         *fcap->fca_do_update = 1;
 612                 }
 613         }
 614 }
 615 
 616 /*
 617  * Re-fault all suspects that are still present in the new list.
 618  */
 619 static void
 620 fmd_case_fault_all_matching(fmd_asru_link_t *alp, void *arg)
 621 {
 622         fca_t *fcap = (fca_t *)arg;
 623         fmd_case_impl_t *cip = fcap->fca_cip;
 624         fmd_case_susp_t *cis;
 625         int state = fmd_asru_al_getstate(alp);
 626 
 627         if (!(state & FMD_ASRU_FAULTY)) {
 628                 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
 629                         if (fmd_case_match_suspect(cis->cis_nvl,
 630                             alp->al_event) == 1)
 631                                 break;
 632                 if (cis != NULL) {
 633                         (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
 634                         *fcap->fca_do_update = 1;
 635                 }
 636         }
 637 }
 638 
 639 /*
 640  * Acquit all suspects that are no longer present in the new list.
 641  */
 642 static void
 643 fmd_case_acquit_no_match(fmd_asru_link_t *alp, void *arg)
 644 {
 645         fca_t *fcap = (fca_t *)arg;
 646         fmd_case_impl_t *cip = fcap->fca_cip;
 647         fmd_case_susp_t *cis;
 648         int state = fmd_asru_al_getstate(alp);
 649 
 650         if (state & FMD_ASRU_FAULTY) {
 651                 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
 652                         if (fmd_case_match_suspect(cis->cis_nvl,
 653                             alp->al_event) == 1)
 654                                 break;
 655                 if (cis == NULL) {
 656                         (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
 657                             FMD_ASRU_ACQUITTED);
 658                         *fcap->fca_do_update = 1;
 659                 }
 660         }
 661 }
 662 
 663 /*
 664  * Acquit all isolated suspects.
 665  */
 666 static void
 667 fmd_case_acquit_isolated(fmd_asru_link_t *alp, void *arg)
 668 {
 669         int *do_update = (int *)arg;
 670         int state = fmd_asru_al_getstate(alp);
 671 
 672         if ((state & FMD_ASRU_PRESENT) && (state & FMD_ASRU_UNUSABLE) &&
 673             (state & FMD_ASRU_FAULTY)) {
 674                 (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
 675                     FMD_ASRU_ACQUITTED);
 676                 *do_update = 1;
 677         }
 678 }
 679 
 680 /*
 681  * Acquit suspect which matches specified nvlist
 682  */
 683 static void
 684 fmd_case_acquit_suspect(fmd_asru_link_t *alp, void *arg)
 685 {
 686         nvlist_t *nvl = (nvlist_t *)arg;
 687         int state = fmd_asru_al_getstate(alp);
 688 
 689         if ((state & FMD_ASRU_FAULTY) &&
 690             fmd_case_match_suspect(nvl, alp->al_event) == 1)
 691                 (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
 692                     FMD_ASRU_ACQUITTED);
 693 }
 694 
 695 typedef struct {
 696         fmd_case_impl_t *fccd_cip;
 697         uint8_t *fccd_new_susp_state;
 698         uint8_t *fccd_new_match_state;
 699         int *fccd_discard_new;
 700         int *fccd_adjust_new;
 701 } fccd_t;
 702 
 703 /*
 704  * see if a matching suspect list already exists in the cache
 705  */
 706 static void
 707 fmd_case_check_for_dups(fmd_case_t *old_cp, void *arg)
 708 {
 709         fccd_t *fccdp = (fccd_t *)arg;
 710         fmd_case_impl_t *new_cip = fccdp->fccd_cip;
 711         fmd_case_impl_t *old_cip = (fmd_case_impl_t *)old_cp;
 712         int i, count = 0, do_update = 0, got_isolated_overlap = 0;
 713         int got_faulty_overlap = 0;
 714         int got_acquit_overlap = 0;
 715         boolean_t too_recent;
 716         uint64_t most_recent = 0;
 717         fcms_t fcms;
 718         fca_t fca;
 719         uint8_t *new_susp_state;
 720         uint8_t *old_susp_state;
 721         uint8_t *old_match_state;
 722 
 723         new_susp_state = alloca(new_cip->ci_nsuspects * sizeof (uint8_t));
 724         for (i = 0; i < new_cip->ci_nsuspects; i++)
 725                 new_susp_state[i] = 0;
 726         old_susp_state = alloca(old_cip->ci_nsuspects * sizeof (uint8_t));
 727         for (i = 0; i < old_cip->ci_nsuspects; i++)
 728                 old_susp_state[i] = 0;
 729         old_match_state = alloca(old_cip->ci_nsuspects * sizeof (uint8_t));
 730         for (i = 0; i < old_cip->ci_nsuspects; i++)
 731                 old_match_state[i] = 0;
 732 
 733         /*
 734          * Compare with each suspect in the existing case.
 735          */
 736         fcms.fcms_countp = &count;
 737         fcms.fcms_maxcount = old_cip->ci_nsuspects;
 738         fcms.fcms_cip = new_cip;
 739         fcms.fcms_new_susp_state = new_susp_state;
 740         fcms.fcms_old_susp_state = old_susp_state;
 741         fcms.fcms_old_match_state = old_match_state;
 742         fmd_asru_hash_apply_by_case(fmd.d_asrus, (fmd_case_t *)old_cip,
 743             fmd_case_match_suspects, &fcms);
 744 
 745         /*
 746          * If we have some faulty, non-isolated suspects that overlap, then most
 747          * likely it is the suspects that overlap in the suspect lists that are
 748          * to blame. So we can consider this to be a match.
 749          */
 750         for (i = 0; i < new_cip->ci_nsuspects; i++)
 751                 if (new_susp_state[i] == SUSPECT_STATE_FAULTY)
 752                         got_faulty_overlap = 1;
 753         if (got_faulty_overlap && fmd_case_match_on_faulty_overlap)
 754                 goto got_match;
 755 
 756         /*
 757          * If we have no faulty, non-isolated suspects in the old case, but we
 758          * do have some acquitted suspects that overlap, then most likely it is
 759          * the acquitted suspects that overlap in the suspect lists that are
 760          * to blame. So we can consider this to be a match.
 761          */
 762         for (i = 0; i < new_cip->ci_nsuspects; i++)
 763                 if (new_susp_state[i] == SUSPECT_STATE_ACQUITED)
 764                         got_acquit_overlap = 1;
 765         for (i = 0; i < old_cip->ci_nsuspects; i++)
 766                 if (old_susp_state[i] == SUSPECT_STATE_FAULTY)
 767                         got_acquit_overlap = 0;
 768         if (got_acquit_overlap && fmd_case_match_on_acquit_overlap)
 769                 goto got_match;
 770 
 771         /*
 772          * Check that all suspects in the new list are present in the old list.
 773          * Return if we find one that isn't.
 774          */
 775         for (i = 0; i < new_cip->ci_nsuspects; i++)
 776                 if (new_susp_state[i] == 0)
 777                         return;
 778 
 779         /*
 780          * Check that all suspects in the old list are present in the new list
 781          * *or* they are isolated or removed/replaced (which would explain why
 782          * they are not present in the new list). Return if we find one that is
 783          * faulty and unisolated or repaired or acquitted, and that is not
 784          * present in the new case.
 785          */
 786         for (i = 0; i < old_cip->ci_nsuspects; i++)
 787                 if (old_match_state[i] == SUSPECT_STATE_NO_MATCH &&
 788                     (old_susp_state[i] == SUSPECT_STATE_FAULTY ||
 789                     old_susp_state[i] == SUSPECT_STATE_ACQUITED ||
 790                     old_susp_state[i] == SUSPECT_STATE_REPAIRED))
 791                         return;
 792 
 793 got_match:
 794         /*
 795          * If the old case is already in repaired/resolved state, we can't
 796          * do anything more with it, so keep the new case, but acquit some
 797          * of the suspects if appropriate.
 798          */
 799         if (old_cip->ci_state >= FMD_CASE_REPAIRED) {
 800                 if (fmd_case_auto_acquit_non_acquitted) {
 801                         *fccdp->fccd_adjust_new = 1;
 802                         for (i = 0; i < new_cip->ci_nsuspects; i++) {
 803                                 fccdp->fccd_new_susp_state[i] |=
 804                                     new_susp_state[i];
 805                                 if (new_susp_state[i] == 0)
 806                                         fccdp->fccd_new_susp_state[i] =
 807                                             SUSPECT_STATE_NO_MATCH;
 808                         }
 809                 }
 810                 return;
 811         }
 812 
 813         /*
 814          * Otherwise discard the new case and keep the old, again updating the
 815          * state of the suspects as appropriate
 816          */
 817         *fccdp->fccd_discard_new = 1;
 818         fca.fca_cip = new_cip;
 819         fca.fca_do_update = &do_update;
 820 
 821         /*
 822          * See if new case occurred within fmd_case_too_recent seconds of the
 823          * most recent modification to the old case and if so don't do
 824          * auto-acquit. This avoids problems if a flood of ereports come in and
 825          * they don't all get diagnosed before the first case causes some of
 826          * the devices to be isolated making it appear that an isolated device
 827          * was in the suspect list.
 828          */
 829         fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
 830             fmd_asru_most_recent, &most_recent);
 831         too_recent = (new_cip->ci_tv.tv_sec - most_recent <
 832             fmd_case_too_recent);
 833 
 834         if (got_faulty_overlap) {
 835                 /*
 836                  * Acquit any suspects not present in the new list, plus
 837                  * any that are are present but are isolated.
 838                  */
 839                 fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
 840                     fmd_case_acquit_no_match, &fca);
 841                 if (fmd_case_auto_acquit_isolated && !too_recent)
 842                         fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
 843                             fmd_case_acquit_isolated, &do_update);
 844         } else if (got_acquit_overlap) {
 845                 /*
 846                  * Re-fault the acquitted matching suspects and acquit all
 847                  * isolated suspects.
 848                  */
 849                 if (fmd_case_auto_acquit_isolated && !too_recent) {
 850                         fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
 851                             fmd_case_fault_acquitted_matching, &fca);
 852                         fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
 853                             fmd_case_acquit_isolated, &do_update);
 854                 }
 855         } else if (fmd_case_auto_acquit_isolated) {
 856                 /*
 857                  * To get here, there must be no faulty or acquitted suspects,
 858                  * but there must be at least one isolated suspect. Just acquit
 859                  * non-matching isolated suspects. If there are no matching
 860                  * isolated suspects, then re-fault all matching suspects.
 861                  */
 862                 for (i = 0; i < new_cip->ci_nsuspects; i++)
 863                         if (new_susp_state[i] == SUSPECT_STATE_ISOLATED)
 864                                 got_isolated_overlap = 1;
 865                 if (!got_isolated_overlap)
 866                         fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
 867                             fmd_case_fault_all_matching, &fca);
 868                 fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
 869                     fmd_case_acquit_no_match, &fca);
 870         }
 871 
 872         /*
 873          * If we've updated anything in the old case, call fmd_case_update()
 874          */
 875         if (do_update)
 876                 fmd_case_update(old_cp);
 877 }
 878 
/*
 * Convict suspects in a case by applying a conviction policy and updating the
 * resource cache prior to emitting the list.suspect event for the given case.
 * At present, our policy is very simple: convict every suspect in the case.
 * In the future, this policy can be extended and made configurable to permit:
 *
 * - convicting the suspect with the highest FIT rate
 * - convicting the suspect with the cheapest FRU
 * - convicting the suspect with the FRU that is in a depot's inventory
 * - convicting the suspect with the longest lifetime
 *
 * and so forth.  A word to the wise: this problem is significantly harder than
 * it seems at first glance.  Future work should heed the following advice:
 *
 * Hacking the policy into C code here is a very bad idea.  The policy needs to
 * be decided upon very carefully and fundamentally encodes knowledge of what
 * suspect list combinations can be emitted by what diagnosis engines.  As such
 * fmd's code is the wrong location, because that would require fmd itself to
 * be updated for every diagnosis engine change, defeating the entire design.
 * The FMA Event Registry knows the suspect list combinations: policy inputs
 * can be derived from it and used to produce per-module policy configuration.
 *
 * If the policy needs to be dynamic and not statically fixed at either fmd
 * startup or module load time, any implementation of dynamic policy retrieval
 * must employ some kind of caching mechanism or be part of a built-in module.
 * The fmd_case_convict() function is called with locks held inside of fmd and
 * is not a place where unbounded blocking on some inter-process or inter-
 * system communication to another service (e.g. another daemon) can occur.
 *
 * Before any resource cache entry is created, every other case in the global
 * case hash that can be held is handed to fmd_case_check_for_dups() together
 * with this case.  The function returns 1 when that check asked for this case
 * to be discarded as a duplicate (no cache entries are created then), and 0
 * once the suspects have been entered into the resource cache.
 */
static int
fmd_case_convict(fmd_case_t *cp)
{
        fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
        fmd_asru_hash_t *ahp = fmd.d_asrus;
        int discard_new = 0, i;
        fmd_case_susp_t *cis;
        fmd_asru_link_t *alp;
        uint8_t *new_susp_state;
        uint8_t *new_match_state;
        int adjust_new = 0;
        fccd_t fccd;
        fmd_case_impl_t *ncp, **cps, **cpp;
        uint_t cpc;
        fmd_case_hash_t *chp;

        /*
         * First we must see if any matching cases already exist.
         *
         * The two per-suspect arrays are stack allocations of ci_nsuspects
         * bytes each and start out zeroed.  fmd_case_check_for_dups() reports
         * its findings through them and through the adjust_new/discard_new
         * flags, all of which are reached via the pointers stored in fccd.
         */
        new_susp_state = alloca(cip->ci_nsuspects * sizeof (uint8_t));
        for (i = 0; i < cip->ci_nsuspects; i++)
                new_susp_state[i] = 0;
        new_match_state = alloca(cip->ci_nsuspects * sizeof (uint8_t));
        for (i = 0; i < cip->ci_nsuspects; i++)
                new_match_state[i] = 0;
        fccd.fccd_cip = cip;
        fccd.fccd_adjust_new = &adjust_new;
        fccd.fccd_new_susp_state = new_susp_state;
        fccd.fccd_new_match_state = new_match_state;
        fccd.fccd_discard_new = &discard_new;

        /*
         * Hold all cases
         *
         * The snapshot is taken under the hash read lock.  Each slot receives
         * the result of fmd_case_tryhold(), which may be NULL (hence the NULL
         * check in the loop below); the duplicate check itself then runs
         * after the hash lock has been dropped.
         */
        chp = fmd.d_cases;
        (void) pthread_rwlock_rdlock(&chp->ch_lock);
        cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
        cpc = chp->ch_count;
        for (i = 0; i < chp->ch_hashlen; i++)
                for (ncp = chp->ch_hash[i]; ncp != NULL; ncp = ncp->ci_next)
                        *cpp++ = fmd_case_tryhold(ncp);
        ASSERT(cpp == cps + cpc);
        (void) pthread_rwlock_unlock(&chp->ch_lock);

        /*
         * Run fmd_case_check_for_dups() on all cases except the current one.
         * Every hold taken above is released here, including the one on the
         * current case.
         */
        for (i = 0; i < cpc; i++) {
                if (cps[i] != NULL) {
                        if (cps[i] != (fmd_case_impl_t *)cp)
                                fmd_case_check_for_dups((fmd_case_t *)cps[i],
                                    &fccd);
                        fmd_case_rele((fmd_case_t *)cps[i]);
                }
        }
        fmd_free(cps, cpc * sizeof (fmd_case_t *));

        /*
         * The remainder runs with the case lock held.  A case without a code
         * gets one from fmd_case_mkcode(); a precanned case that already has
         * a code is entered into the code hash instead.  Note that this
         * happens before the discard_new test, i.e. also for duplicates.
         */
        (void) pthread_mutex_lock(&cip->ci_lock);
        if (cip->ci_code == NULL)
                (void) fmd_case_mkcode(cp);
        else if (cip->ci_precanned)
                fmd_case_code_hash_insert(fmd.d_cases, cip);

        if (discard_new) {
                /*
                 * We've found an existing case that is a match and it is not
                 * already in repaired or resolved state. So we can close this
                 * one as a duplicate.
                 */
                (void) pthread_mutex_unlock(&cip->ci_lock);
                return (1);
        }

        /*
         * Allocate new cache entries
         *
         * A suspect whose entry cannot be created is reported and skipped;
         * the remaining suspects are still processed.  Each new entry is
         * marked present, has FMD_ASRU_UNUSABLE cleared and FMD_ASRU_FAULTY
         * set.
         */
        for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
                if ((alp = fmd_asru_hash_create_entry(ahp,
                    cp, cis->cis_nvl)) == NULL) {
                        fmd_error(EFMD_CASE_EVENT, "cannot convict suspect in "
                            "%s: %s\n", cip->ci_uuid, fmd_strerror(errno));
                        continue;
                }
                alp->al_flags |= FMD_ASRU_PRESENT;
                alp->al_asru->asru_flags |= FMD_ASRU_PRESENT;
                (void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0);
                (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
        }

        if (adjust_new) {
                int some_suspect = 0, some_not_suspect = 0;

                /*
                 * There is one or more matching case but they are already in
                 * repaired or resolved state. So we need to keep the new
                 * case, but we can adjust it. Repaired/removed/replaced
                 * suspects are unlikely to be to blame (unless there are
                 * actually two separate faults). So if we have a combination of
                 * repaired/replaced/removed suspects and acquitted suspects in
                 * the old lists, then we should acquit in the new list those
                 * that were repaired/replaced/removed in the old.
                 */
                /*
                 * First pass: classify each suspect.  Acquittal only takes
                 * place when both classes are represented, so that the case
                 * is never left without any remaining suspect.
                 */
                for (i = 0; i < cip->ci_nsuspects; i++) {
                        if ((new_susp_state[i] & SUSPECT_STATE_REPLACED) ||
                            (new_susp_state[i] & SUSPECT_STATE_REPAIRED) ||
                            (new_susp_state[i] & SUSPECT_STATE_REMOVED) ||
                            (new_match_state[i] & SUSPECT_STATE_NO_MATCH))
                                some_not_suspect = 1;
                        else
                                some_suspect = 1;
                }
                /*
                 * Second pass: walk the suspect list in step with the state
                 * arrays (index i corresponds to the i-th list element) and
                 * acquit those classified as "not suspect" above.
                 */
                if (some_suspect && some_not_suspect) {
                        for (cis = cip->ci_suspects, i = 0; cis != NULL;
                            cis = cis->cis_next, i++)
                                if ((new_susp_state[i] &
                                    SUSPECT_STATE_REPLACED) ||
                                    (new_susp_state[i] &
                                    SUSPECT_STATE_REPAIRED) ||
                                    (new_susp_state[i] &
                                    SUSPECT_STATE_REMOVED) ||
                                    (new_match_state[i] &
                                    SUSPECT_STATE_NO_MATCH))
                                        fmd_asru_hash_apply_by_case(fmd.d_asrus,
                                            cp, fmd_case_acquit_suspect,
                                            cis->cis_nvl);
                }
        }

        (void) pthread_mutex_unlock(&cip->ci_lock);
        return (0);
}
1039 
1040 void
1041 fmd_case_publish(fmd_case_t *cp, uint_t state)
1042 {
1043         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1044         fmd_event_t *e;
1045         nvlist_t *nvl;
1046         char *class;
1047 
1048         if (state == FMD_CASE_CURRENT)
1049                 state = cip->ci_state; /* use current state */
1050 
1051         switch (state) {
1052         case FMD_CASE_SOLVED:
1053                 (void) pthread_mutex_lock(&cip->ci_lock);
1054 
1055                 /*
1056                  * If we already have a code, then case is already solved.
1057                  */
1058                 if (cip->ci_precanned == 0 && cip->ci_xprt == NULL &&
1059                     cip->ci_code != NULL) {
1060                         (void) pthread_mutex_unlock(&cip->ci_lock);
1061                         break;
1062                 }
1063 
1064                 if (cip->ci_tv_valid == 0) {
1065                         fmd_time_gettimeofday(&cip->ci_tv);
1066                         cip->ci_tv_valid = 1;
1067                 }
1068                 (void) pthread_mutex_unlock(&cip->ci_lock);
1069 
1070                 if (fmd_case_convict(cp) == 1) { /* dupclose */
1071                         cip->ci_flags &= ~FMD_CF_SOLVED;
1072                         fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0);
1073                         break;
1074                 }
1075                 if (cip->ci_xprt != NULL) {
1076                         /*
1077                          * For proxy, save some information about the transport
1078                          * in the resource cache.
1079                          */
1080                         int count = 0;
1081                         fmd_asru_set_on_proxy_t fasp;
1082                         fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)cip->ci_xprt;
1083 
1084                         fasp.fasp_countp = &count;
1085                         fasp.fasp_maxcount = cip->ci_nsuspects;
1086                         fasp.fasp_proxy_asru = cip->ci_proxy_asru;
1087                         fasp.fasp_proxy_external = xip->xi_flags &
1088                             FMD_XPRT_EXTERNAL;
1089                         fasp.fasp_proxy_rdonly = ((xip->xi_flags &
1090                             FMD_XPRT_RDWR) == FMD_XPRT_RDONLY);
1091                         fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
1092                             fmd_asru_set_on_proxy, &fasp);
1093                 }
1094                 nvl = fmd_case_mkevent(cp, FM_LIST_SUSPECT_CLASS);
1095                 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1096 
1097                 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1098                 (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
1099                 fmd_log_append(fmd.d_fltlog, e, cp);
1100                 (void) pthread_rwlock_unlock(&fmd.d_log_lock);
1101                 fmd_dispq_dispatch(fmd.d_disp, e, class);
1102 
1103                 (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
1104                 cip->ci_mod->mod_stats->ms_casesolved.fmds_value.ui64++;
1105                 (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
1106 
1107                 break;
1108 
1109         case FMD_CASE_CLOSE_WAIT:
1110                 fmd_case_hold(cp);
1111                 e = fmd_event_create(FMD_EVT_CLOSE, FMD_HRT_NOW, NULL, cp);
1112                 fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
1113 
1114                 (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
1115                 cip->ci_mod->mod_stats->ms_caseclosed.fmds_value.ui64++;
1116                 (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
1117 
1118                 break;
1119 
1120         case FMD_CASE_CLOSED:
1121                 nvl = fmd_case_mkevent(cp, FM_LIST_ISOLATED_CLASS);
1122                 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1123                 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1124                 fmd_dispq_dispatch(fmd.d_disp, e, class);
1125                 break;
1126 
1127         case FMD_CASE_REPAIRED:
1128                 nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
1129                 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1130                 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1131                 (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
1132                 fmd_log_append(fmd.d_fltlog, e, cp);
1133                 (void) pthread_rwlock_unlock(&fmd.d_log_lock);
1134                 fmd_dispq_dispatch(fmd.d_disp, e, class);
1135                 break;
1136 
1137         case FMD_CASE_RESOLVED:
1138                 nvl = fmd_case_mkevent(cp, FM_LIST_RESOLVED_CLASS);
1139                 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1140                 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1141                 (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
1142                 fmd_log_append(fmd.d_fltlog, e, cp);
1143                 (void) pthread_rwlock_unlock(&fmd.d_log_lock);
1144                 fmd_dispq_dispatch(fmd.d_disp, e, class);
1145                 break;
1146         }
1147 }
1148 
1149 fmd_case_t *
1150 fmd_case_hash_lookup(fmd_case_hash_t *chp, const char *uuid)
1151 {
1152         fmd_case_impl_t *cip;
1153         uint_t h;
1154 
1155         (void) pthread_rwlock_rdlock(&chp->ch_lock);
1156         h = fmd_strhash(uuid) % chp->ch_hashlen;
1157 
1158         for (cip = chp->ch_hash[h]; cip != NULL; cip = cip->ci_next) {
1159                 if (strcmp(cip->ci_uuid, uuid) == 0)
1160                         break;
1161         }
1162 
1163         /*
1164          * If deleting bit is set, treat the case as if it doesn't exist.
1165          */
1166         if (cip != NULL)
1167                 cip = fmd_case_tryhold(cip);
1168 
1169         if (cip == NULL)
1170                 (void) fmd_set_errno(EFMD_CASE_INVAL);
1171 
1172         (void) pthread_rwlock_unlock(&chp->ch_lock);
1173         return ((fmd_case_t *)cip);
1174 }
1175 
1176 static fmd_case_impl_t *
1177 fmd_case_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
1178 {
1179         fmd_case_impl_t *eip;
1180         uint_t h;
1181 
1182         (void) pthread_rwlock_wrlock(&chp->ch_lock);
1183         h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
1184 
1185         for (eip = chp->ch_hash[h]; eip != NULL; eip = eip->ci_next) {
1186                 if (strcmp(cip->ci_uuid, eip->ci_uuid) == 0 &&
1187                     fmd_case_tryhold(eip) != NULL) {
1188                         (void) pthread_rwlock_unlock(&chp->ch_lock);
1189                         return (eip); /* uuid already present */
1190                 }
1191         }
1192 
1193         cip->ci_next = chp->ch_hash[h];
1194         chp->ch_hash[h] = cip;
1195 
1196         chp->ch_count++;
1197         ASSERT(chp->ch_count != 0);
1198 
1199         (void) pthread_rwlock_unlock(&chp->ch_lock);
1200         return (cip);
1201 }
1202 
1203 static void
1204 fmd_case_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
1205 {
1206         fmd_case_impl_t *cp, **pp;
1207         uint_t h;
1208 
1209         ASSERT(MUTEX_HELD(&cip->ci_lock));
1210 
1211         cip->ci_flags |= FMD_CF_DELETING;
1212         (void) pthread_mutex_unlock(&cip->ci_lock);
1213 
1214         (void) pthread_rwlock_wrlock(&chp->ch_lock);
1215 
1216         h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
1217         pp = &chp->ch_hash[h];
1218 
1219         for (cp = *pp; cp != NULL; cp = cp->ci_next) {
1220                 if (cp != cip)
1221                         pp = &cp->ci_next;
1222                 else
1223                         break;
1224         }
1225 
1226         if (cp == NULL) {
1227                 fmd_panic("case %p (%s) not found on hash chain %u\n",
1228                     (void *)cip, cip->ci_uuid, h);
1229         }
1230 
1231         *pp = cp->ci_next;
1232         cp->ci_next = NULL;
1233 
1234         /*
1235          * delete from code hash if it is on it
1236          */
1237         fmd_case_code_hash_delete(chp, cip);
1238 
1239         ASSERT(chp->ch_count != 0);
1240         chp->ch_count--;
1241 
1242         (void) pthread_rwlock_unlock(&chp->ch_lock);
1243 
1244         (void) pthread_mutex_lock(&cip->ci_lock);
1245         ASSERT(cip->ci_flags & FMD_CF_DELETING);
1246 }
1247 
1248 fmd_case_t *
1249 fmd_case_create(fmd_module_t *mp, const char *uuidstr, void *data)
1250 {
1251         fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
1252         fmd_case_impl_t *eip = NULL;
1253         uuid_t uuid;
1254 
1255         (void) pthread_mutex_init(&cip->ci_lock, NULL);
1256         fmd_buf_hash_create(&cip->ci_bufs);
1257 
1258         fmd_module_hold(mp);
1259         cip->ci_mod = mp;
1260         cip->ci_refs = 1;
1261         cip->ci_state = FMD_CASE_UNSOLVED;
1262         cip->ci_flags = FMD_CF_DIRTY;
1263         cip->ci_data = data;
1264 
1265         /*
1266          * Calling libuuid: get a clue.  The library interfaces cleverly do not
1267          * define any constant for the length of an unparse string, and do not
1268          * permit the caller to specify a buffer length for safety.  The spec
1269          * says it will be 36 bytes, but we make it tunable just in case.
1270          */
1271         (void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &cip->ci_uuidlen);
1272         cip->ci_uuid = fmd_zalloc(cip->ci_uuidlen + 1, FMD_SLEEP);
1273 
1274         if (uuidstr == NULL) {
1275                 /*
1276                  * We expect this loop to execute only once, but code it
1277                  * defensively against the possibility of libuuid bugs.
1278                  * Keep generating uuids and attempting to do a hash insert
1279                  * until we get a unique one.
1280                  */
1281                 do {
1282                         if (eip != NULL)
1283                                 fmd_case_rele((fmd_case_t *)eip);
1284                         uuid_generate(uuid);
1285                         uuid_unparse(uuid, cip->ci_uuid);
1286                 } while ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip);
1287         } else {
1288                 /*
1289                  * If a uuid was specified we must succeed with that uuid,
1290                  * or return NULL indicating a case with that uuid already
1291                  * exists.
1292                  */
1293                 (void) strncpy(cip->ci_uuid, uuidstr, cip->ci_uuidlen + 1);
1294                 if (fmd_case_hash_insert(fmd.d_cases, cip) != cip) {
1295                         fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1);
1296                         (void) fmd_buf_hash_destroy(&cip->ci_bufs);
1297                         fmd_module_rele(mp);
1298                         pthread_mutex_destroy(&cip->ci_lock);
1299                         fmd_free(cip, sizeof (*cip));
1300                         return (NULL);
1301                 }
1302         }
1303 
1304         ASSERT(fmd_module_locked(mp));
1305         fmd_list_append(&mp->mod_cases, cip);
1306         fmd_module_setcdirty(mp);
1307 
1308         (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
1309         cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
1310         (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
1311 
1312         return ((fmd_case_t *)cip);
1313 }
1314 
1315 static void
1316 fmd_case_destroy_suspects(fmd_case_impl_t *cip)
1317 {
1318         fmd_case_susp_t *cis, *ncis;
1319 
1320         ASSERT(MUTEX_HELD(&cip->ci_lock));
1321 
1322         if (cip->ci_proxy_asru)
1323                 fmd_free(cip->ci_proxy_asru, sizeof (uint8_t) *
1324                     cip->ci_nsuspects);
1325         if (cip->ci_diag_de)
1326                 nvlist_free(cip->ci_diag_de);
1327         if (cip->ci_diag_asru)
1328                 fmd_free(cip->ci_diag_asru, sizeof (uint8_t) *
1329                     cip->ci_nsuspects);
1330 
1331         for (cis = cip->ci_suspects; cis != NULL; cis = ncis) {
1332                 ncis = cis->cis_next;
1333                 nvlist_free(cis->cis_nvl);
1334                 fmd_free(cis, sizeof (fmd_case_susp_t));
1335         }
1336 
1337         cip->ci_suspects = NULL;
1338         cip->ci_nsuspects = 0;
1339 }
1340 
/*
 * Recreate a case with a known uuid, state and code on behalf of module 'mp',
 * optionally associated with transport 'xp'.  The caller must hold the module
 * lock on 'mp' on the paths that append the case to mp->mod_cases.
 *
 * If the uuid is not yet in the global case hash, the newly built case is
 * inserted and returned.  If the uuid is already present, the new case is
 * thrown away and the outcome depends on the existing case:
 *
 * - 'mp' is fmd.d_rmod: the existing case is returned, after its code has
 *   been filled in if needed and its state forced back to CLOSED if needed.
 * - the existing case is not owned by fmd.d_rmod, or 'xp' is non-NULL: this
 *   is a uuid conflict and NULL is returned.
 * - otherwise the existing case is moved from fmd.d_rmod to 'mp', given the
 *   requested state, and returned.
 */
fmd_case_t *
fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp,
    uint_t state, const char *uuid, const char *code)
{
        fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
        fmd_case_impl_t *eip;

        (void) pthread_mutex_init(&cip->ci_lock, NULL);
        fmd_buf_hash_create(&cip->ci_bufs);

        fmd_module_hold(mp);
        cip->ci_mod = mp;
        cip->ci_xprt = xp;
        cip->ci_refs = 1;
        cip->ci_state = state;
        cip->ci_uuid = fmd_strdup(uuid, FMD_SLEEP);
        cip->ci_uuidlen = strlen(cip->ci_uuid);
        cip->ci_code = fmd_strdup(code, FMD_SLEEP);
        /* ci_code may end up NULL here, in which case the length is 0 */
        cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0;

        if (state > FMD_CASE_CLOSE_WAIT)
                cip->ci_flags |= FMD_CF_SOLVED;

        /*
         * Insert the case into the global case hash.  If the specified UUID is
         * already present, check to see if it is an orphan: if so, reclaim it;
         * otherwise if it is owned by a different module then return NULL.
         */
        if ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip) {
                /*
                 * The new case never became visible in the hash.  Drop its
                 * only reference and destroy it; fmd_case_destroy() expects
                 * the case lock to be held and the count to be zero.
                 */
                (void) pthread_mutex_lock(&cip->ci_lock);
                cip->ci_refs--; /* decrement to zero */
                fmd_case_destroy((fmd_case_t *)cip, B_FALSE);

                /*
                 * 'eip' carries the hold taken by fmd_case_hash_insert();
                 * each path below releases it with fmd_case_rele().
                 */
                cip = eip; /* switch 'cip' to the existing case */
                (void) pthread_mutex_lock(&cip->ci_lock);

                /*
                 * If the ASRU cache is trying to recreate an orphan, then just
                 * return the existing case that we found without changing it.
                 */
                if (mp == fmd.d_rmod) {
                        /*
                         * In case the case has already been created from
                         * a checkpoint file we need to set up code now.
                         */
                        if (cip->ci_state < FMD_CASE_CLOSED) {
                                if (code != NULL && cip->ci_code == NULL) {
                                        cip->ci_code = fmd_strdup(code,
                                            FMD_SLEEP);
                                        cip->ci_codelen = cip->ci_code ?
                                            strlen(cip->ci_code) + 1 : 0;
                                        fmd_case_code_hash_insert(fmd.d_cases,
                                            cip);
                                }
                        }

                        /*
                         * When recreating an orphan case, state passed in may
                         * be CLOSED (faulty) or REPAIRED/RESOLVED (!faulty). If
                         * any suspects are still CLOSED (faulty) then the
                         * overall state needs to be CLOSED.
                         */
                        if ((cip->ci_state == FMD_CASE_REPAIRED ||
                            cip->ci_state == FMD_CASE_RESOLVED) &&
                            state == FMD_CASE_CLOSED)
                                cip->ci_state = FMD_CASE_CLOSED;
                        (void) pthread_mutex_unlock(&cip->ci_lock);
                        fmd_case_rele((fmd_case_t *)cip);
                        return ((fmd_case_t *)cip);
                }

                /*
                 * If the existing case isn't an orphan or is being proxied,
                 * then we have a UUID conflict: return failure to the caller.
                 */
                if (cip->ci_mod != fmd.d_rmod || xp != NULL) {
                        (void) pthread_mutex_unlock(&cip->ci_lock);
                        fmd_case_rele((fmd_case_t *)cip);
                        return (NULL);
                }

                /*
                 * If the new module is reclaiming an orphaned case, remove
                 * the case from the root module, switch ci_mod, and then fall
                 * through to adding the case to the new owner module 'mp'.
                 */
                fmd_module_lock(cip->ci_mod);
                fmd_list_delete(&cip->ci_mod->mod_cases, cip);
                fmd_module_unlock(cip->ci_mod);

                /* trade the module hold on the old owner for one on 'mp' */
                fmd_module_rele(cip->ci_mod);
                cip->ci_mod = mp;
                fmd_module_hold(mp);

                /*
                 * It's possible that fmd crashed or was restarted during a
                 * previous solve operation between the asru cache being created
                 * and the ckpt file being updated to SOLVED. Thus when the DE
                 * recreates the case here from the checkpoint file, the state
                 * will be UNSOLVED and yet we are having to reclaim because
                 * the case was in the asru cache. If this happens, revert the
                 * case back to the UNSOLVED state and let the DE solve it again
                 */
                if (state == FMD_CASE_UNSOLVED) {
                        fmd_asru_hash_delete_case(fmd.d_asrus,
                            (fmd_case_t *)cip);
                        fmd_case_destroy_suspects(cip);
                        fmd_case_code_hash_delete(fmd.d_cases, cip);
                        fmd_free(cip->ci_code, cip->ci_codelen);
                        cip->ci_code = NULL;
                        cip->ci_codelen = 0;
                        cip->ci_tv_valid = 0;
                }

                cip->ci_state = state;

                (void) pthread_mutex_unlock(&cip->ci_lock);
                fmd_case_rele((fmd_case_t *)cip);
        } else {
                /*
                 * add into hash of solved cases
                 */
                if (cip->ci_code)
                        fmd_case_code_hash_insert(fmd.d_cases, cip);
        }

        /*
         * Reached for a freshly inserted case and for a reclaimed orphan:
         * attach the case to its owner 'mp' and count it as open there.
         */
        ASSERT(fmd_module_locked(mp));
        fmd_list_append(&mp->mod_cases, cip);

        (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
        cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
        (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);

        return ((fmd_case_t *)cip);
}
1476 
1477 void
1478 fmd_case_destroy(fmd_case_t *cp, int visible)
1479 {
1480         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1481         fmd_case_item_t *cit, *ncit;
1482 
1483         ASSERT(MUTEX_HELD(&cip->ci_lock));
1484         ASSERT(cip->ci_refs == 0);
1485 
1486         if (visible) {
1487                 TRACE((FMD_DBG_CASE, "deleting case %s", cip->ci_uuid));
1488                 fmd_case_hash_delete(fmd.d_cases, cip);
1489         }
1490 
1491         for (cit = cip->ci_items; cit != NULL; cit = ncit) {
1492                 ncit = cit->cit_next;
1493                 fmd_event_rele(cit->cit_event);
1494                 fmd_free(cit, sizeof (fmd_case_item_t));
1495         }
1496 
1497         fmd_case_destroy_suspects(cip);
1498 
1499         if (cip->ci_principal != NULL)
1500                 fmd_event_rele(cip->ci_principal);
1501 
1502         fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1);
1503         fmd_free(cip->ci_code, cip->ci_codelen);
1504         (void) fmd_buf_hash_destroy(&cip->ci_bufs);
1505 
1506         fmd_module_rele(cip->ci_mod);
1507         fmd_free(cip, sizeof (fmd_case_impl_t));
1508 }
1509 
1510 void
1511 fmd_case_hold(fmd_case_t *cp)
1512 {
1513         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1514 
1515         (void) pthread_mutex_lock(&cip->ci_lock);
1516         fmd_case_hold_locked(cp);
1517         (void) pthread_mutex_unlock(&cip->ci_lock);
1518 }
1519 
1520 void
1521 fmd_case_hold_locked(fmd_case_t *cp)
1522 {
1523         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1524 
1525         ASSERT(MUTEX_HELD(&cip->ci_lock));
1526         if (cip->ci_flags & FMD_CF_DELETING)
1527                 fmd_panic("attempt to hold a deleting case %p (%s)\n",
1528                     (void *)cip, cip->ci_uuid);
1529         cip->ci_refs++;
1530         ASSERT(cip->ci_refs != 0);
1531 }
1532 
1533 static fmd_case_impl_t *
1534 fmd_case_tryhold(fmd_case_impl_t *cip)
1535 {
1536         /*
1537          * If the case's "deleting" bit is unset, hold and return case,
1538          * otherwise, return NULL.
1539          */
1540         (void) pthread_mutex_lock(&cip->ci_lock);
1541         if (cip->ci_flags & FMD_CF_DELETING) {
1542                 (void) pthread_mutex_unlock(&cip->ci_lock);
1543                 cip = NULL;
1544         } else {
1545                 fmd_case_hold_locked((fmd_case_t *)cip);
1546                 (void) pthread_mutex_unlock(&cip->ci_lock);
1547         }
1548         return (cip);
1549 }
1550 
1551 void
1552 fmd_case_rele(fmd_case_t *cp)
1553 {
1554         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1555 
1556         (void) pthread_mutex_lock(&cip->ci_lock);
1557         ASSERT(cip->ci_refs != 0);
1558 
1559         if (--cip->ci_refs == 0)
1560                 fmd_case_destroy((fmd_case_t *)cip, B_TRUE);
1561         else
1562                 (void) pthread_mutex_unlock(&cip->ci_lock);
1563 }
1564 
1565 void
1566 fmd_case_rele_locked(fmd_case_t *cp)
1567 {
1568         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1569 
1570         ASSERT(MUTEX_HELD(&cip->ci_lock));
1571         --cip->ci_refs;
1572         ASSERT(cip->ci_refs != 0);
1573 }
1574 
1575 int
1576 fmd_case_insert_principal(fmd_case_t *cp, fmd_event_t *ep)
1577 {
1578         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1579         fmd_case_item_t *cit;
1580         fmd_event_t *oep;
1581         uint_t state;
1582         int new;
1583 
1584         fmd_event_hold(ep);
1585         (void) pthread_mutex_lock(&cip->ci_lock);
1586 
1587         if (cip->ci_flags & FMD_CF_SOLVED)
1588                 state = FMD_EVS_DIAGNOSED;
1589         else
1590                 state = FMD_EVS_ACCEPTED;
1591 
1592         oep = cip->ci_principal;
1593         cip->ci_principal = ep;
1594 
1595         for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
1596                 if (cit->cit_event == ep)
1597                         break;
1598         }
1599 
1600         cip->ci_flags |= FMD_CF_DIRTY;
1601         new = cit == NULL && ep != oep;
1602 
1603         (void) pthread_mutex_unlock(&cip->ci_lock);
1604 
1605         fmd_module_setcdirty(cip->ci_mod);
1606         fmd_event_transition(ep, state);
1607 
1608         if (oep != NULL)
1609                 fmd_event_rele(oep);
1610 
1611         return (new);
1612 }
1613 
1614 int
1615 fmd_case_insert_event(fmd_case_t *cp, fmd_event_t *ep)
1616 {
1617         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1618         fmd_case_item_t *cit;
1619         uint_t state;
1620         int new;
1621         boolean_t injected;
1622 
1623         (void) pthread_mutex_lock(&cip->ci_lock);
1624 
1625         if (cip->ci_flags & FMD_CF_SOLVED)
1626                 state = FMD_EVS_DIAGNOSED;
1627         else
1628                 state = FMD_EVS_ACCEPTED;
1629 
1630         for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
1631                 if (cit->cit_event == ep)
1632                         break;
1633         }
1634 
1635         new = cit == NULL && ep != cip->ci_principal;
1636 
1637         /*
1638          * If the event is already in the case or the case is already solved,
1639          * there is no reason to save it: just transition it appropriately.
1640          */
1641         if (cit != NULL || (cip->ci_flags & FMD_CF_SOLVED)) {
1642                 (void) pthread_mutex_unlock(&cip->ci_lock);
1643                 fmd_event_transition(ep, state);
1644                 return (new);
1645         }
1646 
1647         cit = fmd_alloc(sizeof (fmd_case_item_t), FMD_SLEEP);
1648         fmd_event_hold(ep);
1649 
1650         if (nvlist_lookup_boolean_value(((fmd_event_impl_t *)ep)->ev_nvl,
1651             "__injected", &injected) == 0 && injected)
1652                 fmd_case_set_injected(cp);
1653 
1654         cit->cit_next = cip->ci_items;
1655         cit->cit_event = ep;
1656 
1657         cip->ci_items = cit;
1658         cip->ci_nitems++;
1659 
1660         cip->ci_flags |= FMD_CF_DIRTY;
1661         (void) pthread_mutex_unlock(&cip->ci_lock);
1662 
1663         fmd_module_setcdirty(cip->ci_mod);
1664         fmd_event_transition(ep, state);
1665 
1666         return (new);
1667 }
1668 
1669 void
1670 fmd_case_insert_suspect(fmd_case_t *cp, nvlist_t *nvl)
1671 {
1672         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1673         fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
1674 
1675         (void) pthread_mutex_lock(&cip->ci_lock);
1676         ASSERT(cip->ci_state < FMD_CASE_CLOSE_WAIT);
1677         cip->ci_flags |= FMD_CF_DIRTY;
1678 
1679         cis->cis_next = cip->ci_suspects;
1680         cis->cis_nvl = nvl;
1681 
1682         cip->ci_suspects = cis;
1683         cip->ci_nsuspects++;
1684 
1685         (void) pthread_mutex_unlock(&cip->ci_lock);
1686         if (cip->ci_xprt == NULL)
1687                 fmd_module_setcdirty(cip->ci_mod);
1688 }
1689 
1690 void
1691 fmd_case_recreate_suspect(fmd_case_t *cp, nvlist_t *nvl)
1692 {
1693         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1694         fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
1695         boolean_t b;
1696 
1697         (void) pthread_mutex_lock(&cip->ci_lock);
1698 
1699         cis->cis_next = cip->ci_suspects;
1700         cis->cis_nvl = nvl;
1701 
1702         if (nvlist_lookup_boolean_value(nvl,
1703             FM_SUSPECT_MESSAGE, &b) == 0 && b == B_FALSE)
1704                 cip->ci_flags |= FMD_CF_INVISIBLE;
1705 
1706         cip->ci_suspects = cis;
1707         cip->ci_nsuspects++;
1708 
1709         (void) pthread_mutex_unlock(&cip->ci_lock);
1710 }
1711 
1712 void
1713 fmd_case_reset_suspects(fmd_case_t *cp)
1714 {
1715         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1716 
1717         (void) pthread_mutex_lock(&cip->ci_lock);
1718         ASSERT(cip->ci_state < FMD_CASE_SOLVED);
1719 
1720         fmd_case_destroy_suspects(cip);
1721         cip->ci_flags |= FMD_CF_DIRTY;
1722 
1723         (void) pthread_mutex_unlock(&cip->ci_lock);
1724         fmd_module_setcdirty(cip->ci_mod);
1725 }
1726 
/*
 * Callback for fmd_asru_hash_apply_by_case(): set the FMD_ASRU_UNUSABLE flag
 * on the given ASRU link.  The 'arg' parameter is not used.
 */
/*ARGSUSED*/
static void
fmd_case_unusable(fmd_asru_link_t *alp, void *arg)
{
        (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE);
}
1733 
/*
 * Grab ci_lock and update the case state and set the dirty bit.  Then perform
 * whatever actions and emit whatever events are appropriate for the state.
 * Refer to the topmost block comment explaining the state machine for details.
 *
 * 'state' is the requested new state and must not exceed FMD_CASE_RESOLVED.
 * 'flags' is a set of FMD_CF_* bits to OR into ci_flags.  The flags are
 * applied even when the case is already at or beyond the requested state, in
 * which case nothing else is done: the state only ever moves forward here.
 */
void
fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
{
        fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
        fmd_case_item_t *cit;
        fmd_event_t *e;
        int resolved = 0;
        int any_unusable_and_present = 0;

        ASSERT(state <= FMD_CASE_RESOLVED);
        (void) pthread_mutex_lock(&cip->ci_lock);

        /*
         * Unless the case is solved already, or is being marked solved by
         * this call, the ISOLATED, REPAIRED and RESOLVED flags are dropped
         * from the request.
         */
        if (!(cip->ci_flags & FMD_CF_SOLVED) && !(flags & FMD_CF_SOLVED))
                flags &= ~(FMD_CF_ISOLATED | FMD_CF_REPAIRED | FMD_CF_RESOLVED);

        cip->ci_flags |= flags;

        if (cip->ci_state >= state) {
                (void) pthread_mutex_unlock(&cip->ci_lock);
                return; /* already in specified state */
        }

        TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
            _fmd_case_snames[cip->ci_state], _fmd_case_snames[state]));

        cip->ci_state = state;
        cip->ci_flags |= FMD_CF_DIRTY;

        /*
         * The module is only marked dirty for a case that has no transport
         * and that is not currently owned by the root module.
         */
        if (cip->ci_xprt == NULL && cip->ci_mod != fmd.d_rmod)
                fmd_module_setcdirty(cip->ci_mod);

        switch (state) {
        case FMD_CASE_SOLVED:
                /* Every event in the case, and the principal, is diagnosed. */
                for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
                        fmd_event_transition(cit->cit_event, FMD_EVS_DIAGNOSED);

                if (cip->ci_principal != NULL) {
                        fmd_event_transition(cip->ci_principal,
                            FMD_EVS_DIAGNOSED);
                }
                break;

        case FMD_CASE_CLOSE_WAIT:
                /*
                 * If the case was never solved, do not change ASRUs.
                 * If the case was never fmd_case_closed, do not change ASRUs.
                 * If the case was repaired, do not change ASRUs.
                 */
                if ((cip->ci_flags & (FMD_CF_SOLVED | FMD_CF_ISOLATED |
                    FMD_CF_REPAIRED)) == (FMD_CF_SOLVED | FMD_CF_ISOLATED))
                        fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
                            fmd_case_unusable, NULL);

                /*
                 * If an orphaned case transitions to CLOSE_WAIT, the owning
                 * module is no longer loaded: continue on to CASE_CLOSED or
                 * CASE_REPAIRED as appropriate.
                 */
                if (fmd_case_orphaned(cp)) {
                        if (cip->ci_flags & FMD_CF_REPAIRED) {
                                state = cip->ci_state = FMD_CASE_REPAIRED;
                                TRACE((FMD_DBG_CASE, "case %s %s->%s",
                                    cip->ci_uuid,
                                    _fmd_case_snames[FMD_CASE_CLOSE_WAIT],
                                    _fmd_case_snames[FMD_CASE_REPAIRED]));
                                /* continue with the REPAIRED handling below */
                                goto do_repair;
                        } else {
                                state = cip->ci_state = FMD_CASE_CLOSED;
                                TRACE((FMD_DBG_CASE, "case %s %s->%s",
                                    cip->ci_uuid,
                                    _fmd_case_snames[FMD_CASE_CLOSE_WAIT],
                                    _fmd_case_snames[FMD_CASE_CLOSED]));
                        }
                }
                break;

        case FMD_CASE_REPAIRED:
do_repair:
                ASSERT(cip->ci_xprt != NULL || fmd_case_orphaned(cp));

                /*
                 * If we've been requested to transition straight on to the
                 * RESOLVED state (which can happen with fault proxying where a
                 * list.resolved or a uuresolved is received from the other
                 * side), or if all suspects are already either usable or not
                 * present then transition straight to RESOLVED state,
                 * publishing both the list.repaired and list.resolved. For a
                 * proxy, if we discover here that all suspects are already
                 * either usable or not present, notify the diag side instead
                 * using fmd_xprt_uuresolved().
                 */
                if (flags & FMD_CF_RESOLVED) {
                        if (cip->ci_xprt != NULL)
                                fmd_list_delete(&cip->ci_mod->mod_cases, cip);
                } else {
                        fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
                            fmd_case_unusable_and_present,
                            &any_unusable_and_present);
                        if (any_unusable_and_present)
                                break;
                        if (cip->ci_xprt != NULL) {
                                fmd_xprt_uuresolved(cip->ci_xprt, cip->ci_uuid);
                                break;
                        }
                }

                /*
                 * Moving straight on to RESOLVED.  'state' is still REPAIRED
                 * at this point, so the publish below is for the REPAIRED
                 * state; ci_lock is dropped around that call.  'state' is
                 * then advanced so that the common code after the switch
                 * publishes for RESOLVED.
                 */
                cip->ci_state = FMD_CASE_RESOLVED;
                (void) pthread_mutex_unlock(&cip->ci_lock);
                fmd_case_publish(cp, state);
                TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
                    _fmd_case_snames[FMD_CASE_REPAIRED],
                    _fmd_case_snames[FMD_CASE_RESOLVED]));
                state = FMD_CASE_RESOLVED;
                resolved = 1;
                (void) pthread_mutex_lock(&cip->ci_lock);
                break;

        case FMD_CASE_RESOLVED:
                /*
                 * For a proxy, no need to check that all suspects are already
                 * either usable or not present - this request has come from
                 * the diagnosing side which makes the final decision on this.
                 */
                if (cip->ci_xprt != NULL) {
                        fmd_list_delete(&cip->ci_mod->mod_cases, cip);
                        resolved = 1;
                        break;
                }

                ASSERT(fmd_case_orphaned(cp));

                /*
                 * If all suspects are already either usable or not present then
                 * carry on, publish list.resolved and discard the case.
                 */
                fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
                    fmd_case_unusable_and_present, &any_unusable_and_present);
                if (any_unusable_and_present) {
                        /* nothing is published on this path */
                        (void) pthread_mutex_unlock(&cip->ci_lock);
                        return;
                }

                resolved = 1;
                break;
        }

        (void) pthread_mutex_unlock(&cip->ci_lock);

        /*
         * If the module has initialized, then publish the appropriate event
         * for the new case state.  If not, we are being called from the
         * checkpoint code during module load, in which case the module's
         * _fmd_init() routine hasn't finished yet, and our event dictionaries
         * may not be open yet, which will prevent us from computing the event
         * code.  Defer the call to fmd_case_publish() by enqueuing a PUBLISH
         * event in our queue: this won't be processed until _fmd_init is done.
         */
        if (cip->ci_mod->mod_flags & FMD_MOD_INIT)
                fmd_case_publish(cp, state);
        else {
                /* the case is held before being handed to the queued event */
                fmd_case_hold(cp);
                e = fmd_event_create(FMD_EVT_PUBLISH, FMD_HRT_NOW, NULL, cp);
                fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
        }

        if (resolved) {
                if (cip->ci_xprt != NULL) {
                        /*
                         * If we transitioned to RESOLVED, adjust the reference
                         * count to reflect our removal from
                         * fmd.d_rmod->mod_cases above.  If the caller has not
                         * placed an additional hold on the case, it will now
                         * be freed.
                         *
                         * NOTE(review): the removals above are written as
                         * deletions from cip->ci_mod->mod_cases; confirm
                         * which module list this comment intends.
                         */
                        (void) pthread_mutex_lock(&cip->ci_lock);
                        fmd_asru_hash_delete_case(fmd.d_asrus, cp);
                        (void) pthread_mutex_unlock(&cip->ci_lock);
                        fmd_case_rele(cp);
                } else {
                        fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
                            fmd_asru_log_resolved, NULL);
                        (void) pthread_mutex_lock(&cip->ci_lock);
                        /* mark as "ready to be discarded" */
                        cip->ci_flags |= FMD_CF_RES_CMPL;
                        (void) pthread_mutex_unlock(&cip->ci_lock);
                }
        }
}
1927 
1928 /*
1929  * Discard any case if it is in RESOLVED state (and if check_if_aged argument
1930  * is set if all suspects have passed the rsrc.aged time).
1931  */
1932 void
1933 fmd_case_discard_resolved(fmd_case_t *cp, void *arg)
1934 {
1935         int check_if_aged = *(int *)arg;
1936         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1937 
1938         /*
1939          * First check if case has completed transition to resolved.
1940          */
1941         (void) pthread_mutex_lock(&cip->ci_lock);
1942         if (!(cip->ci_flags & FMD_CF_RES_CMPL)) {
1943                 (void) pthread_mutex_unlock(&cip->ci_lock);
1944                 return;
1945         }
1946 
1947         /*
1948          * Now if check_is_aged is set, see if all suspects have aged.
1949          */
1950         if (check_if_aged) {
1951                 int aged = 1;
1952 
1953                 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
1954                     fmd_asru_check_if_aged, &aged);
1955                 if (!aged) {
1956                         (void) pthread_mutex_unlock(&cip->ci_lock);
1957                         return;
1958                 }
1959         }
1960 
1961         /*
1962          * Finally discard the case, clearing FMD_CF_RES_CMPL so we don't
1963          * do it twice.
1964          */
1965         fmd_module_lock(cip->ci_mod);
1966         fmd_list_delete(&cip->ci_mod->mod_cases, cip);
1967         fmd_module_unlock(cip->ci_mod);
1968         fmd_asru_hash_delete_case(fmd.d_asrus, cp);
1969         cip->ci_flags &= ~FMD_CF_RES_CMPL;
1970         (void) pthread_mutex_unlock(&cip->ci_lock);
1971         fmd_case_rele(cp);
1972 }
1973 
1974 /*
1975  * Transition the specified case to *at least* the specified state by first
1976  * re-validating the suspect list using the resource cache.  This function is
1977  * employed by the checkpoint code when restoring a saved, solved case to see
1978  * if the state of the case has effectively changed while fmd was not running
1979  * or the module was not loaded.
1980  */
1981 void
1982 fmd_case_transition_update(fmd_case_t *cp, uint_t state, uint_t flags)
1983 {
1984         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1985 
1986         int usable = 0;         /* are any suspects usable? */
1987 
1988         ASSERT(state >= FMD_CASE_SOLVED);
1989         (void) pthread_mutex_lock(&cip->ci_lock);
1990 
1991         fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_usable, &usable);
1992 
1993         (void) pthread_mutex_unlock(&cip->ci_lock);
1994 
1995         if (!usable) {
1996                 state = MAX(state, FMD_CASE_CLOSE_WAIT);
1997                 flags |= FMD_CF_ISOLATED;
1998         }
1999 
2000         fmd_case_transition(cp, state, flags);
2001 }
2002 
2003 void
2004 fmd_case_setdirty(fmd_case_t *cp)
2005 {
2006         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2007 
2008         (void) pthread_mutex_lock(&cip->ci_lock);
2009         cip->ci_flags |= FMD_CF_DIRTY;
2010         (void) pthread_mutex_unlock(&cip->ci_lock);
2011 
2012         fmd_module_setcdirty(cip->ci_mod);
2013 }
2014 
2015 void
2016 fmd_case_clrdirty(fmd_case_t *cp)
2017 {
2018         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2019 
2020         (void) pthread_mutex_lock(&cip->ci_lock);
2021         cip->ci_flags &= ~FMD_CF_DIRTY;
2022         (void) pthread_mutex_unlock(&cip->ci_lock);
2023 }
2024 
2025 void
2026 fmd_case_commit(fmd_case_t *cp)
2027 {
2028         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2029         fmd_case_item_t *cit;
2030 
2031         (void) pthread_mutex_lock(&cip->ci_lock);
2032 
2033         if (cip->ci_flags & FMD_CF_DIRTY) {
2034                 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
2035                         fmd_event_commit(cit->cit_event);
2036 
2037                 if (cip->ci_principal != NULL)
2038                         fmd_event_commit(cip->ci_principal);
2039 
2040                 fmd_buf_hash_commit(&cip->ci_bufs);
2041                 cip->ci_flags &= ~FMD_CF_DIRTY;
2042         }
2043 
2044         (void) pthread_mutex_unlock(&cip->ci_lock);
2045 }
2046 
2047 /*
2048  * On proxy side, send back repair/acquit/etc request to diagnosing side
2049  */
2050 void
2051 fmd_case_xprt_updated(fmd_case_t *cp)
2052 {
2053         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2054         nvlist_t **nva;
2055         uint8_t *ba;
2056         int msg = B_TRUE;
2057         int count = 0;
2058         fmd_case_lst_t fcl;
2059 
2060         ASSERT(cip->ci_xprt != NULL);
2061         (void) pthread_mutex_lock(&cip->ci_lock);
2062         ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects);
2063         nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects);
2064         fcl.fcl_countp = &count;
2065         fcl.fcl_maxcount = cip->ci_nsuspects;
2066         fcl.fcl_msgp = &msg;
2067         fcl.fcl_ba = ba;
2068         fcl.fcl_nva = nva;
2069         fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl);
2070         (void) pthread_mutex_unlock(&cip->ci_lock);
2071         fmd_xprt_updated(cip->ci_xprt, cip->ci_uuid, ba, cip->ci_proxy_asru,
2072             count);
2073 }
2074 
2075 /*
2076  * fmd_case_update_status() can be called on either the proxy side when a
2077  * list.suspect is received, or on the diagnosing side when an update request
2078  * is received from the proxy. It updates the status in the resource cache.
2079  */
2080 void
2081 fmd_case_update_status(fmd_case_t *cp, uint8_t *statusp, uint8_t *proxy_asrup,
2082     uint8_t *diag_asrup)
2083 {
2084         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2085         int count = 0;
2086         fmd_asru_update_status_t faus;
2087 
2088         /*
2089          * update status of resource cache entries
2090          */
2091         faus.faus_countp = &count;
2092         faus.faus_maxcount = cip->ci_nsuspects;
2093         faus.faus_ba = statusp;
2094         faus.faus_proxy_asru = proxy_asrup;
2095         faus.faus_diag_asru = diag_asrup;
2096         faus.faus_is_proxy = (cip->ci_xprt != NULL);
2097         (void) pthread_mutex_lock(&cip->ci_lock);
2098         fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_update_status,
2099             &faus);
2100         (void) pthread_mutex_unlock(&cip->ci_lock);
2101 }
2102 
2103 /*
2104  * Called on either the proxy side or the diag side when a repair has taken
2105  * place on the other side but this side may know the asru "contains"
2106  * relationships.
2107  */
2108 void
2109 fmd_case_update_containees(fmd_case_t *cp)
2110 {
2111         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2112 
2113         (void) pthread_mutex_lock(&cip->ci_lock);
2114         fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
2115             fmd_asru_update_containees, NULL);
2116         (void) pthread_mutex_unlock(&cip->ci_lock);
2117 }
2118 
2119 /*
2120  * fmd_case_close_status() is called on diagnosing side when proxy side
2121  * has had a uuclose. It updates the status in the resource cache.
2122  */
2123 void
2124 fmd_case_close_status(fmd_case_t *cp)
2125 {
2126         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2127         int count = 0;
2128         fmd_asru_close_status_t facs;
2129 
2130         /*
2131          * update status of resource cache entries
2132          */
2133         facs.facs_countp = &count;
2134         facs.facs_maxcount = cip->ci_nsuspects;
2135         (void) pthread_mutex_lock(&cip->ci_lock);
2136         fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_close_status,
2137             &facs);
2138         (void) pthread_mutex_unlock(&cip->ci_lock);
2139 }
2140 
2141 /*
2142  * Indicate that the case may need to change state because one or more of the
2143  * ASRUs named as a suspect has changed state.  We examine all the suspects
2144  * and if none are still faulty, we initiate a case close transition.
2145  */
2146 void
2147 fmd_case_update(fmd_case_t *cp)
2148 {
2149         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2150         uint_t cstate;
2151         int faulty = 0;
2152 
2153         (void) pthread_mutex_lock(&cip->ci_lock);
2154         cstate = cip->ci_state;
2155 
2156         if (cip->ci_state < FMD_CASE_SOLVED) {
2157                 (void) pthread_mutex_unlock(&cip->ci_lock);
2158                 return; /* update is not appropriate */
2159         }
2160 
2161         if (cip->ci_flags & FMD_CF_REPAIRED) {
2162                 (void) pthread_mutex_unlock(&cip->ci_lock);
2163                 return; /* already repaired */
2164         }
2165 
2166         TRACE((FMD_DBG_CASE, "case update %s", cip->ci_uuid));
2167         fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
2168         (void) pthread_mutex_unlock(&cip->ci_lock);
2169 
2170         if (faulty) {
2171                 nvlist_t *nvl;
2172                 fmd_event_t *e;
2173                 char *class;
2174 
2175                 TRACE((FMD_DBG_CASE, "sending list.updated %s", cip->ci_uuid));
2176                 nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
2177                 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
2178                 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
2179                 (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
2180                 fmd_log_append(fmd.d_fltlog, e, cp);
2181                 (void) pthread_rwlock_unlock(&fmd.d_log_lock);
2182                 fmd_dispq_dispatch(fmd.d_disp, e, class);
2183                 return; /* one or more suspects are still marked faulty */
2184         }
2185 
2186         if (cstate == FMD_CASE_CLOSED)
2187                 fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
2188         else
2189                 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
2190 }
2191 
2192 /*
2193  * Delete a closed case from the module's case list once the fmdo_close() entry
2194  * point has run to completion.  If the case is owned by a transport module,
2195  * tell the transport to proxy a case close on the other end of the transport.
2196  * Transition to the appropriate next state based on ci_flags.  This
2197  * function represents the end of CLOSE_WAIT and transitions the case to either
2198  * CLOSED or REPAIRED or discards it entirely because it was never solved;
2199  * refer to the topmost block comment explaining the state machine for details.
2200  */
2201 void
2202 fmd_case_delete(fmd_case_t *cp)
2203 {
2204         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2205         fmd_modstat_t *msp;
2206         size_t buftotal;
2207 
2208         TRACE((FMD_DBG_CASE, "case delete %s", cip->ci_uuid));
2209         ASSERT(fmd_module_locked(cip->ci_mod));
2210         fmd_list_delete(&cip->ci_mod->mod_cases, cip);
2211         buftotal = fmd_buf_hash_destroy(&cip->ci_bufs);
2212 
2213         (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
2214         msp = cip->ci_mod->mod_stats;
2215 
2216         ASSERT(msp->ms_caseopen.fmds_value.ui64 != 0);
2217         msp->ms_caseopen.fmds_value.ui64--;
2218 
2219         ASSERT(msp->ms_buftotal.fmds_value.ui64 >= buftotal);
2220         msp->ms_buftotal.fmds_value.ui64 -= buftotal;
2221 
2222         (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
2223 
2224         if (cip->ci_xprt == NULL)
2225                 fmd_module_setcdirty(cip->ci_mod);
2226 
2227         fmd_module_rele(cip->ci_mod);
2228         cip->ci_mod = fmd.d_rmod;
2229         fmd_module_hold(cip->ci_mod);
2230 
2231         /*
2232          * If the case has been solved, then retain it
2233          * on the root module's case list at least until we're transitioned.
2234          * Otherwise free the case with our final fmd_case_rele() below.
2235          */
2236         if (cip->ci_flags & FMD_CF_SOLVED) {
2237                 fmd_module_lock(cip->ci_mod);
2238                 fmd_list_append(&cip->ci_mod->mod_cases, cip);
2239                 fmd_module_unlock(cip->ci_mod);
2240                 fmd_case_hold(cp);
2241         }
2242 
2243         /*
2244          * Transition onwards to REPAIRED or CLOSED as originally requested.
2245          * Note that for proxy case if we're transitioning to CLOSED it means
2246          * the case was isolated locally, so call fmd_xprt_uuclose() to notify
2247          * the diagnosing side. No need to notify the diagnosing side if we are
2248          * transitioning to REPAIRED as we only do this when requested to do
2249          * so by the diagnosing side anyway.
2250          */
2251         if (cip->ci_flags & FMD_CF_REPAIRED)
2252                 fmd_case_transition(cp, FMD_CASE_REPAIRED, 0);
2253         else if (cip->ci_flags & FMD_CF_ISOLATED) {
2254                 fmd_case_transition(cp, FMD_CASE_CLOSED, 0);
2255                 if (cip->ci_xprt != NULL)
2256                         fmd_xprt_uuclose(cip->ci_xprt, cip->ci_uuid);
2257         }
2258 
2259         fmd_case_rele(cp);
2260 }
2261 
2262 void
2263 fmd_case_discard(fmd_case_t *cp, boolean_t delete_from_asru_cache)
2264 {
2265         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2266 
2267         (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
2268         cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64--;
2269         (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
2270 
2271         ASSERT(fmd_module_locked(cip->ci_mod));
2272         fmd_list_delete(&cip->ci_mod->mod_cases, cip);
2273         if (delete_from_asru_cache) {
2274                 (void) pthread_mutex_lock(&cip->ci_lock);
2275                 fmd_asru_hash_delete_case(fmd.d_asrus, cp);
2276                 (void) pthread_mutex_unlock(&cip->ci_lock);
2277         }
2278         fmd_case_rele(cp);
2279 }
2280 
2281 /*
2282  * Indicate that the problem corresponding to a case has been repaired by
2283  * clearing the faulty bit on each ASRU named as a suspect.  If the case hasn't
2284  * already been closed, this function initiates the transition to CLOSE_WAIT.
2285  * The caller must have the case held from fmd_case_hash_lookup(), so we can
2286  * grab and drop ci_lock without the case being able to be freed in between.
2287  */
2288 int
2289 fmd_case_repair(fmd_case_t *cp)
2290 {
2291         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2292         uint_t cstate;
2293         fmd_asru_rep_arg_t fara;
2294 
2295         (void) pthread_mutex_lock(&cip->ci_lock);
2296         cstate = cip->ci_state;
2297 
2298         if (cstate < FMD_CASE_SOLVED) {
2299                 (void) pthread_mutex_unlock(&cip->ci_lock);
2300                 return (fmd_set_errno(EFMD_CASE_STATE));
2301         }
2302 
2303         if (cip->ci_flags & FMD_CF_REPAIRED) {
2304                 (void) pthread_mutex_unlock(&cip->ci_lock);
2305                 return (0); /* already repaired */
2306         }
2307 
2308         TRACE((FMD_DBG_CASE, "case repair %s", cip->ci_uuid));
2309         fara.fara_reason = FMD_ASRU_REPAIRED;
2310         fara.fara_bywhat = FARA_BY_CASE;
2311         fara.fara_rval = NULL;
2312         fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, &fara);
2313         (void) pthread_mutex_unlock(&cip->ci_lock);
2314 
2315         /*
2316          * if this is a proxied case, send the repair across the transport.
2317          * The remote side will then do the repair and send a list.repaired back
2318          * again such that we can finally repair the case on this side.
2319          */
2320         if (cip->ci_xprt != NULL) {
2321                 fmd_case_xprt_updated(cp);
2322                 return (0);
2323         }
2324 
2325         if (cstate == FMD_CASE_CLOSED)
2326                 fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
2327         else
2328                 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
2329 
2330         return (0);
2331 }
2332 
2333 int
2334 fmd_case_acquit(fmd_case_t *cp)
2335 {
2336         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2337         uint_t cstate;
2338         fmd_asru_rep_arg_t fara;
2339 
2340         (void) pthread_mutex_lock(&cip->ci_lock);
2341         cstate = cip->ci_state;
2342 
2343         if (cstate < FMD_CASE_SOLVED) {
2344                 (void) pthread_mutex_unlock(&cip->ci_lock);
2345                 return (fmd_set_errno(EFMD_CASE_STATE));
2346         }
2347 
2348         if (cip->ci_flags & FMD_CF_REPAIRED) {
2349                 (void) pthread_mutex_unlock(&cip->ci_lock);
2350                 return (0); /* already repaired */
2351         }
2352 
2353         TRACE((FMD_DBG_CASE, "case acquit %s", cip->ci_uuid));
2354         fara.fara_reason = FMD_ASRU_ACQUITTED;
2355         fara.fara_bywhat = FARA_BY_CASE;
2356         fara.fara_rval = NULL;
2357         fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, &fara);
2358         (void) pthread_mutex_unlock(&cip->ci_lock);
2359 
2360         /*
2361          * if this is a proxied case, send the repair across the transport.
2362          * The remote side will then do the repair and send a list.repaired back
2363          * again such that we can finally repair the case on this side.
2364          */
2365         if (cip->ci_xprt != NULL) {
2366                 fmd_case_xprt_updated(cp);
2367                 return (0);
2368         }
2369 
2370         if (cstate == FMD_CASE_CLOSED)
2371                 fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
2372         else
2373                 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
2374 
2375         return (0);
2376 }
2377 
2378 int
2379 fmd_case_contains(fmd_case_t *cp, fmd_event_t *ep)
2380 {
2381         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2382         fmd_case_item_t *cit;
2383         uint_t state;
2384         int rv = 0;
2385 
2386         (void) pthread_mutex_lock(&cip->ci_lock);
2387 
2388         if (cip->ci_state >= FMD_CASE_SOLVED)
2389                 state = FMD_EVS_DIAGNOSED;
2390         else
2391                 state = FMD_EVS_ACCEPTED;
2392 
2393         for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
2394                 if ((rv = fmd_event_equal(ep, cit->cit_event)) != 0)
2395                         break;
2396         }
2397 
2398         if (rv == 0 && cip->ci_principal != NULL)
2399                 rv = fmd_event_equal(ep, cip->ci_principal);
2400 
2401         (void) pthread_mutex_unlock(&cip->ci_lock);
2402 
2403         if (rv != 0)
2404                 fmd_event_transition(ep, state);
2405 
2406         return (rv);
2407 }
2408 
2409 int
2410 fmd_case_orphaned(fmd_case_t *cp)
2411 {
2412         return (((fmd_case_impl_t *)cp)->ci_mod == fmd.d_rmod);
2413 }
2414 
2415 void
2416 fmd_case_settime(fmd_case_t *cp, time_t tv_sec, suseconds_t tv_usec)
2417 {
2418         ((fmd_case_impl_t *)cp)->ci_tv.tv_sec = tv_sec;
2419         ((fmd_case_impl_t *)cp)->ci_tv.tv_usec = tv_usec;
2420         ((fmd_case_impl_t *)cp)->ci_tv_valid = 1;
2421 }
2422 
2423 void
2424 fmd_case_set_injected(fmd_case_t *cp)
2425 {
2426         ((fmd_case_impl_t *)cp)->ci_injected = 1;
2427 }
2428 
2429 void
2430 fmd_case_set_de_fmri(fmd_case_t *cp, nvlist_t *nvl)
2431 {
2432         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2433 
2434         if (cip->ci_diag_de)
2435                 nvlist_free(cip->ci_diag_de);
2436         cip->ci_diag_de = nvl;
2437 }
2438 
2439 void
2440 fmd_case_setcode(fmd_case_t *cp, char *code)
2441 {
2442         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2443 
2444         cip->ci_code = fmd_strdup(code, FMD_SLEEP);
2445         cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0;
2446 }
2447 
2448 /*ARGSUSED*/
2449 static void
2450 fmd_case_repair_replay_case(fmd_case_t *cp, void *arg)
2451 {
2452         int not_faulty = 0;
2453         int faulty = 0;
2454         nvlist_t *nvl;
2455         fmd_event_t *e;
2456         char *class;
2457         int any_unusable_and_present = 0;
2458         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2459 
2460         if (cip->ci_state < FMD_CASE_SOLVED || cip->ci_xprt != NULL)
2461                 return;
2462 
2463         if (cip->ci_state == FMD_CASE_RESOLVED) {
2464                 cip->ci_flags |= FMD_CF_RES_CMPL;
2465                 return;
2466         }
2467 
2468         fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
2469         fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_not_faulty,
2470             &not_faulty);
2471 
2472         if (cip->ci_state >= FMD_CASE_REPAIRED && !faulty) {
2473                 /*
2474                  * If none of the suspects is faulty, replay the list.repaired.
2475                  * If all suspects are already either usable or not present then
2476                  * also transition straight to RESOLVED state.
2477                  */
2478                 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
2479                     fmd_case_unusable_and_present, &any_unusable_and_present);
2480                 if (!any_unusable_and_present) {
2481                         cip->ci_state = FMD_CASE_RESOLVED;
2482 
2483                         TRACE((FMD_DBG_CASE, "replay sending list.repaired %s",
2484                             cip->ci_uuid));
2485                         nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
2486                         (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
2487                         e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
2488                             class);
2489                         fmd_dispq_dispatch(fmd.d_disp, e, class);
2490 
2491                         TRACE((FMD_DBG_CASE, "replay sending list.resolved %s",
2492                             cip->ci_uuid));
2493                         fmd_case_publish(cp, FMD_CASE_RESOLVED);
2494                         fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
2495                             fmd_asru_log_resolved, NULL);
2496                         cip->ci_flags |= FMD_CF_RES_CMPL;
2497                 } else {
2498                         TRACE((FMD_DBG_CASE, "replay sending list.repaired %s",
2499                             cip->ci_uuid));
2500                         nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
2501                         (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
2502                         e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
2503                             class);
2504                         fmd_dispq_dispatch(fmd.d_disp, e, class);
2505                 }
2506         } else if (faulty && not_faulty) {
2507                 /*
2508                  * if some but not all of the suspects are not faulty, replay
2509                  * the list.updated.
2510                  */
2511                 TRACE((FMD_DBG_CASE, "replay sending list.updated %s",
2512                     cip->ci_uuid));
2513                 nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
2514                 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
2515                 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
2516                 fmd_dispq_dispatch(fmd.d_disp, e, class);
2517         }
2518 }
2519 
2520 void
2521 fmd_case_repair_replay()
2522 {
2523         fmd_case_hash_apply(fmd.d_cases, fmd_case_repair_replay_case, NULL);
2524 }