1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2012 Milan Jurik. All rights reserved.
  25  *
  26  * fme.c -- fault management exercise module
  27  *
  28  * this module provides the simulated fault management exercise.
  29  */
  30 
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34 #include <strings.h>
  35 #include <ctype.h>
  36 #include <alloca.h>
  37 #include <libnvpair.h>
  38 #include <sys/fm/protocol.h>
  39 #include <fm/fmd_api.h>
  40 #include "alloc.h"
  41 #include "out.h"
  42 #include "stats.h"
  43 #include "stable.h"
  44 #include "literals.h"
  45 #include "lut.h"
  46 #include "tree.h"
  47 #include "ptree.h"
  48 #include "itree.h"
  49 #include "ipath.h"
  50 #include "fme.h"
  51 #include "evnv.h"
  52 #include "eval.h"
  53 #include "config.h"
  54 #include "platform.h"
  55 #include "esclex.h"
  56 
  57 /* imported from eft.c... */
  58 extern hrtime_t Hesitate;
  59 extern char *Serd_Override;
  60 extern nv_alloc_t Eft_nv_hdl;
  61 extern int Max_fme;
  62 extern fmd_hdl_t *Hdl;
  63 
  64 static int Istat_need_save;
  65 static int Serd_need_save;
  66 void istat_save(void);
  67 void serd_save(void);
  68 
  69 /* fme under construction is global so we can free it on module abort */
  70 static struct fme *Nfmep;
  71 
  72 static int Undiag_reason = UD_VAL_UNKNOWN;
  73 
  74 static int Nextid = 0;
  75 
  76 static int Open_fme_count = 0;  /* Count of open FMEs */
  77 
  78 /* list of fault management exercises underway */
  79 static struct fme {
  80         struct fme *next;               /* next exercise */
  81         unsigned long long ull;         /* time when fme was created */
  82         int id;                         /* FME id */
  83         struct config *config;          /* cooked configuration data */
  84         struct lut *eventtree;          /* propagation tree for this FME */
  85         /*
  86          * The initial error report that created this FME is kept in
  87          * two forms.  e0 points to the instance tree node and is used
  88          * by fme_eval() as the starting point for the inference
  89          * algorithm.  e0r is the event handle FMD passed to us when
  90          * the ereport first arrived and is used when setting timers,
  91          * which are always relative to the time of this initial
  92          * report.
  93          */
  94         struct event *e0;
  95         fmd_event_t *e0r;
  96 
  97         id_t    timer;                  /* for setting an fmd time-out */
  98 
  99         struct event *ecurrent;         /* ereport under consideration */
 100         struct event *suspects;         /* current suspect list */
 101         struct event *psuspects;        /* previous suspect list */
 102         int nsuspects;                  /* count of suspects */
 103         int posted_suspects;            /* true if we've posted a diagnosis */
 104         int uniqobs;                    /* number of unique events observed */
 105         int peek;                       /* just peeking, don't track suspects */
 106         int overflow;                   /* true if overflow FME */
 107         enum fme_state {
 108                 FME_NOTHING = 5000,     /* not evaluated yet */
 109                 FME_WAIT,               /* need to wait for more info */
 110                 FME_CREDIBLE,           /* suspect list is credible */
 111                 FME_DISPROVED,          /* no valid suspects found */
 112                 FME_DEFERRED            /* don't know yet (k-count not met) */
 113         } state;
 114 
 115         unsigned long long pull;        /* time passed since created */
 116         unsigned long long wull;        /* wait until this time for re-eval */
 117         struct event *observations;     /* observation list */
 118         struct lut *globals;            /* values of global variables */
 119         /* fmd interfacing */
 120         fmd_hdl_t *hdl;                 /* handle for talking with fmd */
 121         fmd_case_t *fmcase;             /* what fmd 'case' we associate with */
 122         /* stats */
 123         struct stats *Rcount;
 124         struct stats *Hcallcount;
 125         struct stats *Rcallcount;
 126         struct stats *Ccallcount;
 127         struct stats *Ecallcount;
 128         struct stats *Tcallcount;
 129         struct stats *Marrowcount;
 130         struct stats *diags;
 131 } *FMElist, *EFMElist, *ClosedFMEs;
 132 
 133 static struct case_list {
 134         fmd_case_t *fmcase;
 135         struct case_list *next;
 136 } *Undiagablecaselist;
 137 
 138 static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
 139 static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
 140         unsigned long long at_latest_by, unsigned long long *pdelay);
 141 static struct node *eventprop_lookup(struct event *ep, const char *propname);
 142 static struct node *pathstring2epnamenp(char *path);
 143 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
 144         fmd_case_t *fmcase, nvlist_t *detector, char *arg);
 145 static char *undiag_2reason_str(int ud, char *arg);
 146 static const char *undiag_2defect_str(int ud);
 147 static void restore_suspects(struct fme *fmep);
 148 static void save_suspects(struct fme *fmep);
 149 static void destroy_fme(struct fme *f);
 150 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
 151     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
 152 static void istat_counter_reset_cb(struct istat_entry *entp,
 153     struct stats *statp, const struct ipath *ipp);
 154 static void istat_counter_topo_chg_cb(struct istat_entry *entp,
 155     struct stats *statp, void *unused);
 156 static void serd_reset_cb(struct serd_entry *entp, void *unused,
 157     const struct ipath *ipp);
 158 static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
 159     void *unused2);
 160 static void destroy_fme_bufs(struct fme *fp);
 161 
 162 static struct fme *
 163 alloc_fme(void)
 164 {
 165         struct fme *fmep;
 166 
 167         fmep = MALLOC(sizeof (*fmep));
 168         bzero(fmep, sizeof (*fmep));
 169         return (fmep);
 170 }
 171 
 172 /*
 173  * fme_ready -- called when all initialization of the FME (except for
 174  *      stats) has completed successfully.  Adds the fme to global lists
 175  *      and establishes its stats.
 176  */
 177 static struct fme *
 178 fme_ready(struct fme *fmep)
 179 {
 180         char nbuf[100];
 181 
 182         Nfmep = NULL;   /* don't need to free this on module abort now */
 183 
 184         if (EFMElist) {
 185                 EFMElist->next = fmep;
 186                 EFMElist = fmep;
 187         } else
 188                 FMElist = EFMElist = fmep;
 189 
 190         (void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
 191         fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
 192         (void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
 193         fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
 194         (void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
 195         fmep->Rcallcount = stats_new_counter(nbuf,
 196             "calls to requirements_test()", 1);
 197         (void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
 198         fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
 199         (void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
 200         fmep->Ecallcount =
 201             stats_new_counter(nbuf, "calls to effects_test()", 1);
 202         (void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
 203         fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
 204         (void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
 205         fmep->Marrowcount = stats_new_counter(nbuf,
 206             "arrows marked by mark_arrows()", 1);
 207         (void) sprintf(nbuf, "fme%d.diags", fmep->id);
 208         fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
 209 
 210         out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
 211         config_print(O_ALTFP|O_VERB2, fmep->config);
 212 
 213         return (fmep);
 214 }
 215 
 216 extern void ipath_dummy_lut(struct arrow *);
 217 extern struct lut *itree_create_dummy(const char *, const struct ipath *);
 218 
 219 /* ARGSUSED */
 220 static void
 221 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
 222 {
 223         struct bubble *bp;
 224         struct arrowlist *ap;
 225 
 226         for (bp = itree_next_bubble(ep, NULL); bp;
 227             bp = itree_next_bubble(ep, bp)) {
 228                 if (bp->t != B_FROM)
 229                         continue;
 230                 for (ap = itree_next_arrow(bp, NULL); ap;
 231                     ap = itree_next_arrow(bp, ap)) {
 232                         ap->arrowp->pnode->u.arrow.needed = 1;
 233                         ipath_dummy_lut(ap->arrowp);
 234                 }
 235         }
 236 }
 237 
 238 /* ARGSUSED */
 239 static void
 240 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
 241 {
 242         struct bubble *bp;
 243         struct arrowlist *ap;
 244 
 245         for (bp = itree_next_bubble(ep, NULL); bp;
 246             bp = itree_next_bubble(ep, bp)) {
 247                 if (bp->t != B_FROM)
 248                         continue;
 249                 for (ap = itree_next_arrow(bp, NULL); ap;
 250                     ap = itree_next_arrow(bp, ap))
 251                         ap->arrowp->pnode->u.arrow.needed = 0;
 252         }
 253 }
 254 
 255 static void globals_destructor(void *left, void *right, void *arg);
 256 static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
 257 
 258 static boolean_t
 259 prune_propagations(const char *e0class, const struct ipath *e0ipp)
 260 {
 261         char nbuf[100];
 262         unsigned long long my_delay = TIMEVAL_EVENTUALLY;
 263         extern struct lut *Usednames;
 264 
 265         Nfmep = alloc_fme();
 266         Nfmep->id = Nextid;
 267         Nfmep->state = FME_NOTHING;
 268         Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
 269         if ((Nfmep->e0 =
 270             itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
 271                 itree_free(Nfmep->eventtree);
 272                 FREE(Nfmep);
 273                 Nfmep = NULL;
 274                 return (B_FALSE);
 275         }
 276         Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
 277         Nfmep->e0->count++;
 278 
 279         (void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
 280         Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
 281         (void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
 282         Nfmep->Hcallcount =
 283             stats_new_counter(nbuf, "calls to hypothesise()", 1);
 284         (void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
 285         Nfmep->Rcallcount = stats_new_counter(nbuf,
 286             "calls to requirements_test()", 1);
 287         (void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
 288         Nfmep->Ccallcount =
 289             stats_new_counter(nbuf, "calls to causes_test()", 1);
 290         (void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
 291         Nfmep->Ecallcount =
 292             stats_new_counter(nbuf, "calls to effects_test()", 1);
 293         (void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
 294         Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
 295         (void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
 296         Nfmep->Marrowcount = stats_new_counter(nbuf,
 297             "arrows marked by mark_arrows()", 1);
 298         (void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
 299         Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
 300 
 301         Nfmep->peek = 1;
 302         lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
 303         lut_free(Usednames, NULL, NULL);
 304         Usednames = NULL;
 305         lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
 306         (void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
 307         itree_prune(Nfmep->eventtree);
 308         lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
 309 
 310         stats_delete(Nfmep->Rcount);
 311         stats_delete(Nfmep->Hcallcount);
 312         stats_delete(Nfmep->Rcallcount);
 313         stats_delete(Nfmep->Ccallcount);
 314         stats_delete(Nfmep->Ecallcount);
 315         stats_delete(Nfmep->Tcallcount);
 316         stats_delete(Nfmep->Marrowcount);
 317         stats_delete(Nfmep->diags);
 318         itree_free(Nfmep->eventtree);
 319         lut_free(Nfmep->globals, globals_destructor, NULL);
 320         FREE(Nfmep);
 321         return (B_TRUE);
 322 }
 323 
 324 static struct fme *
 325 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
 326         fmd_case_t *fmcase, fmd_event_t *ffep, nvlist_t *nvl)
 327 {
 328         struct cfgdata *cfgdata;
 329         int init_size;
 330         extern int alloc_total();
 331         nvlist_t *detector = NULL;
 332         char *pathstr;
 333         char *arg;
 334 
 335         /*
 336          * First check if e0ipp is actually in the topology so we can give a
 337          * more useful error message.
 338          */
 339         ipathlastcomp(e0ipp);
 340         pathstr = ipath2str(NULL, e0ipp);
 341         cfgdata = config_snapshot();
 342         platform_units_translate(0, cfgdata->cooked, NULL, NULL,
 343             &detector, pathstr);
 344         FREE(pathstr);
 345         structconfig_free(cfgdata->cooked);
 346         config_free(cfgdata);
 347         if (detector == NULL) {
 348                 /* See if class permits silent discard on unknown component. */
 349                 if (lut_lookup(Ereportenames_discard, (void *)e0class, NULL)) {
 350                         out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
 351                             "to component path, but silent discard allowed.",
 352                             e0class);
 353                 } else {
 354                         Undiag_reason = UD_VAL_BADEVENTPATH;
 355                         (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
 356                             &detector);
 357                         arg = ipath2str(e0class, e0ipp);
 358                         publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
 359                         FREE(arg);
 360                 }
 361                 return (NULL);
 362         }
 363 
 364         /*
 365          * Next run a quick first pass of the rules with a dummy config. This
 366          * allows us to prune those rules which can't possibly cause this
 367          * ereport.
 368          */
 369         if (!prune_propagations(e0class, e0ipp)) {
 370                 /*
 371                  * The fault class must have been in the rules or we would
 372                  * not have registered for it (and got a "nosub"), and the
 373                  * pathname must be in the topology or we would have failed the
 374                  * previous test. So to get here means the combination of
 375                  * class and pathname in the ereport must be invalid.
 376                  */
 377                 Undiag_reason = UD_VAL_BADEVENTCLASS;
 378                 arg = ipath2str(e0class, e0ipp);
 379                 publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
 380                 nvlist_free(detector);
 381                 FREE(arg);
 382                 return (NULL);
 383         }
 384 
 385         /*
 386          * Now go ahead and create the real fme using the pruned rules.
 387          */
 388         init_size = alloc_total();
 389         out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
 390         nvlist_free(detector);
 391         pathstr = ipath2str(NULL, e0ipp);
 392         cfgdata = config_snapshot();
 393         platform_units_translate(0, cfgdata->cooked, NULL, NULL,
 394             &detector, pathstr);
 395         FREE(pathstr);
 396         platform_save_config(hdl, fmcase);
 397         out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
 398             alloc_total() - init_size);
 399 
 400         Nfmep = alloc_fme();
 401 
 402         Nfmep->id = Nextid++;
 403         Nfmep->config = cfgdata->cooked;
 404         config_free(cfgdata);
 405         Nfmep->posted_suspects = 0;
 406         Nfmep->uniqobs = 0;
 407         Nfmep->state = FME_NOTHING;
 408         Nfmep->pull = 0ULL;
 409         Nfmep->overflow = 0;
 410 
 411         Nfmep->fmcase = fmcase;
 412         Nfmep->hdl = hdl;
 413 
 414         if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
 415                 Undiag_reason = UD_VAL_INSTFAIL;
 416                 arg = ipath2str(e0class, e0ipp);
 417                 publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
 418                 nvlist_free(detector);
 419                 FREE(arg);
 420                 structconfig_free(Nfmep->config);
 421                 destroy_fme_bufs(Nfmep);
 422                 FREE(Nfmep);
 423                 Nfmep = NULL;
 424                 return (NULL);
 425         }
 426 
 427         itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);
 428 
 429         if ((Nfmep->e0 =
 430             itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
 431                 Undiag_reason = UD_VAL_BADEVENTI;
 432                 arg = ipath2str(e0class, e0ipp);
 433                 publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
 434                 nvlist_free(detector);
 435                 FREE(arg);
 436                 itree_free(Nfmep->eventtree);
 437                 structconfig_free(Nfmep->config);
 438                 destroy_fme_bufs(Nfmep);
 439                 FREE(Nfmep);
 440                 Nfmep = NULL;
 441                 return (NULL);
 442         }
 443 
 444         nvlist_free(detector);
 445         return (fme_ready(Nfmep));
 446 }
 447 
 448 void
 449 fme_fini(void)
 450 {
 451         struct fme *sfp, *fp;
 452         struct case_list *ucasep, *nextcasep;
 453 
 454         ucasep = Undiagablecaselist;
 455         while (ucasep != NULL) {
 456                 nextcasep = ucasep->next;
 457                 FREE(ucasep);
 458                 ucasep = nextcasep;
 459         }
 460         Undiagablecaselist = NULL;
 461 
 462         /* clean up closed fmes */
 463         fp = ClosedFMEs;
 464         while (fp != NULL) {
 465                 sfp = fp->next;
 466                 destroy_fme(fp);
 467                 fp = sfp;
 468         }
 469         ClosedFMEs = NULL;
 470 
 471         fp = FMElist;
 472         while (fp != NULL) {
 473                 sfp = fp->next;
 474                 destroy_fme(fp);
 475                 fp = sfp;
 476         }
 477         FMElist = EFMElist = NULL;
 478 
 479         /* if we were in the middle of creating an fme, free it now */
 480         if (Nfmep) {
 481                 destroy_fme(Nfmep);
 482                 Nfmep = NULL;
 483         }
 484 }
 485 
/*
 * Space allocated for a buffer name.  20 bytes holds the longest name
 * used, "observed%d.nvp", for up to a ridiculous 9,999,999 unique
 * observations (8 + 7 + 4 characters plus the terminating NUL).
 */
 490 #define OBBUFNMSZ 20
 491 
 492 /*
 493  *  serialize_observation
 494  *
 495  *  Create a recoverable version of the current observation
 496  *  (f->ecurrent).  We keep a serialized version of each unique
 497  *  observation in order that we may resume correctly the fme in the
 498  *  correct state if eft or fmd crashes and we're restarted.
 499  */
 500 static void
 501 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
 502 {
 503         size_t pkdlen;
 504         char tmpbuf[OBBUFNMSZ];
 505         char *pkd = NULL;
 506         char *estr;
 507 
 508         (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
 509         estr = ipath2str(cls, ipp);
 510         fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
 511         fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
 512             strlen(estr) + 1);
 513         FREE(estr);
 514 
 515         if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
 516                 (void) snprintf(tmpbuf,
 517                     OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
 518                 if (nvlist_xpack(fp->ecurrent->nvp,
 519                     &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
 520                         out(O_DIE|O_SYS, "pack of observed nvl failed");
 521                 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
 522                 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
 523                 FREE(pkd);
 524         }
 525 
 526         fp->uniqobs++;
 527         fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
 528             sizeof (fp->uniqobs));
 529 }
 530 
 531 /*
 532  *  init_fme_bufs -- We keep several bits of state about an fme for
 533  *      use if eft or fmd crashes and we're restarted.
 534  */
 535 static void
 536 init_fme_bufs(struct fme *fp)
 537 {
 538         fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
 539         fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
 540             sizeof (fp->pull));
 541 
 542         fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
 543         fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
 544             sizeof (fp->id));
 545 
 546         fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
 547         fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
 548             sizeof (fp->uniqobs));
 549 
 550         fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
 551             sizeof (fp->posted_suspects));
 552         fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
 553             (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
 554 }
 555 
 556 static void
 557 destroy_fme_bufs(struct fme *fp)
 558 {
 559         char tmpbuf[OBBUFNMSZ];
 560         int o;
 561 
 562         platform_restore_config(fp->hdl, fp->fmcase);
 563         fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
 564         fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
 565         fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
 566         fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
 567         fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
 568         fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
 569 
 570         for (o = 0; o < fp->uniqobs; o++) {
 571                 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
 572                 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
 573                 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
 574                 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
 575         }
 576 }
 577 
 578 /*
 579  * reconstitute_observations -- convert a case's serialized observations
 580  *      back into struct events.  Returns zero if all observations are
 581  *      successfully reconstituted.
 582  */
 583 static int
 584 reconstitute_observations(struct fme *fmep)
 585 {
 586         struct event *ep;
 587         struct node *epnamenp = NULL;
 588         size_t pkdlen;
 589         char *pkd = NULL;
 590         char *tmpbuf = alloca(OBBUFNMSZ);
 591         char *sepptr;
 592         char *estr;
 593         int ocnt;
 594         int elen;
 595 
 596         for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
 597                 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
 598                 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
 599                 if (elen == 0) {
 600                         out(O_ALTFP,
 601                             "reconstitute_observation: no %s buffer found.",
 602                             tmpbuf);
 603                         Undiag_reason = UD_VAL_MISSINGOBS;
 604                         break;
 605                 }
 606 
 607                 estr = MALLOC(elen);
 608                 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
 609                 sepptr = strchr(estr, '@');
 610                 if (sepptr == NULL) {
 611                         out(O_ALTFP,
 612                             "reconstitute_observation: %s: "
 613                             "missing @ separator in %s.",
 614                             tmpbuf, estr);
 615                         Undiag_reason = UD_VAL_MISSINGPATH;
 616                         FREE(estr);
 617                         break;
 618                 }
 619 
 620                 *sepptr = '\0';
 621                 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
 622                         out(O_ALTFP,
 623                             "reconstitute_observation: %s: "
 624                             "trouble converting path string \"%s\" "
 625                             "to internal representation.",
 626                             tmpbuf, sepptr + 1);
 627                         Undiag_reason = UD_VAL_MISSINGPATH;
 628                         FREE(estr);
 629                         break;
 630                 }
 631 
 632                 /* construct the event */
 633                 ep = itree_lookup(fmep->eventtree,
 634                     stable(estr), ipath(epnamenp));
 635                 if (ep == NULL) {
 636                         out(O_ALTFP,
 637                             "reconstitute_observation: %s: "
 638                             "lookup of  \"%s\" in itree failed.",
 639                             tmpbuf, ipath2str(estr, ipath(epnamenp)));
 640                         Undiag_reason = UD_VAL_BADOBS;
 641                         tree_free(epnamenp);
 642                         FREE(estr);
 643                         break;
 644                 }
 645                 tree_free(epnamenp);
 646 
 647                 /*
 648                  * We may or may not have a saved nvlist for the observation
 649                  */
 650                 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
 651                 pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
 652                 if (pkdlen != 0) {
 653                         pkd = MALLOC(pkdlen);
 654                         fmd_buf_read(fmep->hdl,
 655                             fmep->fmcase, tmpbuf, pkd, pkdlen);
 656                         ASSERT(ep->nvp == NULL);
 657                         if (nvlist_xunpack(pkd,
 658                             pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
 659                                 out(O_DIE|O_SYS, "pack of observed nvl failed");
 660                         FREE(pkd);
 661                 }
 662 
 663                 if (ocnt == 0)
 664                         fmep->e0 = ep;
 665 
 666                 FREE(estr);
 667                 fmep->ecurrent = ep;
 668                 ep->count++;
 669 
 670                 /* link it into list of observations seen */
 671                 ep->observations = fmep->observations;
 672                 fmep->observations = ep;
 673         }
 674 
 675         if (ocnt == fmep->uniqobs) {
 676                 (void) fme_ready(fmep);
 677                 return (0);
 678         }
 679 
 680         return (1);
 681 }
 682 
/*
 * fme_restart -- called during eft initialization.  Reconstitutes
 *	an in-progress fme.
 */
 687 void
 688 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
 689 {
 690         nvlist_t *defect;
 691         struct case_list *bad;
 692         struct fme *fmep;
 693         struct cfgdata *cfgdata;
 694         size_t rawsz;
 695         struct event *ep;
 696         char *tmpbuf = alloca(OBBUFNMSZ);
 697         char *sepptr;
 698         char *estr;
 699         int elen;
 700         struct node *epnamenp = NULL;
 701         int init_size;
 702         extern int alloc_total();
 703         char *reason;
 704 
 705         /*
 706          * ignore solved or closed cases
 707          */
 708         if (fmd_case_solved(hdl, inprogress) ||
 709             fmd_case_closed(hdl, inprogress))
 710                 return;
 711 
 712         fmep = alloc_fme();
 713         fmep->fmcase = inprogress;
 714         fmep->hdl = hdl;
 715 
 716         if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
 717                 out(O_ALTFP, "restart_fme: no saved posted status");
 718                 Undiag_reason = UD_VAL_MISSINGINFO;
 719                 goto badcase;
 720         } else {
 721                 fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
 722                     (void *)&fmep->posted_suspects,
 723                     sizeof (fmep->posted_suspects));
 724         }
 725 
 726         if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
 727                 out(O_ALTFP, "restart_fme: no saved id");
 728                 Undiag_reason = UD_VAL_MISSINGINFO;
 729                 goto badcase;
 730         } else {
 731                 fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
 732                     sizeof (fmep->id));
 733         }
 734         if (Nextid <= fmep->id)
 735                 Nextid = fmep->id + 1;
 736 
 737         out(O_ALTFP, "Replay FME %d", fmep->id);
 738 
 739         if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
 740                 out(O_ALTFP, "restart_fme: No config data");
 741                 Undiag_reason = UD_VAL_MISSINGINFO;
 742                 goto badcase;
 743         }
 744         fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
 745             sizeof (size_t));
 746 
 747         if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
 748                 out(O_ALTFP, "restart_fme: No event zero");
 749                 Undiag_reason = UD_VAL_MISSINGZERO;
 750                 goto badcase;
 751         }
 752 
 753         if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
 754                 out(O_ALTFP, "restart_fme: no saved wait time");
 755                 Undiag_reason = UD_VAL_MISSINGINFO;
 756                 goto badcase;
 757         } else {
 758                 fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
 759                     sizeof (fmep->pull));
 760         }
 761 
 762         if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
 763                 out(O_ALTFP, "restart_fme: no count of observations");
 764                 Undiag_reason = UD_VAL_MISSINGINFO;
 765                 goto badcase;
 766         } else {
 767                 fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
 768                     (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
 769         }
 770 
 771         (void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
 772         elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
 773         if (elen == 0) {
 774                 out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
 775                     tmpbuf);
 776                 Undiag_reason = UD_VAL_MISSINGOBS;
 777                 goto badcase;
 778         }
 779         estr = MALLOC(elen);
 780         fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
 781         sepptr = strchr(estr, '@');
 782         if (sepptr == NULL) {
 783                 out(O_ALTFP, "reconstitute_observation: %s: "
 784                     "missing @ separator in %s.",
 785                     tmpbuf, estr);
 786                 Undiag_reason = UD_VAL_MISSINGPATH;
 787                 FREE(estr);
 788                 goto badcase;
 789         }
 790         *sepptr = '\0';
 791         if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
 792                 out(O_ALTFP, "reconstitute_observation: %s: "
 793                     "trouble converting path string \"%s\" "
 794                     "to internal representation.", tmpbuf, sepptr + 1);
 795                 Undiag_reason = UD_VAL_MISSINGPATH;
 796                 FREE(estr);
 797                 goto badcase;
 798         }
 799         (void) prune_propagations(stable(estr), ipath(epnamenp));
 800         tree_free(epnamenp);
 801         FREE(estr);
 802 
 803         init_size = alloc_total();
 804         out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
 805         cfgdata = MALLOC(sizeof (struct cfgdata));
 806         cfgdata->cooked = NULL;
 807         cfgdata->devcache = NULL;
 808         cfgdata->devidcache = NULL;
 809         cfgdata->tpcache = NULL;
 810         cfgdata->cpucache = NULL;
 811         cfgdata->raw_refcnt = 1;
 812 
 813         if (rawsz > 0) {
 814                 if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
 815                         out(O_ALTFP, "restart_fme: Config data size mismatch");
 816                         Undiag_reason = UD_VAL_CFGMISMATCH;
 817                         goto badcase;
 818                 }
 819                 cfgdata->begin = MALLOC(rawsz);
 820                 cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
 821                 fmd_buf_read(hdl,
 822                     inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
 823         } else {
 824                 cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
 825         }
 826 
 827         config_cook(cfgdata);
 828         fmep->config = cfgdata->cooked;
 829         config_free(cfgdata);
 830         out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
 831             alloc_total() - init_size);
 832 
 833         if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
 834                 /* case not properly saved or irretrievable */
 835                 out(O_ALTFP, "restart_fme: NULL instance tree");
 836                 Undiag_reason = UD_VAL_INSTFAIL;
 837                 goto badcase;
 838         }
 839 
 840         itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
 841 
 842         if (reconstitute_observations(fmep) != 0)
 843                 goto badcase;
 844 
 845         out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
 846         for (ep = fmep->observations; ep; ep = ep->observations) {
 847                 out(O_ALTFP|O_NONL, " ");
 848                 itree_pevent_brief(O_ALTFP|O_NONL, ep);
 849         }
 850         out(O_ALTFP, NULL);
 851 
 852         Open_fme_count++;
 853 
 854         /* give the diagnosis algorithm a shot at the new FME state */
 855         fme_eval(fmep, fmep->e0r);
 856         return;
 857 
 858 badcase:
 859         if (fmep->eventtree != NULL)
 860                 itree_free(fmep->eventtree);
 861         if (fmep->config)
 862                 structconfig_free(fmep->config);
 863         destroy_fme_bufs(fmep);
 864         FREE(fmep);
 865 
 866         /*
 867          * Since we're unable to restart the case, add it to the undiagable
 868          * list and solve and close it as appropriate.
 869          */
 870         bad = MALLOC(sizeof (struct case_list));
 871         bad->next = NULL;
 872 
 873         if (Undiagablecaselist != NULL)
 874                 bad->next = Undiagablecaselist;
 875         Undiagablecaselist = bad;
 876         bad->fmcase = inprogress;
 877 
 878         out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
 879             fmd_case_uuid(hdl, bad->fmcase));
 880 
 881         if (fmd_case_solved(hdl, bad->fmcase)) {
 882                 out(O_ALTFP|O_NONL, "already solved, ");
 883         } else {
 884                 out(O_ALTFP|O_NONL, "solving, ");
 885                 defect = fmd_nvl_create_fault(hdl,
 886                     undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
 887                 reason = undiag_2reason_str(Undiag_reason, NULL);
 888                 (void) nvlist_add_string(defect, UNDIAG_REASON, reason);
 889                 FREE(reason);
 890                 fmd_case_add_suspect(hdl, bad->fmcase, defect);
 891                 fmd_case_solve(hdl, bad->fmcase);
 892                 Undiag_reason = UD_VAL_UNKNOWN;
 893         }
 894 
 895         if (fmd_case_closed(hdl, bad->fmcase)) {
 896                 out(O_ALTFP, "already closed ]");
 897         } else {
 898                 out(O_ALTFP, "closing ]");
 899                 fmd_case_close(hdl, bad->fmcase);
 900         }
 901 }
 902 
 903 /*ARGSUSED*/
 904 static void
 905 globals_destructor(void *left, void *right, void *arg)
 906 {
 907         struct evalue *evp = (struct evalue *)right;
 908         if (evp->t == NODEPTR)
 909                 tree_free((struct node *)(uintptr_t)evp->v);
 910         evp->v = (uintptr_t)NULL;
 911         FREE(evp);
 912 }
 913 
 914 void
 915 destroy_fme(struct fme *f)
 916 {
 917         stats_delete(f->Rcount);
 918         stats_delete(f->Hcallcount);
 919         stats_delete(f->Rcallcount);
 920         stats_delete(f->Ccallcount);
 921         stats_delete(f->Ecallcount);
 922         stats_delete(f->Tcallcount);
 923         stats_delete(f->Marrowcount);
 924         stats_delete(f->diags);
 925 
 926         if (f->eventtree != NULL)
 927                 itree_free(f->eventtree);
 928         if (f->config)
 929                 structconfig_free(f->config);
 930         lut_free(f->globals, globals_destructor, NULL);
 931         FREE(f);
 932 }
 933 
 934 static const char *
 935 fme_state2str(enum fme_state s)
 936 {
 937         switch (s) {
 938         case FME_NOTHING:       return ("NOTHING");
 939         case FME_WAIT:          return ("WAIT");
 940         case FME_CREDIBLE:      return ("CREDIBLE");
 941         case FME_DISPROVED:     return ("DISPROVED");
 942         case FME_DEFERRED:      return ("DEFERRED");
 943         default:                return ("UNKNOWN");
 944         }
 945 }
 946 
 947 static int
 948 is_problem(enum nametype t)
 949 {
 950         return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
 951 }
 952 
 953 static int
 954 is_defect(enum nametype t)
 955 {
 956         return (t == N_DEFECT);
 957 }
 958 
 959 static int
 960 is_upset(enum nametype t)
 961 {
 962         return (t == N_UPSET);
 963 }
 964 
 965 static void
 966 fme_print(int flags, struct fme *fmep)
 967 {
 968         struct event *ep;
 969 
 970         out(flags, "Fault Management Exercise %d", fmep->id);
 971         out(flags, "\t       State: %s", fme_state2str(fmep->state));
 972         out(flags|O_NONL, "\t  Start time: ");
 973         ptree_timeval(flags|O_NONL, &fmep->ull);
 974         out(flags, NULL);
 975         if (fmep->wull) {
 976                 out(flags|O_NONL, "\t   Wait time: ");
 977                 ptree_timeval(flags|O_NONL, &fmep->wull);
 978                 out(flags, NULL);
 979         }
 980         out(flags|O_NONL, "\t          E0: ");
 981         if (fmep->e0)
 982                 itree_pevent_brief(flags|O_NONL, fmep->e0);
 983         else
 984                 out(flags|O_NONL, "NULL");
 985         out(flags, NULL);
 986         out(flags|O_NONL, "\tObservations:");
 987         for (ep = fmep->observations; ep; ep = ep->observations) {
 988                 out(flags|O_NONL, " ");
 989                 itree_pevent_brief(flags|O_NONL, ep);
 990         }
 991         out(flags, NULL);
 992         out(flags|O_NONL, "\tSuspect list:");
 993         for (ep = fmep->suspects; ep; ep = ep->suspects) {
 994                 out(flags|O_NONL, " ");
 995                 itree_pevent_brief(flags|O_NONL, ep);
 996         }
 997         out(flags, NULL);
 998         if (fmep->eventtree != NULL) {
 999                 out(flags|O_VERB2, "\t        Tree:");
1000                 itree_ptree(flags|O_VERB2, fmep->eventtree);
1001         }
1002 }
1003 
1004 static struct node *
1005 pathstring2epnamenp(char *path)
1006 {
1007         char *sep = "/";
1008         struct node *ret;
1009         char *ptr;
1010 
1011         if ((ptr = strtok(path, sep)) == NULL)
1012                 out(O_DIE, "pathstring2epnamenp: invalid empty class");
1013 
1014         ret = tree_iname(stable(ptr), NULL, 0);
1015 
1016         while ((ptr = strtok(NULL, sep)) != NULL)
1017                 ret = tree_name_append(ret,
1018                     tree_iname(stable(ptr), NULL, 0));
1019 
1020         return (ret);
1021 }
1022 
1023 /*
1024  * for a given upset sp, increment the corresponding SERD engine.  if the
1025  * SERD engine trips, return the ename and ipp of the resulting ereport.
1026  * returns true if engine tripped and *enamep and *ippp were filled in.
1027  */
1028 static int
1029 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
1030     fmd_case_t *fmcase, struct event *sp, const char **enamep,
1031     const struct ipath **ippp)
1032 {
1033         struct node *serdinst;
1034         char *serdname;
1035         char *serdresource;
1036         char *serdclass;
1037         struct node *nid;
1038         struct serd_entry *newentp;
1039         int i, serdn = -1, serdincrement = 1, len = 0;
1040         char *serdsuffix = NULL, *serdt = NULL;
1041         struct evalue *ep;
1042 
1043         ASSERT(sp->t == N_UPSET);
1044         ASSERT(ffep != NULL);
1045 
1046         if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1047             (void *)"n", (lut_cmp)strcmp)) != NULL) {
1048                 ASSERT(ep->t == UINT64);
1049                 serdn = (int)ep->v;
1050         }
1051         if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1052             (void *)"t", (lut_cmp)strcmp)) != NULL) {
1053                 ASSERT(ep->t == STRING);
1054                 serdt = (char *)(uintptr_t)ep->v;
1055         }
1056         if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1057             (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
1058                 ASSERT(ep->t == STRING);
1059                 serdsuffix = (char *)(uintptr_t)ep->v;
1060         }
1061         if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1062             (void *)"increment", (lut_cmp)strcmp)) != NULL) {
1063                 ASSERT(ep->t == UINT64);
1064                 serdincrement = (int)ep->v;
1065         }
1066 
1067         /*
1068          * obtain instanced SERD engine from the upset sp.  from this
1069          * derive serdname, the string used to identify the SERD engine.
1070          */
1071         serdinst = eventprop_lookup(sp, L_engine);
1072 
1073         if (serdinst == NULL)
1074                 return (-1);
1075 
1076         len = strlen(serdinst->u.stmt.np->u.event.ename->u.name.s) + 1;
1077         if (serdsuffix != NULL)
1078                 len += strlen(serdsuffix);
1079         serdclass = MALLOC(len);
1080         if (serdsuffix != NULL)
1081                 (void) snprintf(serdclass, len, "%s%s",
1082                     serdinst->u.stmt.np->u.event.ename->u.name.s, serdsuffix);
1083         else
1084                 (void) snprintf(serdclass, len, "%s",
1085                     serdinst->u.stmt.np->u.event.ename->u.name.s);
1086         serdresource = ipath2str(NULL,
1087             ipath(serdinst->u.stmt.np->u.event.epname));
1088         len += strlen(serdresource) + 1;
1089         serdname = MALLOC(len);
1090         (void) snprintf(serdname, len, "%s@%s", serdclass, serdresource);
1091         FREE(serdresource);
1092 
1093         /* handle serd engine "id" property, if there is one */
1094         if ((nid =
1095             lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
1096                 struct evalue *gval;
1097                 char suffixbuf[200];
1098                 char *suffix;
1099                 char *nserdname;
1100                 size_t nname;
1101 
1102                 out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
1103                 ptree_name_iter(O_ALTFP|O_NONL, nid);
1104 
1105                 ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
1106 
1107                 if ((gval = lut_lookup(fmep->globals,
1108                     (void *)nid->u.globid.s, NULL)) == NULL) {
1109                         out(O_ALTFP, " undefined");
1110                 } else if (gval->t == UINT64) {
1111                         out(O_ALTFP, " %llu", gval->v);
1112                         (void) sprintf(suffixbuf, "%llu", gval->v);
1113                         suffix = suffixbuf;
1114                 } else {
1115                         out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
1116                         suffix = (char *)(uintptr_t)gval->v;
1117                 }
1118 
1119                 nname = strlen(serdname) + strlen(suffix) + 2;
1120                 nserdname = MALLOC(nname);
1121                 (void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
1122                 FREE(serdname);
1123                 serdname = nserdname;
1124         }
1125 
1126         /*
1127          * if the engine is empty, and we have an override for n/t then
1128          * destroy and recreate it.
1129          */
1130         if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
1131             fmd_serd_empty(hdl, serdname))
1132                 fmd_serd_destroy(hdl, serdname);
1133 
1134         if (!fmd_serd_exists(hdl, serdname)) {
1135                 struct node *nN, *nT;
1136                 const char *s;
1137                 struct node *nodep;
1138                 struct config *cp;
1139                 char *path;
1140                 uint_t nval;
1141                 hrtime_t tval;
1142                 int i;
1143                 char *ptr;
1144                 int got_n_override = 0, got_t_override = 0;
1145 
1146                 /* no SERD engine yet, so create it */
1147                 nodep = serdinst->u.stmt.np->u.event.epname;
1148                 path = ipath2str(NULL, ipath(nodep));
1149                 cp = config_lookup(fmep->config, path, 0);
1150                 FREE((void *)path);
1151 
1152                 /*
1153                  * We allow serd paramaters to be overridden, either from
1154                  * eft.conf file values (if Serd_Override is set) or from
1155                  * driver properties (for "serd.io.device" engines).
1156                  */
1157                 if (Serd_Override != NULL) {
1158                         char *save_ptr, *ptr1, *ptr2, *ptr3;
1159                         ptr3 = save_ptr = STRDUP(Serd_Override);
1160                         while (*ptr3 != '\0') {
1161                                 ptr1 = strchr(ptr3, ',');
1162                                 *ptr1 = '\0';
1163                                 if (strcmp(ptr3, serdclass) == 0) {
1164                                         ptr2 =  strchr(ptr1 + 1, ',');
1165                                         *ptr2 = '\0';
1166                                         nval = atoi(ptr1 + 1);
1167                                         out(O_ALTFP, "serd override %s_n %d",
1168                                             serdclass, nval);
1169                                         ptr3 =  strchr(ptr2 + 1, ' ');
1170                                         if (ptr3)
1171                                                 *ptr3 = '\0';
1172                                         ptr = STRDUP(ptr2 + 1);
1173                                         out(O_ALTFP, "serd override %s_t %s",
1174                                             serdclass, ptr);
1175                                         got_n_override = 1;
1176                                         got_t_override = 1;
1177                                         break;
1178                                 } else {
1179                                         ptr2 =  strchr(ptr1 + 1, ',');
1180                                         ptr3 =  strchr(ptr2 + 1, ' ');
1181                                         if (ptr3 == NULL)
1182                                                 break;
1183                                 }
1184                                 ptr3++;
1185                         }
1186                         FREE(save_ptr);
1187                 }
1188 
1189                 if (cp && got_n_override == 0) {
1190                         /*
1191                          * convert serd engine class into property name
1192                          */
1193                         char *prop_name = MALLOC(strlen(serdclass) + 3);
1194                         for (i = 0; i < strlen(serdclass); i++) {
1195                                 if (serdclass[i] == '.')
1196                                         prop_name[i] = '_';
1197                                 else
1198                                         prop_name[i] = serdclass[i];
1199                         }
1200                         prop_name[i++] = '_';
1201                         prop_name[i++] = 'n';
1202                         prop_name[i] = '\0';
1203                         if (s = config_getprop(cp, prop_name)) {
1204                                 nval = atoi(s);
1205                                 out(O_ALTFP, "serd override %s_n %s",
1206                                     serdclass, s);
1207                                 got_n_override = 1;
1208                         }
1209                         prop_name[i - 1] = 't';
1210                         if (s = config_getprop(cp, prop_name)) {
1211                                 ptr = STRDUP(s);
1212                                 out(O_ALTFP, "serd override %s_t %s",
1213                                     serdclass, s);
1214                                 got_t_override = 1;
1215                         }
1216                         FREE(prop_name);
1217                 }
1218 
1219                 if (serdn != -1 && got_n_override == 0) {
1220                         nval = serdn;
1221                         out(O_ALTFP, "serd override %s_n %d", serdclass, serdn);
1222                         got_n_override = 1;
1223                 }
1224                 if (serdt != NULL && got_t_override == 0) {
1225                         ptr = STRDUP(serdt);
1226                         out(O_ALTFP, "serd override %s_t %s", serdclass, serdt);
1227                         got_t_override = 1;
1228                 }
1229 
1230                 if (!got_n_override) {
1231                         nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
1232                             NULL);
1233                         ASSERT(nN->t == T_NUM);
1234                         nval = (uint_t)nN->u.ull;
1235                 }
1236                 if (!got_t_override) {
1237                         nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
1238                             NULL);
1239                         ASSERT(nT->t == T_TIMEVAL);
1240                         tval = (hrtime_t)nT->u.ull;
1241                 } else {
1242                         const unsigned long long *ullp;
1243                         const char *suffix;
1244                         int len;
1245 
1246                         len = strspn(ptr, "0123456789");
1247                         suffix = stable(&ptr[len]);
1248                         ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
1249                             (void *)suffix, NULL);
1250                         ptr[len] = '\0';
1251                         tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
1252                         FREE(ptr);
1253                 }
1254                 fmd_serd_create(hdl, serdname, nval, tval);
1255         }
1256 
1257         newentp = MALLOC(sizeof (*newentp));
1258         newentp->ename = stable(serdclass);
1259         FREE(serdclass);
1260         newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
1261         newentp->hdl = hdl;
1262         if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
1263                 SerdEngines = lut_add(SerdEngines, (void *)newentp,
1264                     (void *)newentp, (lut_cmp)serd_cmp);
1265                 Serd_need_save = 1;
1266                 serd_save();
1267         } else {
1268                 FREE(newentp);
1269         }
1270 
1271 
1272         /*
1273          * increment SERD engine.  if engine fires, reset serd
1274          * engine and return trip_strcode if required.
1275          */
1276         for (i = 0; i < serdincrement; i++) {
1277                 if (fmd_serd_record(hdl, serdname, ffep)) {
1278                         fmd_case_add_serd(hdl, fmcase, serdname);
1279                         fmd_serd_reset(hdl, serdname);
1280 
1281                         if (ippp) {
1282                                 struct node *tripinst =
1283                                     lut_lookup(serdinst->u.stmt.lutp,
1284                                     (void *)L_trip, NULL);
1285                                 ASSERT(tripinst != NULL);
1286                                 *enamep = tripinst->u.event.ename->u.name.s;
1287                                 *ippp = ipath(tripinst->u.event.epname);
1288                                 out(O_ALTFP|O_NONL,
1289                                     "[engine fired: %s, sending: ", serdname);
1290                                 ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
1291                                 out(O_ALTFP, "]");
1292                         } else {
1293                                 out(O_ALTFP, "[engine fired: %s, no trip]",
1294                                     serdname);
1295                         }
1296                         FREE(serdname);
1297                         return (1);
1298                 }
1299         }
1300 
1301         FREE(serdname);
1302         return (0);
1303 }
1304 
1305 /*
1306  * search a suspect list for upsets.  feed each upset to serd_eval() and
1307  * build up tripped[], an array of ereports produced by the firing of
1308  * any SERD engines.  then feed each ereport back into
1309  * fme_receive_report().
1310  *
1311  * returns ntrip, the number of these ereports produced.
1312  */
1313 static int
1314 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
1315 {
1316         /* we build an array of tripped ereports that we send ourselves */
1317         struct {
1318                 const char *ename;
1319                 const struct ipath *ipp;
1320         } *tripped;
1321         struct event *sp;
1322         int ntrip, nupset, i;
1323 
1324         /*
1325          * count the number of upsets to determine the upper limit on
1326          * expected trip ereport strings.  remember that one upset can
1327          * lead to at most one ereport.
1328          */
1329         nupset = 0;
1330         for (sp = fmep->suspects; sp; sp = sp->suspects) {
1331                 if (sp->t == N_UPSET)
1332                         nupset++;
1333         }
1334 
1335         if (nupset == 0)
1336                 return (0);
1337 
1338         /*
1339          * get to this point if we have upsets and expect some trip
1340          * ereports
1341          */
1342         tripped = alloca(sizeof (*tripped) * nupset);
1343         bzero((void *)tripped, sizeof (*tripped) * nupset);
1344 
1345         ntrip = 0;
1346         for (sp = fmep->suspects; sp; sp = sp->suspects)
1347                 if (sp->t == N_UPSET &&
1348                     serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
1349                     &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
1350                         ntrip++;
1351 
1352         for (i = 0; i < ntrip; i++) {
1353                 struct event *ep, *nep;
1354                 struct fme *nfmep;
1355                 fmd_case_t *fmcase;
1356                 const struct ipath *ipp;
1357                 const char *eventstring;
1358                 int prev_verbose;
1359                 unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1360                 enum fme_state state;
1361 
1362                 /*
1363                  * First try and evaluate a case with the trip ereport plus
1364                  * all the other ereports that cause the trip. If that fails
1365                  * to evaluate then try again with just this ereport on its own.
1366                  */
1367                 out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
1368                 ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1369                 out(O_ALTFP|O_STAMP, NULL);
1370                 ep = fmep->e0;
1371                 eventstring = ep->enode->u.event.ename->u.name.s;
1372                 ipp = ep->ipp;
1373 
1374                 /*
1375                  * create a duplicate fme and case
1376                  */
1377                 fmcase = fmd_case_open(fmep->hdl, NULL);
1378                 out(O_ALTFP|O_NONL, "duplicate fme for event [");
1379                 ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1380                 out(O_ALTFP, " ]");
1381 
1382                 if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
1383                     fmcase, ffep, ep->nvp)) == NULL) {
1384                         out(O_ALTFP|O_NONL, "[");
1385                         ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1386                         out(O_ALTFP, " CANNOT DIAGNOSE]");
1387                         continue;
1388                 }
1389 
1390                 Open_fme_count++;
1391                 nfmep->pull = fmep->pull;
1392                 init_fme_bufs(nfmep);
1393                 out(O_ALTFP|O_NONL, "[");
1394                 ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1395                 out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
1396                     fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
1397                 if (ffep) {
1398                         fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
1399                         fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
1400                         nfmep->e0r = ffep;
1401                 }
1402 
1403                 /*
1404                  * add the original ereports
1405                  */
1406                 for (ep = fmep->observations; ep; ep = ep->observations) {
1407                         eventstring = ep->enode->u.event.ename->u.name.s;
1408                         ipp = ep->ipp;
1409                         out(O_ALTFP|O_NONL, "adding event [");
1410                         ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1411                         out(O_ALTFP, " ]");
1412                         nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
1413                         if (nep->count++ == 0) {
1414                                 nep->observations = nfmep->observations;
1415                                 nfmep->observations = nep;
1416                                 serialize_observation(nfmep, eventstring, ipp);
1417                                 nep->nvp = evnv_dupnvl(ep->nvp);
1418                         }
1419                         if (ep->ffep && ep->ffep != ffep)
1420                                 fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
1421                                     ep->ffep);
1422                         stats_counter_bump(nfmep->Rcount);
1423                 }
1424 
1425                 /*
1426                  * add the serd trigger ereport
1427                  */
1428                 if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
1429                     tripped[i].ipp)) == NULL) {
1430                         /*
1431                          * The trigger ereport is not in the instance tree. It
1432                          * was presumably removed by prune_propagations() as
1433                          * this combination of events is not present in the
1434                          * rules.
1435                          */
1436                         out(O_ALTFP, "upsets_eval: e0 not in instance tree");
1437                         Undiag_reason = UD_VAL_BADEVENTI;
1438                         goto retry_lone_ereport;
1439                 }
1440                 out(O_ALTFP|O_NONL, "adding event [");
1441                 ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1442                 out(O_ALTFP, " ]");
1443                 nfmep->ecurrent = ep;
1444                 ep->nvp = NULL;
1445                 ep->count = 1;
1446                 ep->observations = nfmep->observations;
1447                 nfmep->observations = ep;
1448 
1449                 /*
1450                  * just peek first.
1451                  */
1452                 nfmep->peek = 1;
1453                 prev_verbose = Verbose;
1454                 if (Debug == 0)
1455                         Verbose = 0;
1456                 lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
1457                 state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
1458                 nfmep->peek = 0;
1459                 Verbose = prev_verbose;
1460                 if (state == FME_DISPROVED) {
1461                         out(O_ALTFP, "upsets_eval: hypothesis disproved");
1462                         Undiag_reason = UD_VAL_UNSOLVD;
1463 retry_lone_ereport:
1464                         /*
1465                          * However the trigger ereport on its own might be
1466                          * diagnosable, so check for that. Undo the new fme
1467                          * and case we just created and call fme_receive_report.
1468                          */
1469                         out(O_ALTFP|O_NONL, "[");
1470                         ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
1471                             tripped[i].ipp);
1472                         out(O_ALTFP, " retrying with just trigger ereport]");
1473                         itree_free(nfmep->eventtree);
1474                         nfmep->eventtree = NULL;
1475                         structconfig_free(nfmep->config);
1476                         nfmep->config = NULL;
1477                         destroy_fme_bufs(nfmep);
1478                         fmd_case_close(nfmep->hdl, nfmep->fmcase);
1479                         fme_receive_report(fmep->hdl, ffep,
1480                             tripped[i].ename, tripped[i].ipp, NULL);
1481                         continue;
1482                 }
1483 
1484                 /*
1485                  * and evaluate
1486                  */
1487                 serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
1488                 fme_eval(nfmep, ffep);
1489         }
1490 
1491         return (ntrip);
1492 }
1493 
1494 /*
1495  * fme_receive_external_report -- call when an external ereport comes in
1496  *
1497  * this routine just converts the relevant information from the ereport
1498  * into a format used internally and passes it on to fme_receive_report().
1499  */
1500 void
1501 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1502     const char *class)
1503 {
1504         struct node             *epnamenp;
1505         fmd_case_t              *fmcase;
1506         const struct ipath      *ipp;
1507         nvlist_t                *detector = NULL;
1508 
1509         class = stable(class);
1510 
1511         /* Get the component path from the ereport */
1512         epnamenp = platform_getpath(nvl);
1513 
1514         /* See if we ended up without a path. */
1515         if (epnamenp == NULL) {
1516                 /* See if class permits silent discard on unknown component. */
1517                 if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
1518                         out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
1519                             "to component path, but silent discard allowed.",
1520                             class);
1521                 } else {
1522                         /*
1523                          * XFILE: Failure to find a component is bad unless
1524                          * 'discard_if_config_unknown=1' was specified in the
1525                          * ereport definition. Indicate undiagnosable.
1526                          */
1527                         Undiag_reason = UD_VAL_NOPATH;
1528                         fmcase = fmd_case_open(hdl, NULL);
1529 
1530                         /*
1531                          * We don't have a component path here (which means that
1532                          * the detector was not in hc-scheme and couldn't be
1533                          * converted to hc-scheme. Report the raw detector as
1534                          * the suspect resource if there is one.
1535                          */
1536                         (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
1537                             &detector);
1538                         publish_undiagnosable(hdl, ffep, fmcase, detector,
1539                             (char *)class);
1540                 }
1541                 return;
1542         }
1543 
1544         ipp = ipath(epnamenp);
1545         tree_free(epnamenp);
1546         fme_receive_report(hdl, ffep, class, ipp, nvl);
1547 }
1548 
1549 /*ARGSUSED*/
1550 void
1551 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1552     const char *eventstring)
1553 {
1554         char *uuid;
1555         nvlist_t **nva;
1556         uint_t nvc;
1557         const struct ipath *ipp;
1558 
1559         if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
1560             nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
1561             &nva, &nvc) != 0) {
1562                 out(O_ALTFP, "No uuid or fault list for list.repaired event");
1563                 return;
1564         }
1565 
1566         out(O_ALTFP, "Processing list.repaired from case %s", uuid);
1567 
1568         while (nvc-- != 0) {
1569                 /*
1570                  * Reset any istat or serd engine associated with this path.
1571                  */
1572                 char *path;
1573 
1574                 if ((ipp = platform_fault2ipath(*nva++)) == NULL)
1575                         continue;
1576 
1577                 path = ipath2str(NULL, ipp);
1578                 out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
1579                     path);
1580                 FREE(path);
1581 
1582                 lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
1583                 istat_save();
1584 
1585                 lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
1586                 serd_save();
1587         }
1588 }
1589 
1590 /*ARGSUSED*/
1591 void
1592 fme_receive_topology_change(void)
1593 {
1594         lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
1595         istat_save();
1596 
1597         lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
1598         serd_save();
1599 }
1600 
1601 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
1602     unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
1603 
1604 /* ARGSUSED */
1605 static void
1606 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
1607 {
1608         struct bubble *bp;
1609         struct arrowlist *ap;
1610 
1611         ep->cached_state = 0;
1612         ep->keep_in_tree = 0;
1613         for (bp = itree_next_bubble(ep, NULL); bp;
1614             bp = itree_next_bubble(ep, bp)) {
1615                 if (bp->t != B_FROM)
1616                         continue;
1617                 bp->mark = 0;
1618                 for (ap = itree_next_arrow(bp, NULL); ap;
1619                     ap = itree_next_arrow(bp, ap))
1620                         ap->arrowp->mark = 0;
1621         }
1622 }
1623 
/*
 * fme_receive_report -- deliver one ereport observation to the FMEs
 *
 *      hdl             fmd module handle
 *      ffep            fmd event for the ereport; every use below is
 *                      guarded by "if (ffep)", so NULL is tolerated
 *      eventstring     ereport class name
 *      ipp             instanced path; with eventstring it keys the
 *                      lookup in each FME's event tree
 *      nvl             ereport payload, copied with evnv_dupnvl() (at
 *                      least one caller in this file passes NULL)
 *
 * Processing order:
 *
 *   1. Each FME on FMElist that is neither an overflow FME nor already
 *      solved/closed, and whose event tree contains the event, gets the
 *      observation recorded tentatively and hypothesise() run in "peek"
 *      mode.  If the result is anything but FME_DISPROVED the
 *      observation is kept and the FME re-evaluated; otherwise the
 *      tentative bookkeeping is undone.
 *   2. If at least one FME accepted the ereport, return.
 *   3. Otherwise destroy everything on ClosedFMEs, then either add the
 *      ereport to an open overflow FME, create an overflow FME (when
 *      Max_fme is set and reached), or start a brand new FME with this
 *      ereport as its initial event and evaluate it.
 */
static void
fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
{
        struct event *ep;
        struct fme *fmep = NULL;
        struct fme *ofmep = NULL;       /* open overflow FME, if seen */
        struct fme *cfmep, *svfmep;
        int matched = 0;                /* count of FMEs that took it */
        nvlist_t *defect;
        fmd_case_t *fmcase;
        char *reason;

        out(O_ALTFP|O_NONL, "fme_receive_report: ");
        ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
        out(O_ALTFP|O_STAMP, NULL);

        /* decide which FME it goes to */
        for (fmep = FMElist; fmep; fmep = fmep->next) {
                int prev_verbose;
                unsigned long long my_delay = TIMEVAL_EVENTUALLY;
                enum fme_state state;
                nvlist_t *pre_peek_nvp = NULL;

                /*
                 * overflow FMEs are never matched against; just remember
                 * one whose case is still open for use further down
                 */
                if (fmep->overflow) {
                        if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
                                ofmep = fmep;

                        continue;
                }

                /*
                 * ignore solved or closed cases
                 */
                if (fmep->posted_suspects ||
                    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
                    fmd_case_closed(fmep->hdl, fmep->fmcase))
                        continue;

                /* look up event in event tree for this FME */
                if ((ep = itree_lookup(fmep->eventtree,
                    eventstring, ipp)) == NULL)
                        continue;

                /* note observation */
                fmep->ecurrent = ep;
                if (ep->count++ == 0) {
                        /* link it into list of observations seen */
                        ep->observations = fmep->observations;
                        fmep->observations = ep;
                        ep->nvp = evnv_dupnvl(nvl);
                } else {
                        /*
                         * use new payload values for peek; the old
                         * payload is parked in pre_peek_nvp so it can be
                         * either freed or restored once the peek is done
                         */
                        pre_peek_nvp = ep->nvp;
                        ep->nvp = evnv_dupnvl(nvl);
                }

                /* tell hypothesise() not to mess with suspect list */
                fmep->peek = 1;

                /* don't want this to be verbose (unless Debug is set) */
                prev_verbose = Verbose;
                if (Debug == 0)
                        Verbose = 0;

                /* reset marks and cached state before the trial run */
                lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
                state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);

                fmep->peek = 0;

                /* put verbose flag back */
                Verbose = prev_verbose;

                if (state != FME_DISPROVED) {
                        /* found an FME that explains the ereport */
                        matched++;
                        out(O_ALTFP|O_NONL, "[");
                        ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
                        out(O_ALTFP, " explained by FME%d]", fmep->id);

                        /* the new payload stays; drop the parked one */
                        if (pre_peek_nvp)
                                nvlist_free(pre_peek_nvp);

                        /* only the first sighting of an event is serialized */
                        if (ep->count == 1)
                                serialize_observation(fmep, eventstring, ipp);

                        if (ffep) {
                                fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
                                ep->ffep = ffep;
                        }

                        stats_counter_bump(fmep->Rcount);

                        /* re-eval FME */
                        fme_eval(fmep, ffep);
                } else {

                        /* not a match, undo noting of observation */
                        fmep->ecurrent = NULL;
                        if (--ep->count == 0) {
                                /*
                                 * unlink it from observations; ep was
                                 * pushed on the head of the list above,
                                 * so popping the head removes it
                                 */
                                fmep->observations = ep->observations;
                                ep->observations = NULL;
                                nvlist_free(ep->nvp);
                                ep->nvp = NULL;
                        } else {
                                /* put the pre-peek payload back */
                                nvlist_free(ep->nvp);
                                ep->nvp = pre_peek_nvp;
                        }
                }
        }

        if (matched)
                return; /* explained by at least one existing FME */

        /* clean up closed fmes */
        cfmep = ClosedFMEs;
        while (cfmep != NULL) {
                /* grab the link first: destroy_fme() is handed cfmep */
                svfmep = cfmep->next;
                destroy_fme(cfmep);
                cfmep = svfmep;
        }
        ClosedFMEs = NULL;

        if (ofmep) {
                /* an overflow FME is open: park the ereport on its case */
                out(O_ALTFP|O_NONL, "[");
                ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
                out(O_ALTFP, " ADDING TO OVERFLOW FME]");
                if (ffep)
                        fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);

                return;

        } else if (Max_fme && (Open_fme_count >= Max_fme)) {
                /* Max_fme of zero disables the limit */
                out(O_ALTFP|O_NONL, "[");
                ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
                out(O_ALTFP, " MAX OPEN FME REACHED]");

                fmcase = fmd_case_open(hdl, NULL);

                /* Create overflow fme */
                if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep,
                    nvl)) == NULL) {
                        out(O_ALTFP|O_NONL, "[");
                        ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
                        out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
                        return;
                }

                Open_fme_count++;

                init_fme_bufs(fmep);
                fmep->overflow = B_TRUE;

                if (ffep)
                        fmd_case_add_ereport(hdl, fmep->fmcase, ffep);

                /*
                 * solve the overflow case straight away with a single
                 * suspect that carries the UD_VAL_MAXFME reason, then
                 * put Undiag_reason back to UD_VAL_UNKNOWN
                 */
                Undiag_reason = UD_VAL_MAXFME;
                defect = fmd_nvl_create_fault(hdl,
                    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
                reason = undiag_2reason_str(Undiag_reason, NULL);
                (void) nvlist_add_string(defect, UNDIAG_REASON, reason);
                FREE(reason);
                fmd_case_add_suspect(hdl, fmep->fmcase, defect);
                fmd_case_solve(hdl, fmep->fmcase);
                Undiag_reason = UD_VAL_UNKNOWN;
                return;
        }

        /* open a case */
        fmcase = fmd_case_open(hdl, NULL);

        /* start a new FME */
        if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep, nvl)) == NULL) {
                out(O_ALTFP|O_NONL, "[");
                ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
                out(O_ALTFP, " CANNOT DIAGNOSE]");
                return;
        }

        Open_fme_count++;

        init_fme_bufs(fmep);

        out(O_ALTFP|O_NONL, "[");
        ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
        out(O_ALTFP, " created FME%d, case %s]", fmep->id,
            fmd_case_uuid(hdl, fmep->fmcase));

        /* the new FME's initial event is the one being observed */
        ep = fmep->e0;
        ASSERT(ep != NULL);

        /* note observation */
        fmep->ecurrent = ep;
        if (ep->count++ == 0) {
                /* link it into list of observations seen */
                ep->observations = fmep->observations;
                fmep->observations = ep;
                ep->nvp = evnv_dupnvl(nvl);
                serialize_observation(fmep, eventstring, ipp);
        } else {
                /* new payload overrides any previous */
                nvlist_free(ep->nvp);
                ep->nvp = evnv_dupnvl(nvl);
        }

        stats_counter_bump(fmep->Rcount);

        if (ffep) {
                /* this ereport is also the principal event of the case */
                fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
                fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
                fmep->e0r = ffep;
                ep->ffep = ffep;
        }

        /* give the diagnosis algorithm a shot at the new FME state */
        fme_eval(fmep, ffep);
}
1842 
1843 void
1844 fme_status(int flags)
1845 {
1846         struct fme *fmep;
1847 
1848         if (FMElist == NULL) {
1849                 out(flags, "No fault management exercises underway.");
1850                 return;
1851         }
1852 
1853         for (fmep = FMElist; fmep; fmep = fmep->next)
1854                 fme_print(flags, fmep);
1855 }
1856 
1857 /*
1858  * "indent" routines used mostly for nicely formatted debug output, but also
1859  * for sanity checking for infinite recursion bugs.
1860  */
1861 
1862 #define MAX_INDENT 1024
1863 static const char *indent_s[MAX_INDENT];
1864 static int current_indent;
1865 
1866 static void
1867 indent_push(const char *s)
1868 {
1869         if (current_indent < MAX_INDENT)
1870                 indent_s[current_indent++] = s;
1871         else
1872                 out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1873 }
1874 
1875 static void
1876 indent_set(const char *s)
1877 {
1878         current_indent = 0;
1879         indent_push(s);
1880 }
1881 
1882 static void
1883 indent_pop(void)
1884 {
1885         if (current_indent > 0)
1886                 current_indent--;
1887         else
1888                 out(O_DIE, "recursion underflow");
1889 }
1890 
1891 static void
1892 indent(void)
1893 {
1894         int i;
1895         if (!Verbose)
1896                 return;
1897         for (i = 0; i < current_indent; i++)
1898                 out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1899 }
1900 
1901 #define SLNEW           1
1902 #define SLCHANGED       2
1903 #define SLWAIT          3
1904 #define SLDISPROVED     4
1905 
1906 static void
1907 print_suspects(int circumstance, struct fme *fmep)
1908 {
1909         struct event *ep;
1910 
1911         out(O_ALTFP|O_NONL, "[");
1912         if (circumstance == SLCHANGED) {
1913                 out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1914                     "suspect list:", fmep->id, fme_state2str(fmep->state));
1915         } else if (circumstance == SLWAIT) {
1916                 out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
1917                     fmep->timer);
1918                 ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1919         } else if (circumstance == SLDISPROVED) {
1920                 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1921         } else {
1922                 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1923         }
1924 
1925         if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1926                 out(O_ALTFP, "]");
1927                 return;
1928         }
1929 
1930         for (ep = fmep->suspects; ep; ep = ep->suspects) {
1931                 out(O_ALTFP|O_NONL, " ");
1932                 itree_pevent_brief(O_ALTFP|O_NONL, ep);
1933         }
1934         out(O_ALTFP, "]");
1935 }
1936 
1937 static struct node *
1938 eventprop_lookup(struct event *ep, const char *propname)
1939 {
1940         return (lut_lookup(ep->props, (void *)propname, NULL));
1941 }
1942 
/*
 * Scratch buffer shared by node2fmri() and ipath2fmri() for turning
 * instance numbers into strings.  Both store a NUL at index MAXDIGITIDX
 * (the last byte) and pass the address of that byte to ulltostr(), then
 * use the pointer ulltostr() returns as the string.
 * NOTE(review): presumably ulltostr() writes the digits backwards from
 * that byte -- confirm against its definition.
 */
#define MAXDIGITIDX     23
static char numbuf[MAXDIGITIDX + 1];
1945 
1946 static int
1947 node2uint(struct node *n, uint_t *valp)
1948 {
1949         struct evalue value;
1950         struct lut *globals = NULL;
1951 
1952         if (n == NULL)
1953                 return (1);
1954 
1955         /*
1956          * check value.v since we are being asked to convert an unsigned
1957          * long long int to an unsigned int
1958          */
1959         if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1960             value.t != UINT64 || value.v > (1ULL << 32))
1961                 return (1);
1962 
1963         *valp = (uint_t)value.v;
1964 
1965         return (0);
1966 }
1967 
1968 static nvlist_t *
1969 node2fmri(struct node *n)
1970 {
1971         nvlist_t **pa, *f, *p;
1972         struct node *nc;
1973         uint_t depth = 0;
1974         char *numstr, *nullbyte;
1975         char *failure;
1976         int err, i;
1977 
1978         /* XXX do we need to be able to handle a non-T_NAME node? */
1979         if (n == NULL || n->t != T_NAME)
1980                 return (NULL);
1981 
1982         for (nc = n; nc != NULL; nc = nc->u.name.next) {
1983                 if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
1984                         break;
1985                 depth++;
1986         }
1987 
1988         if (nc != NULL) {
1989                 /* We bailed early, something went wrong */
1990                 return (NULL);
1991         }
1992 
1993         if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1994                 out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1995         pa = alloca(depth * sizeof (nvlist_t *));
1996         for (i = 0; i < depth; i++)
1997                 pa[i] = NULL;
1998 
1999         err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2000         err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2001         err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2002         err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2003         if (err != 0) {
2004                 failure = "basic construction of FMRI failed";
2005                 goto boom;
2006         }
2007 
2008         numbuf[MAXDIGITIDX] = '\0';
2009         nullbyte = &numbuf[MAXDIGITIDX];
2010         i = 0;
2011 
2012         for (nc = n; nc != NULL; nc = nc->u.name.next) {
2013                 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2014                 if (err != 0) {
2015                         failure = "alloc of an hc-pair failed";
2016                         goto boom;
2017                 }
2018                 err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
2019                 numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
2020                 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2021                 if (err != 0) {
2022                         failure = "construction of an hc-pair failed";
2023                         goto boom;
2024                 }
2025                 pa[i++] = p;
2026         }
2027 
2028         err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2029         if (err == 0) {
2030                 for (i = 0; i < depth; i++)
2031                         if (pa[i] != NULL)
2032                                 nvlist_free(pa[i]);
2033                 return (f);
2034         }
2035         failure = "addition of hc-pair array to FMRI failed";
2036 
2037 boom:
2038         for (i = 0; i < depth; i++)
2039                 if (pa[i] != NULL)
2040                         nvlist_free(pa[i]);
2041         nvlist_free(f);
2042         out(O_DIE, "%s", failure);
2043         /*NOTREACHED*/
2044         return (NULL);
2045 }
2046 
/*
 * An ipath cache entry is an array of these elements, one per path
 * component, terminated by an element whose s member is NULL.
 */
struct ipath {
        const char *s;  /* component name (in stable) */
        int i;          /* instance number */
};
2052 
2053 static nvlist_t *
2054 ipath2fmri(struct ipath *ipath)
2055 {
2056         nvlist_t **pa, *f, *p;
2057         uint_t depth = 0;
2058         char *numstr, *nullbyte;
2059         char *failure;
2060         int err, i;
2061         struct ipath *ipp;
2062 
2063         for (ipp = ipath; ipp->s != NULL; ipp++)
2064                 depth++;
2065 
2066         if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
2067                 out(O_DIE|O_SYS, "alloc of fmri nvl failed");
2068         pa = alloca(depth * sizeof (nvlist_t *));
2069         for (i = 0; i < depth; i++)
2070                 pa[i] = NULL;
2071 
2072         err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2073         err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2074         err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2075         err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2076         if (err != 0) {
2077                 failure = "basic construction of FMRI failed";
2078                 goto boom;
2079         }
2080 
2081         numbuf[MAXDIGITIDX] = '\0';
2082         nullbyte = &numbuf[MAXDIGITIDX];
2083         i = 0;
2084 
2085         for (ipp = ipath; ipp->s != NULL; ipp++) {
2086                 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2087                 if (err != 0) {
2088                         failure = "alloc of an hc-pair failed";
2089                         goto boom;
2090                 }
2091                 err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
2092                 numstr = ulltostr(ipp->i, nullbyte);
2093                 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2094                 if (err != 0) {
2095                         failure = "construction of an hc-pair failed";
2096                         goto boom;
2097                 }
2098                 pa[i++] = p;
2099         }
2100 
2101         err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2102         if (err == 0) {
2103                 for (i = 0; i < depth; i++)
2104                         if (pa[i] != NULL)
2105                                 nvlist_free(pa[i]);
2106                 return (f);
2107         }
2108         failure = "addition of hc-pair array to FMRI failed";
2109 
2110 boom:
2111         for (i = 0; i < depth; i++)
2112                 if (pa[i] != NULL)
2113                         nvlist_free(pa[i]);
2114         nvlist_free(f);
2115         out(O_DIE, "%s", failure);
2116         /*NOTREACHED*/
2117         return (NULL);
2118 }
2119 
/*
 * percentof -- express part as a percentage of whole, rounded to the
 * nearest whole percent (halves round up)
 *
 * Returns 0 when whole is 0.  The result is returned as a uint8_t, so
 * callers are expected to pass part <= whole.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
        unsigned long long permille;

        if (whole == 0)
                return (0);

        /*
         * Widen before scaling: multiplying in uint_t would wrap for
         * any part above roughly 4.29 million.
         */
        permille = ((unsigned long long)part * 1000) / whole;

        return ((permille / 10) + (((permille % 10) >= 5) ? 1 : 0));
}
2127 
/*
 * struct rsl -- one suspect together with the resource nvlists that
 * get_resources() picked for it.  rslfree() releases the nvlists and
 * allows for rsrc being the very same nvlist as asru.
 */
struct rsl {
        struct event *suspect;  /* the suspect event */
        nvlist_t *asru;         /* ASRU nvlist, may be NULL */
        nvlist_t *fru;          /* FRU nvlist, may be NULL */
        nvlist_t *rsrc;         /* resource nvlist, may be NULL */
};
2134 
2135 static void publish_suspects(struct fme *fmep, struct rsl *srl);
2136 
2137 /*
2138  *  rslfree -- free internal members of struct rsl not expected to be
2139  *      freed elsewhere.
2140  */
2141 static void
2142 rslfree(struct rsl *freeme)
2143 {
2144         if (freeme->asru != NULL)
2145                 nvlist_free(freeme->asru);
2146         if (freeme->fru != NULL)
2147                 nvlist_free(freeme->fru);
2148         if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru)
2149                 nvlist_free(freeme->rsrc);
2150 }
2151 
2152 /*
2153  *  rslcmp -- compare two rsl structures.  Use the following
2154  *      comparisons to establish cardinality:
2155  *
2156  *      1. Name of the suspect's class. (simple strcmp)
2157  *      2. Name of the suspect's ASRU. (trickier, since nvlist)
2158  *
2159  */
2160 static int
2161 rslcmp(const void *a, const void *b)
2162 {
2163         struct rsl *r1 = (struct rsl *)a;
2164         struct rsl *r2 = (struct rsl *)b;
2165         int rv;
2166 
2167         rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
2168             r2->suspect->enode->u.event.ename->u.name.s);
2169         if (rv != 0)
2170                 return (rv);
2171 
2172         if (r1->rsrc == NULL && r2->rsrc == NULL)
2173                 return (0);
2174         if (r1->rsrc == NULL)
2175                 return (-1);
2176         if (r2->rsrc == NULL)
2177                 return (1);
2178         return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
2179 }
2180 
2181 /*
2182  * get_resources -- for a given suspect, determine what ASRU, FRU and
2183  *     RSRC nvlists should be advertised in the final suspect list.
2184  */
2185 void
2186 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
2187 {
2188         struct node *asrudef, *frudef;
2189         nvlist_t *asru, *fru;
2190         nvlist_t *rsrc = NULL;
2191         char *pathstr;
2192 
2193         /*
2194          * First find any ASRU and/or FRU defined in the
2195          * initial fault tree.
2196          */
2197         asrudef = eventprop_lookup(sp, L_ASRU);
2198         frudef = eventprop_lookup(sp, L_FRU);
2199 
2200         /*
2201          * Create FMRIs based on those definitions
2202          */
2203         asru = node2fmri(asrudef);
2204         fru = node2fmri(frudef);
2205         pathstr = ipath2str(NULL, sp->ipp);
2206 
2207         /*
2208          *  Allow for platform translations of the FMRIs
2209          */
2210         platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
2211             pathstr);
2212 
2213         FREE(pathstr);
2214         rsrcs->suspect = sp;
2215         rsrcs->asru = asru;
2216         rsrcs->fru = fru;
2217         rsrcs->rsrc = rsrc;
2218 }
2219 
2220 /*
2221  * trim_suspects -- prior to publishing, we may need to remove some
2222  *    suspects from the list.  If we're auto-closing upsets, we don't
2223  *    want any of those in the published list.  If the ASRUs for multiple
2224  *    defects resolve to the same ASRU (driver) we only want to publish
2225  *    that as a single suspect.
2226  */
2227 static int
2228 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
2229     fmd_event_t *ffep)
2230 {
2231         struct event *ep;
2232         struct rsl *rp = begin;
2233         struct rsl *rp2 = begin2;
2234         int mess_zero_count = 0;
2235         int serd_rval;
2236         uint_t messval;
2237 
2238         /* remove any unwanted upsets and populate our array */
2239         for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
2240                 if (is_upset(ep->t))
2241                         continue;
2242                 serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
2243                     NULL, NULL);
2244                 if (serd_rval == 0)
2245                         continue;
2246                 if (node2uint(eventprop_lookup(ep, L_message),
2247                     &messval) == 0 && messval == 0) {
2248                         get_resources(ep, rp2, fmep->config);
2249                         rp2++;
2250                         mess_zero_count++;
2251                 } else {
2252                         get_resources(ep, rp, fmep->config);
2253                         rp++;
2254                         fmep->nsuspects++;
2255                 }
2256         }
2257         return (mess_zero_count);
2258 }
2259 
2260 /*
2261  * addpayloadprop -- add a payload prop to a problem
2262  */
2263 static void
2264 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
2265 {
2266         nvlist_t *rsrc, *hcs;
2267 
2268         ASSERT(fault != NULL);
2269         ASSERT(lhs != NULL);
2270         ASSERT(rhs != NULL);
2271 
2272         if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
2273                 out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);
2274 
2275         if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
2276                 out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
2277                 if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
2278                         out(O_DIE,
2279                             "cannot add payloadprop \"%s\" to fault", lhs);
2280                 if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
2281                         out(O_DIE,
2282                             "cannot add payloadprop \"%s\" to fault", lhs);
2283                 nvlist_free(hcs);
2284                 if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
2285                         out(O_DIE,
2286                             "cannot add payloadprop \"%s\" to fault", lhs);
2287         } else
2288                 out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");
2289 
2290         if (rhs->t == UINT64) {
2291                 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
2292 
2293                 if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
2294                         out(O_DIE,
2295                             "cannot add payloadprop \"%s\" to fault", lhs);
2296         } else {
2297                 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
2298                     lhs, (char *)(uintptr_t)rhs->v);
2299 
2300                 if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
2301                         out(O_DIE,
2302                             "cannot add payloadprop \"%s\" to fault", lhs);
2303         }
2304 }
2305 
2306 static char *Istatbuf;
2307 static char *Istatbufptr;
2308 static int Istatsz;
2309 
2310 /*
2311  * istataddsize -- calculate size of istat and add it to Istatsz
2312  */
2313 /*ARGSUSED2*/
2314 static void
2315 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2316 {
2317         int val;
2318 
2319         ASSERT(lhs != NULL);
2320         ASSERT(rhs != NULL);
2321 
2322         if ((val = stats_counter_value(rhs)) == 0)
2323                 return; /* skip zero-valued stats */
2324 
2325         /* count up the size of the stat name */
2326         Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
2327         Istatsz++;      /* for the trailing NULL byte */
2328 
2329         /* count up the size of the stat value */
2330         Istatsz += snprintf(NULL, 0, "%d", val);
2331         Istatsz++;      /* for the trailing NULL byte */
2332 }
2333 
2334 /*
2335  * istat2str -- serialize an istat, writing result to *Istatbufptr
2336  */
2337 /*ARGSUSED2*/
2338 static void
2339 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2340 {
2341         char *str;
2342         int len;
2343         int val;
2344 
2345         ASSERT(lhs != NULL);
2346         ASSERT(rhs != NULL);
2347 
2348         if ((val = stats_counter_value(rhs)) == 0)
2349                 return; /* skip zero-valued stats */
2350 
2351         /* serialize the stat name */
2352         str = ipath2str(lhs->ename, lhs->ipath);
2353         len = strlen(str);
2354 
2355         ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
2356         (void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
2357         Istatbufptr += len;
2358         FREE(str);
2359         *Istatbufptr++ = '\0';
2360 
2361         /* serialize the stat value */
2362         Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
2363             "%d", val);
2364         *Istatbufptr++ = '\0';
2365 
2366         ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
2367 }
2368 
2369 void
2370 istat_save()
2371 {
2372         if (Istat_need_save == 0)
2373                 return;
2374 
2375         /* figure out how big the serialzed info is */
2376         Istatsz = 0;
2377         lut_walk(Istats, (lut_cb)istataddsize, NULL);
2378 
2379         if (Istatsz == 0) {
2380                 /* no stats to save */
2381                 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2382                 return;
2383         }
2384 
2385         /* create the serialized buffer */
2386         Istatbufptr = Istatbuf = MALLOC(Istatsz);
2387         lut_walk(Istats, (lut_cb)istat2str, NULL);
2388 
2389         /* clear out current saved stats */
2390         fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2391 
2392         /* write out the new version */
2393         fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
2394         FREE(Istatbuf);
2395 
2396         Istat_need_save = 0;
2397 }
2398 
2399 int
2400 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
2401 {
2402         if (ent1->ename != ent2->ename)
2403                 return (ent2->ename - ent1->ename);
2404         if (ent1->ipath != ent2->ipath)
2405                 return ((char *)ent2->ipath - (char *)ent1->ipath);
2406 
2407         return (0);
2408 }
2409 
2410 /*
2411  * istat-verify -- verify the component associated with a stat still exists
2412  *
2413  * if the component no longer exists, this routine resets the stat and
2414  * returns 0.  if the component still exists, it returns 1.
2415  */
2416 static int
2417 istat_verify(struct node *snp, struct istat_entry *entp)
2418 {
2419         struct stats *statp;
2420         nvlist_t *fmri;
2421 
2422         fmri = node2fmri(snp->u.event.epname);
2423         if (platform_path_exists(fmri)) {
2424                 nvlist_free(fmri);
2425                 return (1);
2426         }
2427         nvlist_free(fmri);
2428 
2429         /* component no longer in system.  zero out the associated stats */
2430         if ((statp = (struct stats *)
2431             lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
2432             stats_counter_value(statp) == 0)
2433                 return (0);     /* stat is already reset */
2434 
2435         Istat_need_save = 1;
2436         stats_counter_reset(statp);
2437         return (0);
2438 }
2439 
2440 static void
2441 istat_bump(struct node *snp, int n)
2442 {
2443         struct stats *statp;
2444         struct istat_entry ent;
2445 
2446         ASSERT(snp != NULL);
2447         ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
2448         ASSERT(snp->u.event.epname != NULL);
2449 
2450         /* class name should be hoisted into a single stable entry */
2451         ASSERT(snp->u.event.ename->u.name.next == NULL);
2452         ent.ename = snp->u.event.ename->u.name.s;
2453         ent.ipath = ipath(snp->u.event.epname);
2454 
2455         if (!istat_verify(snp, &ent)) {
2456                 /* component no longer exists in system, nothing to do */
2457                 return;
2458         }
2459 
2460         if ((statp = (struct stats *)
2461             lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
2462                 /* need to create the counter */
2463                 int cnt = 0;
2464                 struct node *np;
2465                 char *sname;
2466                 char *snamep;
2467                 struct istat_entry *newentp;
2468 
2469                 /* count up the size of the stat name */
2470                 np = snp->u.event.ename;
2471                 while (np != NULL) {
2472                         cnt += strlen(np->u.name.s);
2473                         cnt++;  /* for the '.' or '@' */
2474                         np = np->u.name.next;
2475                 }
2476                 np = snp->u.event.epname;
2477                 while (np != NULL) {
2478                         cnt += snprintf(NULL, 0, "%s%llu",
2479                             np->u.name.s, np->u.name.child->u.ull);
2480                         cnt++;  /* for the '/' or trailing NULL byte */
2481                         np = np->u.name.next;
2482                 }
2483 
2484                 /* build the stat name */
2485                 snamep = sname = alloca(cnt);
2486                 np = snp->u.event.ename;
2487                 while (np != NULL) {
2488                         snamep += snprintf(snamep, &sname[cnt] - snamep,
2489                             "%s", np->u.name.s);
2490                         np = np->u.name.next;
2491                         if (np)
2492                                 *snamep++ = '.';
2493                 }
2494                 *snamep++ = '@';
2495                 np = snp->u.event.epname;
2496                 while (np != NULL) {
2497                         snamep += snprintf(snamep, &sname[cnt] - snamep,
2498                             "%s%llu", np->u.name.s, np->u.name.child->u.ull);
2499                         np = np->u.name.next;
2500                         if (np)
2501                                 *snamep++ = '/';
2502                 }
2503                 *snamep++ = '\0';
2504 
2505                 /* create the new stat & add it to our list */
2506                 newentp = MALLOC(sizeof (*newentp));
2507                 *newentp = ent;
2508                 statp = stats_new_counter(NULL, sname, 0);
2509                 Istats = lut_add(Istats, (void *)newentp, (void *)statp,
2510                     (lut_cmp)istat_cmp);
2511         }
2512 
2513         /* if n is non-zero, set that value instead of bumping */
2514         if (n) {
2515                 stats_counter_reset(statp);
2516                 stats_counter_add(statp, n);
2517         } else
2518                 stats_counter_bump(statp);
2519         Istat_need_save = 1;
2520 
2521         ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
2522         out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
2523             stats_counter_value(statp));
2524 }
2525 
/*
 * istat_destructor -- lut_free() callback for the Istats table
 *
 * Releases one table entry: the key (an istat_entry allocated in
 * istat_bump()) is freed and the counter stored as the value is deleted.
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
        struct istat_entry *key = left;
        struct stats *counter = right;

        FREE(key);
        stats_delete(counter);
}
2535 
2536 /*
2537  * Callback used in a walk of the Istats to reset matching stat counters.
2538  */
2539 static void
2540 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
2541     const struct ipath *ipp)
2542 {
2543         char *path;
2544 
2545         if (entp->ipath == ipp) {
2546                 path = ipath2str(entp->ename, ipp);
2547                 out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
2548                 FREE(path);
2549                 stats_counter_reset(statp);
2550                 Istat_need_save = 1;
2551         }
2552 }
2553 
2554 /*ARGSUSED*/
2555 static void
2556 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
2557     void *unused)
2558 {
2559         char *path;
2560         nvlist_t *fmri;
2561 
2562         fmri = ipath2fmri((struct ipath *)(entp->ipath));
2563         if (!platform_path_exists(fmri)) {
2564                 path = ipath2str(entp->ename, entp->ipath);
2565                 out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
2566                 FREE(path);
2567                 stats_counter_reset(statp);
2568                 Istat_need_save = 1;
2569         }
2570         nvlist_free(fmri);
2571 }
2572 
2573 void
2574 istat_fini(void)
2575 {
2576         lut_free(Istats, istat_destructor, NULL);
2577 }
2578 
/*
 * Scratch state shared between serd_save() and its lut_walk() callbacks.
 * serdaddsize() accumulates the serialized size into Serdsz, serd_save()
 * allocates Serdbuf with that size, and serd2str() appends each engine name
 * at Serdbufptr, advancing it as it goes.
 */
static char *Serdbuf;
static char *Serdbufptr;
static int Serdsz;
2582 
2583 /*
2584  * serdaddsize -- calculate size of serd and add it to Serdsz
2585  */
2586 /*ARGSUSED*/
2587 static void
2588 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2589 {
2590         ASSERT(lhs != NULL);
2591 
2592         /* count up the size of the stat name */
2593         Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
2594         Serdsz++;       /* for the trailing NULL byte */
2595 }
2596 
2597 /*
2598  * serd2str -- serialize a serd engine, writing result to *Serdbufptr
2599  */
2600 /*ARGSUSED*/
2601 static void
2602 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2603 {
2604         char *str;
2605         int len;
2606 
2607         ASSERT(lhs != NULL);
2608 
2609         /* serialize the serd engine name */
2610         str = ipath2str(lhs->ename, lhs->ipath);
2611         len = strlen(str);
2612 
2613         ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
2614         (void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
2615         Serdbufptr += len;
2616         FREE(str);
2617         *Serdbufptr++ = '\0';
2618         ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
2619 }
2620 
2621 void
2622 serd_save()
2623 {
2624         if (Serd_need_save == 0)
2625                 return;
2626 
2627         /* figure out how big the serialzed info is */
2628         Serdsz = 0;
2629         lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
2630 
2631         if (Serdsz == 0) {
2632                 /* no serd engines to save */
2633                 fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2634                 return;
2635         }
2636 
2637         /* create the serialized buffer */
2638         Serdbufptr = Serdbuf = MALLOC(Serdsz);
2639         lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
2640 
2641         /* clear out current saved stats */
2642         fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2643 
2644         /* write out the new version */
2645         fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
2646         FREE(Serdbuf);
2647         Serd_need_save = 0;
2648 }
2649 
2650 int
2651 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
2652 {
2653         if (ent1->ename != ent2->ename)
2654                 return (ent2->ename - ent1->ename);
2655         if (ent1->ipath != ent2->ipath)
2656                 return ((char *)ent2->ipath - (char *)ent1->ipath);
2657 
2658         return (0);
2659 }
2660 
2661 void
2662 fme_serd_load(fmd_hdl_t *hdl)
2663 {
2664         int sz;
2665         char *sbuf;
2666         char *sepptr;
2667         char *ptr;
2668         struct serd_entry *newentp;
2669         struct node *epname;
2670         nvlist_t *fmri;
2671         char *namestring;
2672 
2673         if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
2674                 return;
2675         sbuf = alloca(sz);
2676         fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
2677         ptr = sbuf;
2678         while (ptr < &sbuf[sz]) {
2679                 sepptr = strchr(ptr, '@');
2680                 *sepptr = '\0';
2681                 namestring = ptr;
2682                 sepptr++;
2683                 ptr = sepptr;
2684                 ptr += strlen(ptr);
2685                 ptr++;  /* move past the '\0' separating paths */
2686                 epname = pathstring2epnamenp(sepptr);
2687                 fmri = node2fmri(epname);
2688                 if (platform_path_exists(fmri)) {
2689                         newentp = MALLOC(sizeof (*newentp));
2690                         newentp->hdl = hdl;
2691                         newentp->ipath = ipath(epname);
2692                         newentp->ename = stable(namestring);
2693                         SerdEngines = lut_add(SerdEngines, (void *)newentp,
2694                             (void *)newentp, (lut_cmp)serd_cmp);
2695                 } else
2696                         Serd_need_save = 1;
2697                 tree_free(epname);
2698                 nvlist_free(fmri);
2699         }
2700         /* save it back again in case some of the paths no longer exist */
2701         serd_save();
2702 }
2703 
/*
 * serd_destructor -- lut_free() callback for the SerdEngines table
 *
 * Frees the serd_entry stored as the key; the value is not touched here.
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
        struct serd_entry *key = left;

        FREE(key);
}
2711 
2712 /*
2713  * Callback used in a walk of the SerdEngines to reset matching serd engines.
2714  */
2715 /*ARGSUSED*/
2716 static void
2717 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
2718 {
2719         char *path;
2720 
2721         if (entp->ipath == ipp) {
2722                 path = ipath2str(entp->ename, ipp);
2723                 out(O_ALTFP, "serd_reset_cb: resetting %s", path);
2724                 fmd_serd_reset(entp->hdl, path);
2725                 FREE(path);
2726                 Serd_need_save = 1;
2727         }
2728 }
2729 
2730 /*ARGSUSED*/
2731 static void
2732 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
2733 {
2734         char *path;
2735         nvlist_t *fmri;
2736 
2737         fmri = ipath2fmri((struct ipath *)(entp->ipath));
2738         if (!platform_path_exists(fmri)) {
2739                 path = ipath2str(entp->ename, entp->ipath);
2740                 out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
2741                 fmd_serd_reset(entp->hdl, path);
2742                 FREE(path);
2743                 Serd_need_save = 1;
2744         }
2745         nvlist_free(fmri);
2746 }
2747 
2748 void
2749 serd_fini(void)
2750 {
2751         lut_free(SerdEngines, serd_destructor, NULL);
2752 }
2753 
2754 static void
2755 publish_suspects(struct fme *fmep, struct rsl *srl)
2756 {
2757         struct rsl *rp;
2758         nvlist_t *fault;
2759         uint8_t cert;
2760         uint_t *frs;
2761         uint_t frsum, fr;
2762         uint_t messval;
2763         uint_t retireval;
2764         uint_t responseval;
2765         struct node *snp;
2766         int frcnt, fridx;
2767         boolean_t allfaulty = B_TRUE;
2768         struct rsl *erl = srl + fmep->nsuspects - 1;
2769 
2770         /*
2771          * sort the array
2772          */
2773         qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);
2774 
2775         /* sum the fitrates */
2776         frs = alloca(fmep->nsuspects * sizeof (uint_t));
2777         fridx = frcnt = frsum = 0;
2778 
2779         for (rp = srl; rp <= erl; rp++) {
2780                 struct node *n;
2781 
2782                 n = eventprop_lookup(rp->suspect, L_FITrate);
2783                 if (node2uint(n, &fr) != 0) {
2784                         out(O_DEBUG|O_NONL, "event ");
2785                         ipath_print(O_DEBUG|O_NONL,
2786                             rp->suspect->enode->u.event.ename->u.name.s,
2787                             rp->suspect->ipp);
2788                         out(O_VERB, " has no FITrate (using 1)");
2789                         fr = 1;
2790                 } else if (fr == 0) {
2791                         out(O_DEBUG|O_NONL, "event ");
2792                         ipath_print(O_DEBUG|O_NONL,
2793                             rp->suspect->enode->u.event.ename->u.name.s,
2794                             rp->suspect->ipp);
2795                         out(O_VERB, " has zero FITrate (using 1)");
2796                         fr = 1;
2797                 }
2798 
2799                 frs[fridx++] = fr;
2800                 frsum += fr;
2801                 frcnt++;
2802         }
2803 
2804         /* Add them in reverse order of our sort, as fmd reverses order */
2805         for (rp = erl; rp >= srl; rp--) {
2806                 cert = percentof(frs[--fridx], frsum);
2807                 fault = fmd_nvl_create_fault(fmep->hdl,
2808                     rp->suspect->enode->u.event.ename->u.name.s,
2809                     cert,
2810                     rp->asru,
2811                     rp->fru,
2812                     rp->rsrc);
2813                 if (fault == NULL)
2814                         out(O_DIE, "fault creation failed");
2815                 /* if "message" property exists, add it to the fault */
2816                 if (node2uint(eventprop_lookup(rp->suspect, L_message),
2817                     &messval) == 0) {
2818 
2819                         out(O_ALTFP,
2820                             "[FME%d, %s adds message=%d to suspect list]",
2821                             fmep->id,
2822                             rp->suspect->enode->u.event.ename->u.name.s,
2823                             messval);
2824                         if (nvlist_add_boolean_value(fault,
2825                             FM_SUSPECT_MESSAGE,
2826                             (messval) ? B_TRUE : B_FALSE) != 0) {
2827                                 out(O_DIE, "cannot add no-message to fault");
2828                         }
2829                 }
2830 
2831                 /* if "retire" property exists, add it to the fault */
2832                 if (node2uint(eventprop_lookup(rp->suspect, L_retire),
2833                     &retireval) == 0) {
2834 
2835                         out(O_ALTFP,
2836                             "[FME%d, %s adds retire=%d to suspect list]",
2837                             fmep->id,
2838                             rp->suspect->enode->u.event.ename->u.name.s,
2839                             retireval);
2840                         if (nvlist_add_boolean_value(fault,
2841                             FM_SUSPECT_RETIRE,
2842                             (retireval) ? B_TRUE : B_FALSE) != 0) {
2843                                 out(O_DIE, "cannot add no-retire to fault");
2844                         }
2845                 }
2846 
2847                 /* if "response" property exists, add it to the fault */
2848                 if (node2uint(eventprop_lookup(rp->suspect, L_response),
2849                     &responseval) == 0) {
2850 
2851                         out(O_ALTFP,
2852                             "[FME%d, %s adds response=%d to suspect list]",
2853                             fmep->id,
2854                             rp->suspect->enode->u.event.ename->u.name.s,
2855                             responseval);
2856                         if (nvlist_add_boolean_value(fault,
2857                             FM_SUSPECT_RESPONSE,
2858                             (responseval) ? B_TRUE : B_FALSE) != 0) {
2859                                 out(O_DIE, "cannot add no-response to fault");
2860                         }
2861                 }
2862 
2863                 /* add any payload properties */
2864                 lut_walk(rp->suspect->payloadprops,
2865                     (lut_cb)addpayloadprop, (void *)fault);
2866                 rslfree(rp);
2867 
2868                 /*
2869                  * If "action" property exists, evaluate it;  this must be done
2870                  * before the allfaulty check below since some actions may
2871                  * modify the asru to be used in fmd_nvl_fmri_has_fault.  This
2872                  * needs to be restructured if any new actions are introduced
2873                  * that have effects that we do not want to be visible if
2874                  * we decide not to publish in the dupclose check below.
2875                  */
2876                 if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
2877                         struct evalue evalue;
2878 
2879                         out(O_ALTFP|O_NONL,
2880                             "[FME%d, %s action ", fmep->id,
2881                             rp->suspect->enode->u.event.ename->u.name.s);
2882                         ptree_name_iter(O_ALTFP|O_NONL, snp);
2883                         out(O_ALTFP, "]");
2884                         Action_nvl = fault;
2885                         (void) eval_expr(snp, NULL, NULL, NULL, NULL,
2886                             NULL, 0, &evalue);
2887                 }
2888 
2889                 fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
2890 
2891                 /*
2892                  * check if the asru is already marked as "faulty".
2893                  */
2894                 if (allfaulty) {
2895                         nvlist_t *asru;
2896 
2897                         out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
2898                         itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
2899                         out(O_ALTFP|O_VERB|O_NONL, " ");
2900                         if (nvlist_lookup_nvlist(fault,
2901                             FM_FAULT_ASRU, &asru) != 0) {
2902                                 out(O_ALTFP|O_VERB, "NULL asru");
2903                                 allfaulty = B_FALSE;
2904                         } else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
2905                             FMD_HAS_FAULT_ASRU, NULL)) {
2906                                 out(O_ALTFP|O_VERB, "faulty");
2907                         } else {
2908                                 out(O_ALTFP|O_VERB, "not faulty");
2909                                 allfaulty = B_FALSE;
2910                         }
2911                 }
2912 
2913         }
2914 
2915         if (!allfaulty) {
2916                 /*
2917                  * don't update the count stat if all asrus are already
2918                  * present and unrepaired in the asru cache
2919                  */
2920                 for (rp = erl; rp >= srl; rp--) {
2921                         struct event *suspect = rp->suspect;
2922 
2923                         if (suspect == NULL)
2924                                 continue;
2925 
2926                         /* if "count" exists, increment the appropriate stat */
2927                         if ((snp = eventprop_lookup(suspect,
2928                             L_count)) != NULL) {
2929                                 out(O_ALTFP|O_NONL,
2930                                     "[FME%d, %s count ", fmep->id,
2931                                     suspect->enode->u.event.ename->u.name.s);
2932                                 ptree_name_iter(O_ALTFP|O_NONL, snp);
2933                                 out(O_ALTFP, "]");
2934                                 istat_bump(snp, 0);
2935 
2936                         }
2937                 }
2938                 istat_save();   /* write out any istat changes */
2939         }
2940 }
2941 
2942 static const char *
2943 undiag_2defect_str(int ud)
2944 {
2945         switch (ud) {
2946         case UD_VAL_MISSINGINFO:
2947         case UD_VAL_MISSINGOBS:
2948         case UD_VAL_MISSINGPATH:
2949         case UD_VAL_MISSINGZERO:
2950         case UD_VAL_BADOBS:
2951         case UD_VAL_CFGMISMATCH:
2952                 return (UNDIAG_DEFECT_CHKPT);
2953 
2954         case UD_VAL_BADEVENTI:
2955         case UD_VAL_BADEVENTPATH:
2956         case UD_VAL_BADEVENTCLASS:
2957         case UD_VAL_INSTFAIL:
2958         case UD_VAL_NOPATH:
2959         case UD_VAL_UNSOLVD:
2960                 return (UNDIAG_DEFECT_FME);
2961 
2962         case UD_VAL_MAXFME:
2963                 return (UNDIAG_DEFECT_LIMIT);
2964 
2965         case UD_VAL_UNKNOWN:
2966         default:
2967                 return (UNDIAG_DEFECT_UNKNOWN);
2968         }
2969 }
2970 
2971 static const char *
2972 undiag_2fault_str(int ud)
2973 {
2974         switch (ud) {
2975         case UD_VAL_BADEVENTI:
2976         case UD_VAL_BADEVENTPATH:
2977         case UD_VAL_BADEVENTCLASS:
2978         case UD_VAL_INSTFAIL:
2979         case UD_VAL_NOPATH:
2980         case UD_VAL_UNSOLVD:
2981                 return (UNDIAG_FAULT_FME);
2982         default:
2983                 return (NULL);
2984         }
2985 }
2986 
2987 static char *
2988 undiag_2reason_str(int ud, char *arg)
2989 {
2990         const char *ptr;
2991         char *buf;
2992         int with_arg = 0;
2993 
2994         switch (ud) {
2995         case UD_VAL_BADEVENTPATH:
2996                 ptr = UD_STR_BADEVENTPATH;
2997                 with_arg = 1;
2998                 break;
2999         case UD_VAL_BADEVENTCLASS:
3000                 ptr = UD_STR_BADEVENTCLASS;
3001                 with_arg = 1;
3002                 break;
3003         case UD_VAL_BADEVENTI:
3004                 ptr = UD_STR_BADEVENTI;
3005                 with_arg = 1;
3006                 break;
3007         case UD_VAL_BADOBS:
3008                 ptr = UD_STR_BADOBS;
3009                 break;
3010         case UD_VAL_CFGMISMATCH:
3011                 ptr = UD_STR_CFGMISMATCH;
3012                 break;
3013         case UD_VAL_INSTFAIL:
3014                 ptr = UD_STR_INSTFAIL;
3015                 with_arg = 1;
3016                 break;
3017         case UD_VAL_MAXFME:
3018                 ptr = UD_STR_MAXFME;
3019                 break;
3020         case UD_VAL_MISSINGINFO:
3021                 ptr = UD_STR_MISSINGINFO;
3022                 break;
3023         case UD_VAL_MISSINGOBS:
3024                 ptr = UD_STR_MISSINGOBS;
3025                 break;
3026         case UD_VAL_MISSINGPATH:
3027                 ptr = UD_STR_MISSINGPATH;
3028                 break;
3029         case UD_VAL_MISSINGZERO:
3030                 ptr = UD_STR_MISSINGZERO;
3031                 break;
3032         case UD_VAL_NOPATH:
3033                 ptr = UD_STR_NOPATH;
3034                 with_arg = 1;
3035                 break;
3036         case UD_VAL_UNSOLVD:
3037                 ptr = UD_STR_UNSOLVD;
3038                 break;
3039         case UD_VAL_UNKNOWN:
3040         default:
3041                 ptr = UD_STR_UNKNOWN;
3042                 break;
3043         }
3044         if (with_arg) {
3045                 buf = MALLOC(strlen(ptr) + strlen(arg) - 1);
3046                 (void) sprintf(buf, ptr, arg);
3047         } else {
3048                 buf = MALLOC(strlen(ptr) + 1);
3049                 (void) sprintf(buf, ptr);
3050         }
3051         return (buf);
3052 }
3053 
3054 static void
3055 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase,
3056     nvlist_t *detector, char *arg)
3057 {
3058         struct case_list *newcase;
3059         nvlist_t *defect, *fault;
3060         const char *faultstr;
3061         char *reason = undiag_2reason_str(Undiag_reason, arg);
3062 
3063         out(O_ALTFP,
3064             "[undiagnosable ereport received, "
3065             "creating and closing a new case (%s)]", reason);
3066 
3067         newcase = MALLOC(sizeof (struct case_list));
3068         newcase->next = NULL;
3069         newcase->fmcase = fmcase;
3070         if (Undiagablecaselist != NULL)
3071                 newcase->next = Undiagablecaselist;
3072         Undiagablecaselist = newcase;
3073 
3074         if (ffep != NULL)
3075                 fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
3076 
3077         /* add defect */
3078         defect = fmd_nvl_create_fault(hdl,
3079             undiag_2defect_str(Undiag_reason), 50, NULL, NULL, detector);
3080         (void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3081         (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE, B_FALSE);
3082         (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE, B_FALSE);
3083         fmd_case_add_suspect(hdl, newcase->fmcase, defect);
3084 
3085         /* add fault if appropriate */
3086         faultstr = undiag_2fault_str(Undiag_reason);
3087         if (faultstr != NULL) {
3088                 fault = fmd_nvl_create_fault(hdl, faultstr, 50, NULL, NULL,
3089                     detector);
3090                 (void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3091                 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3092                     B_FALSE);
3093                 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3094                     B_FALSE);
3095                 fmd_case_add_suspect(hdl, newcase->fmcase, fault);
3096         }
3097         FREE(reason);
3098 
3099         /* solve and close case */
3100         fmd_case_solve(hdl, newcase->fmcase);
3101         fmd_case_close(hdl, newcase->fmcase);
3102         Undiag_reason = UD_VAL_UNKNOWN;
3103 }
3104 
3105 static void
3106 fme_undiagnosable(struct fme *f)
3107 {
3108         nvlist_t *defect, *fault, *detector = NULL;
3109         struct event *ep;
3110         char *pathstr;
3111         const char *faultstr;
3112         char *reason = undiag_2reason_str(Undiag_reason, NULL);
3113 
3114         out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
3115             f->id, fmd_case_uuid(f->hdl, f->fmcase), reason);
3116 
3117         for (ep = f->observations; ep; ep = ep->observations) {
3118 
3119                 if (ep->ffep != f->e0r)
3120                         fmd_case_add_ereport(f->hdl, f->fmcase, ep->ffep);
3121 
3122                 pathstr = ipath2str(NULL, ipath(platform_getpath(ep->nvp)));
3123                 platform_units_translate(0, f->config, NULL, NULL, &detector,
3124                     pathstr);
3125                 FREE(pathstr);
3126 
3127                 /* add defect */
3128                 defect = fmd_nvl_create_fault(f->hdl,
3129                     undiag_2defect_str(Undiag_reason), 50 / f->uniqobs,
3130                     NULL, NULL, detector);
3131                 (void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3132                 (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE,
3133                     B_FALSE);
3134                 (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE,
3135                     B_FALSE);
3136                 fmd_case_add_suspect(f->hdl, f->fmcase, defect);
3137 
3138                 /* add fault if appropriate */
3139                 faultstr = undiag_2fault_str(Undiag_reason);
3140                 if (faultstr == NULL)
3141                         continue;
3142                 fault = fmd_nvl_create_fault(f->hdl, faultstr, 50 / f->uniqobs,
3143                     NULL, NULL, detector);
3144                 (void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3145                 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3146                     B_FALSE);
3147                 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3148                     B_FALSE);
3149                 fmd_case_add_suspect(f->hdl, f->fmcase, fault);
3150                 nvlist_free(detector);
3151         }
3152         FREE(reason);
3153         fmd_case_solve(f->hdl, f->fmcase);
3154         fmd_case_close(f->hdl, f->fmcase);
3155         Undiag_reason = UD_VAL_UNKNOWN;
3156 }
3157 
3158 /*
3159  * fme_close_case
3160  *
3161  *      Find the requested case amongst our fmes and close it.  Free up
3162  *      the related fme.
3163  */
3164 void
3165 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
3166 {
3167         struct case_list *ucasep, *prevcasep = NULL;
3168         struct fme *prev = NULL;
3169         struct fme *fmep;
3170 
3171         for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
3172                 if (fmcase != ucasep->fmcase) {
3173                         prevcasep = ucasep;
3174                         continue;
3175                 }
3176 
3177                 if (prevcasep == NULL)
3178                         Undiagablecaselist = Undiagablecaselist->next;
3179                 else
3180                         prevcasep->next = ucasep->next;
3181 
3182                 FREE(ucasep);
3183                 return;
3184         }
3185 
3186         for (fmep = FMElist; fmep; fmep = fmep->next) {
3187                 if (fmep->hdl == hdl && fmep->fmcase == fmcase)
3188                         break;
3189                 prev = fmep;
3190         }
3191 
3192         if (fmep == NULL) {
3193                 out(O_WARN, "Eft asked to close unrecognized case [%s].",
3194                     fmd_case_uuid(hdl, fmcase));
3195                 return;
3196         }
3197 
3198         if (EFMElist == fmep)
3199                 EFMElist = prev;
3200 
3201         if (prev == NULL)
3202                 FMElist = FMElist->next;
3203         else
3204                 prev->next = fmep->next;
3205 
3206         fmep->next = NULL;
3207 
3208         /* Get rid of any timer this fme has set */
3209         if (fmep->wull != 0)
3210                 fmd_timer_remove(fmep->hdl, fmep->timer);
3211 
3212         if (ClosedFMEs == NULL) {
3213                 ClosedFMEs = fmep;
3214         } else {
3215                 fmep->next = ClosedFMEs;
3216                 ClosedFMEs = fmep;
3217         }
3218 
3219         Open_fme_count--;
3220 
3221         /* See if we can close the overflow FME */
3222         if (Open_fme_count <= Max_fme) {
3223                 for (fmep = FMElist; fmep; fmep = fmep->next) {
3224                         if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
3225                             fmep->fmcase)))
3226                                 break;
3227                 }
3228 
3229                 if (fmep != NULL)
3230                         fmd_case_close(fmep->hdl, fmep->fmcase);
3231         }
3232 }
3233 
3234 /*
3235  * fme_set_timer()
3236  *      If the time we need to wait for the given FME is less than the
3237  *      current timer, kick that old timer out and establish a new one.
3238  */
3239 static int
3240 fme_set_timer(struct fme *fmep, unsigned long long wull)
3241 {
3242         out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
3243         ptree_timeval(O_ALTFP|O_VERB, &wull);
3244 
3245         if (wull <= fmep->pull) {
3246                 out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
3247                 ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
3248                 out(O_ALTFP|O_VERB, NULL);
3249                 /* we've waited at least wull already, don't need timer */
3250                 return (0);
3251         }
3252 
3253         out(O_ALTFP|O_VERB|O_NONL, " currently ");
3254         if (fmep->wull != 0) {
3255                 out(O_ALTFP|O_VERB|O_NONL, "waiting ");
3256                 ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
3257                 out(O_ALTFP|O_VERB, NULL);
3258         } else {
3259                 out(O_ALTFP|O_VERB|O_NONL, "not waiting");
3260                 out(O_ALTFP|O_VERB, NULL);
3261         }
3262 
3263         if (fmep->wull != 0)
3264                 if (wull >= fmep->wull)
3265                         /* New timer would fire later than established timer */
3266                         return (0);
3267 
3268         if (fmep->wull != 0) {
3269                 fmd_timer_remove(fmep->hdl, fmep->timer);
3270         }
3271 
3272         fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
3273             fmep->e0r, wull);
3274         out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
3275         fmep->wull = wull;
3276         return (1);
3277 }
3278 
3279 void
3280 fme_timer_fired(struct fme *fmep, id_t tid)
3281 {
3282         struct fme *ffmep = NULL;
3283 
3284         for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
3285                 if (ffmep == fmep)
3286                         break;
3287 
3288         if (ffmep == NULL) {
3289                 out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
3290                     (void *)fmep);
3291                 return;
3292         }
3293 
3294         out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
3295         fmep->pull = fmep->wull;
3296         fmep->wull = 0;
3297         fmd_buf_write(fmep->hdl, fmep->fmcase,
3298             WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
3299 
3300         fme_eval(fmep, fmep->e0r);
3301 }
3302 
3303 /*
3304  * Preserve the fme's suspect list in its psuspects list, NULLing the
3305  * suspects list in the meantime.
3306  */
3307 static void
3308 save_suspects(struct fme *fmep)
3309 {
3310         struct event *ep;
3311         struct event *nextep;
3312 
3313         /* zero out the previous suspect list */
3314         for (ep = fmep->psuspects; ep; ep = nextep) {
3315                 nextep = ep->psuspects;
3316                 ep->psuspects = NULL;
3317         }
3318         fmep->psuspects = NULL;
3319 
3320         /* zero out the suspect list, copying it to previous suspect list */
3321         fmep->psuspects = fmep->suspects;
3322         for (ep = fmep->suspects; ep; ep = nextep) {
3323                 nextep = ep->suspects;
3324                 ep->psuspects = ep->suspects;
3325                 ep->suspects = NULL;
3326                 ep->is_suspect = 0;
3327         }
3328         fmep->suspects = NULL;
3329         fmep->nsuspects = 0;
3330 }
3331 
3332 /*
3333  * Retrieve the fme's suspect list from its psuspects list.
3334  */
3335 static void
3336 restore_suspects(struct fme *fmep)
3337 {
3338         struct event *ep;
3339         struct event *nextep;
3340 
3341         fmep->nsuspects = 0;
3342         fmep->suspects = fmep->psuspects;
3343         for (ep = fmep->psuspects; ep; ep = nextep) {
3344                 fmep->nsuspects++;
3345                 nextep = ep->psuspects;
3346                 ep->suspects = ep->psuspects;
3347         }
3348 }
3349 
/*
 * fme_eval -- (re)evaluate an fme and act on the resulting state
 *
 * this is what we use to call the Emrys prototype code instead of main()
 *
 * The current suspect list is saved, clear_arrows is run over the event
 * tree, and hypothesise() is called to compute a new state:
 *
 *      FME_CREDIBLE    upsets are evaluated, then the suspects are trimmed
 *                      into "message=1" and "message=0" lists and published
 *                      (in one or two cases), or the case is simply closed
 *                      if nothing is left.  posted_suspects is set and
 *                      written to the WOBUF_POSTD buffer, and any armed
 *                      timer is removed.
 *      FME_WAIT        a timer is requested for my_delay, the event tree is
 *                      pruned, and we return without tearing anything down.
 *      FME_DISPROVED   the fme is handed to fme_undiagnosable().
 *
 * On every path that leaves the switch without returning, the event tree,
 * the config and the fme buffers are released.
 *
 * ffep is the fmd event passed on to upsets_eval() and trim_suspects(); it
 * may be NULL (see the "if (ffep)" test below).
 */
static void
fme_eval(struct fme *fmep, fmd_event_t *ffep)
{
        struct event *ep;
        unsigned long long my_delay = TIMEVAL_EVENTUALLY;
        struct rsl *srl = NULL;
        struct rsl *srl2 = NULL;
        int mess_zero_count;
        int rpcnt;

        /* stash the suspects from the previous evaluation in psuspects */
        save_suspects(fmep);

        out(O_ALTFP, "Evaluate FME %d", fmep->id);
        indent_set("  ");

        /* run clear_arrows over every entry in the tree before hypothesising */
        lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
        fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);

        out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
            fme_state2str(fmep->state));
        for (ep = fmep->suspects; ep; ep = ep->suspects) {
                out(O_ALTFP|O_NONL, " ");
                itree_pevent_brief(O_ALTFP|O_NONL, ep);
        }
        out(O_ALTFP, NULL);

        switch (fmep->state) {
        case FME_CREDIBLE:
                print_suspects(SLNEW, fmep);
                (void) upsets_eval(fmep, ffep);

                /*
                 * we may have already posted suspects in upsets_eval() which
                 * can recurse into fme_eval() again. If so then just return.
                 */
                if (fmep->posted_suspects)
                        return;

                stats_counter_bump(fmep->diags);
                /*
                 * Remember the suspect count before save_suspects() zeroes
                 * it; both arrays below are sized to hold that many entries.
                 * NOTE(review): the nsuspects tests further down presumably
                 * rely on trim_suspects() setting fmep->nsuspects again --
                 * confirm against trim_suspects().
                 */
                rpcnt = fmep->nsuspects;
                save_suspects(fmep);

                /*
                 * create two lists, one for "message=1" faults and one for
                 * "message=0" faults. If we have a mixture we will generate
                 * two separate suspect lists.
                 */
                srl = MALLOC(rpcnt * sizeof (struct rsl));
                bzero(srl, rpcnt * sizeof (struct rsl));
                srl2 = MALLOC(rpcnt * sizeof (struct rsl));
                bzero(srl2, rpcnt * sizeof (struct rsl));
                mess_zero_count = trim_suspects(fmep, srl, srl2, ffep);

                /*
                 * If the resulting suspect list has no members, we're
                 * done so simply close the case. Otherwise sort and publish.
                 */
                if (fmep->nsuspects == 0 && mess_zero_count == 0) {
                        out(O_ALTFP,
                            "[FME%d, case %s (all suspects are upsets)]",
                            fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
                        fmd_case_close(fmep->hdl, fmep->fmcase);
                } else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
                        /* only the first list is populated */
                        publish_suspects(fmep, srl);
                        out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
                            fmd_case_uuid(fmep->hdl, fmep->fmcase));
                        fmd_case_solve(fmep->hdl, fmep->fmcase);
                } else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
                        /* only the second list is populated; publish it here */
                        fmep->nsuspects = mess_zero_count;
                        publish_suspects(fmep, srl2);
                        out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
                            fmd_case_uuid(fmep->hdl, fmep->fmcase));
                        fmd_case_solve(fmep->hdl, fmep->fmcase);
                } else {
                        struct event *obsp;
                        struct fme *nfmep;

                        publish_suspects(fmep, srl);
                        out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
                            fmd_case_uuid(fmep->hdl, fmep->fmcase));
                        fmd_case_solve(fmep->hdl, fmep->fmcase);

                        /*
                         * Got both message=0 and message=1 so create a
                         * duplicate case. Also need a temporary duplicate fme
                         * structure for use by publish_suspects().
                         */
                        nfmep = alloc_fme();
                        nfmep->id =  Nextid++;
                        nfmep->hdl = fmep->hdl;
                        nfmep->nsuspects = mess_zero_count;
                        nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
                        out(O_ALTFP|O_STAMP,
                            "[creating parallel FME%d, case %s]", nfmep->id,
                            fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
                        Open_fme_count++;
                        if (ffep) {
                                fmd_case_setprincipal(nfmep->hdl,
                                    nfmep->fmcase, ffep);
                                fmd_case_add_ereport(nfmep->hdl,
                                    nfmep->fmcase, ffep);
                        }
                        /* attach the remaining observed ereports (ffep is already added) */
                        for (obsp = fmep->observations; obsp;
                            obsp = obsp->observations)
                                if (obsp->ffep && obsp->ffep != ffep)
                                        fmd_case_add_ereport(nfmep->hdl,
                                            nfmep->fmcase, obsp->ffep);

                        publish_suspects(nfmep, srl2);
                        out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
                            fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
                        fmd_case_solve(nfmep->hdl, nfmep->fmcase);
                        /* nfmep was never linked into any list here; drop it */
                        FREE(nfmep);
                }
                FREE(srl);
                FREE(srl2);
                /* put the suspect chain saved above back in place */
                restore_suspects(fmep);

                fmep->posted_suspects = 1;
                fmd_buf_write(fmep->hdl, fmep->fmcase,
                    WOBUF_POSTD,
                    (void *)&fmep->posted_suspects,
                    sizeof (fmep->posted_suspects));

                /*
                 * Now the suspects have been posted, we can clear up
                 * the instance tree as we won't be looking at it again.
                 * Also cancel the timer as the case is now solved.
                 */
                if (fmep->wull != 0) {
                        fmd_timer_remove(fmep->hdl, fmep->timer);
                        fmep->wull = 0;
                }
                break;

        case FME_WAIT:
                ASSERT(my_delay > fmep->ull);
                (void) fme_set_timer(fmep, my_delay);
                print_suspects(SLWAIT, fmep);
                itree_prune(fmep->eventtree);
                /* return, not break: the tree and config are still needed */
                return;

        case FME_DISPROVED:
                print_suspects(SLDISPROVED, fmep);
                Undiag_reason = UD_VAL_UNSOLVD;
                fme_undiagnosable(fmep);
                break;
        }

        /* reached via break above, or for a state the switch does not list */
        itree_free(fmep->eventtree);
        fmep->eventtree = NULL;
        structconfig_free(fmep->config);
        fmep->config = NULL;
        destroy_fme_bufs(fmep);
}
3508 
/*
 * Forward declarations, so the routines that follow can call these
 * regardless of the order in which they are defined.
 */
static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);
static enum fme_state effects_test(struct fme *fmep,
    struct event *fault_event, unsigned long long at_latest_by,
    unsigned long long *pdelay);
static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state causes_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
3518 
3519 static int
3520 checkconstraints(struct fme *fmep, struct arrow *arrowp)
3521 {
3522         struct constraintlist *ctp;
3523         struct evalue value;
3524         char *sep = "";
3525 
3526         if (arrowp->forever_false) {
3527                 indent();
3528                 out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
3529                 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3530                         out(O_ALTFP|O_VERB|O_NONL, sep);
3531                         ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3532                         sep = ", ";
3533                 }
3534                 out(O_ALTFP|O_VERB, NULL);
3535                 return (0);
3536         }
3537         if (arrowp->forever_true) {
3538                 indent();
3539                 out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
3540                 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3541                         out(O_ALTFP|O_VERB|O_NONL, sep);
3542                         ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3543                         sep = ", ";
3544                 }
3545                 out(O_ALTFP|O_VERB, NULL);
3546                 return (1);
3547         }
3548 
3549         for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3550                 if (eval_expr(ctp->cnode, NULL, NULL,
3551                     &fmep->globals, fmep->config,
3552                     arrowp, 0, &value)) {
3553                         /* evaluation successful */
3554                         if (value.t == UNDEFINED || value.v == 0) {
3555                                 /* known false */
3556                                 arrowp->forever_false = 1;
3557                                 indent();
3558                                 out(O_ALTFP|O_VERB|O_NONL,
3559                                     "  False constraint: ");
3560                                 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3561                                 out(O_ALTFP|O_VERB, NULL);
3562                                 return (0);
3563                         }
3564                 } else {
3565                         /* evaluation unsuccessful -- unknown value */
3566                         indent();
3567                         out(O_ALTFP|O_VERB|O_NONL,
3568                             "  Deferred constraint: ");
3569                         ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3570                         out(O_ALTFP|O_VERB, NULL);
3571                         return (1);
3572                 }
3573         }
3574         /* known true */
3575         arrowp->forever_true = 1;
3576         indent();
3577         out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
3578         for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3579                 out(O_ALTFP|O_VERB|O_NONL, sep);
3580                 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3581                 sep = ", ";
3582         }
3583         out(O_ALTFP|O_VERB, NULL);
3584         return (1);
3585 }
3586 
3587 static int
3588 triggered(struct fme *fmep, struct event *ep, int mark)
3589 {
3590         struct bubble *bp;
3591         struct arrowlist *ap;
3592         int count = 0;
3593 
3594         stats_counter_bump(fmep->Tcallcount);
3595         for (bp = itree_next_bubble(ep, NULL); bp;
3596             bp = itree_next_bubble(ep, bp)) {
3597                 if (bp->t != B_TO)
3598                         continue;
3599                 for (ap = itree_next_arrow(bp, NULL); ap;
3600                     ap = itree_next_arrow(bp, ap)) {
3601                         /* check count of marks against K in the bubble */
3602                         if ((ap->arrowp->mark & mark) &&
3603                             ++count >= bp->nork)
3604                                 return (1);
3605                 }
3606         }
3607         return (0);
3608 }
3609 
/*
 * mark_arrows -- recursively mark (or unmark) the arrows leaving ep
 *
 * For every arrow in each B_FROM bubble of ep, ep2 is the event at the
 * head of the arrow.
 *
 * mark == 0 (clearing pass): arrows that were marked have arrow_marked and
 * the EFFECTS_COUNTER bit cleared, the WAIT_EFFECT/CREDIBLE_EFFECT/
 * PARENT_WAIT bits are cleared from ep2->cached_state, and the walk
 * recurses into ep2.  If "keep" is nonzero, any ep2 that had one of those
 * bits set is flagged keep_in_tree first.  pdelay is not used on this pass
 * (callers in this file pass NULL).
 *
 * mark != 0 (marking pass): the arrow is flagged arrow_marked.  ep2 is
 * skipped if its cached state already records an outcome, if the arrow's
 * constraints fail, or if ep2 is not yet triggered().  Otherwise
 * requirements_test() is run on ep2 and, unless it is disproved, the walk
 * recurses into ep2.
 *
 * Returns WAIT_EFFECT if any event reached had to wait, in which case
 * *pdelay is set to the smallest delay seen; otherwise returns 0 and
 * leaves *pdelay untouched.
 */
static int
mark_arrows(struct fme *fmep, struct event *ep, int mark,
    unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
{
        struct bubble *bp;
        struct arrowlist *ap;
        unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
        unsigned long long my_delay;
        enum fme_state result;
        int retval = 0;

        for (bp = itree_next_bubble(ep, NULL); bp;
            bp = itree_next_bubble(ep, bp)) {
                if (bp->t != B_FROM)
                        continue;
                stats_counter_bump(fmep->Marrowcount);
                for (ap = itree_next_arrow(bp, NULL); ap;
                    ap = itree_next_arrow(bp, ap)) {
                        struct event *ep2 = ap->arrowp->head->myevent;
                        /*
                         * if we're clearing marks, we can avoid doing
                         * all that work evaluating constraints.
                         */
                        if (mark == 0) {
                                /* an unmarked arrow was never followed */
                                if (ap->arrowp->arrow_marked == 0)
                                        continue;
                                ap->arrowp->arrow_marked = 0;
                                ap->arrowp->mark &= ~EFFECTS_COUNTER;
                                if (keep && (ep2->cached_state &
                                    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
                                        ep2->keep_in_tree = 1;
                                ep2->cached_state &=
                                    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
                                (void) mark_arrows(fmep, ep2, mark, 0, NULL,
                                    keep);
                                continue;
                        }
                        ap->arrowp->arrow_marked = 1;
                        /* skip events whose outcome is already cached */
                        if (ep2->cached_state & REQMNTS_DISPROVED) {
                                indent();
                                out(O_ALTFP|O_VERB|O_NONL,
                                    "  ALREADY DISPROVED ");
                                itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
                                out(O_ALTFP|O_VERB, NULL);
                                continue;
                        }
                        if (ep2->cached_state & WAIT_EFFECT) {
                                indent();
                                out(O_ALTFP|O_VERB|O_NONL,
                                    "  ALREADY EFFECTS WAIT ");
                                itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
                                out(O_ALTFP|O_VERB, NULL);
                                continue;
                        }
                        if (ep2->cached_state & CREDIBLE_EFFECT) {
                                indent();
                                out(O_ALTFP|O_VERB|O_NONL,
                                    "  ALREADY EFFECTS CREDIBLE ");
                                itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
                                out(O_ALTFP|O_VERB, NULL);
                                continue;
                        }
                        if ((ep2->cached_state & PARENT_WAIT) &&
                            (mark & PARENT_WAIT)) {
                                indent();
                                out(O_ALTFP|O_VERB|O_NONL,
                                    "  ALREADY PARENT EFFECTS WAIT ");
                                itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
                                out(O_ALTFP|O_VERB, NULL);
                                continue;
                        }
                        /*
                         * ep2's payload is installed only for the duration
                         * of the constraint check and is reset to NULL on
                         * both outcomes.
                         */
                        platform_set_payloadnvp(ep2->nvp);
                        if (checkconstraints(fmep, ap->arrowp) == 0) {
                                platform_set_payloadnvp(NULL);
                                indent();
                                out(O_ALTFP|O_VERB|O_NONL,
                                    "  CONSTRAINTS FAIL ");
                                itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
                                out(O_ALTFP|O_VERB, NULL);
                                continue;
                        }
                        platform_set_payloadnvp(NULL);
                        /* count this arrow, then see if ep2 has enough */
                        ap->arrowp->mark |= EFFECTS_COUNTER;
                        if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
                                indent();
                                out(O_ALTFP|O_VERB|O_NONL,
                                    "  K-COUNT NOT YET MET ");
                                itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
                                out(O_ALTFP|O_VERB, NULL);
                                continue;
                        }
                        ep2->cached_state &= ~PARENT_WAIT;
                        /*
                         * if we've reached an ereport and no propagation time
                         * is specified, use the Hesitate value
                         */
                        if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
                            ap->arrowp->maxdelay == 0ULL) {
                                out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
                                itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
                                out(O_ALTFP|O_VERB, NULL);
                                result = requirements_test(fmep, ep2, Hesitate,
                                    &my_delay);
                        } else {
                                result = requirements_test(fmep, ep2,
                                    at_latest_by + ap->arrowp->maxdelay,
                                    &my_delay);
                        }
                        if (result == FME_WAIT) {
                                retval = WAIT_EFFECT;
                                if (overall_delay > my_delay)
                                        overall_delay = my_delay;
                                ep2->cached_state |= WAIT_EFFECT;
                                indent();
                                out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
                                itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
                                out(O_ALTFP|O_VERB, NULL);
                                indent_push("  E");
                                /* events beyond a waiting one are marked PARENT_WAIT */
                                if (mark_arrows(fmep, ep2, PARENT_WAIT,
                                    at_latest_by, &my_delay, 0) ==
                                    WAIT_EFFECT) {
                                        retval = WAIT_EFFECT;
                                        if (overall_delay > my_delay)
                                                overall_delay = my_delay;
                                }
                                indent_pop();
                        } else if (result == FME_DISPROVED) {
                                /* nothing is propagated past a disproved event */
                                indent();
                                out(O_ALTFP|O_VERB|O_NONL,
                                    "  EFFECTS DISPROVED ");
                                itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
                                out(O_ALTFP|O_VERB, NULL);
                        } else {
                                /* record the caller's mark on ep2 and keep going */
                                ep2->cached_state |= mark;
                                indent();
                                if (mark == CREDIBLE_EFFECT)
                                        out(O_ALTFP|O_VERB|O_NONL,
                                            "  EFFECTS CREDIBLE ");
                                else
                                        out(O_ALTFP|O_VERB|O_NONL,
                                            "  PARENT EFFECTS WAIT ");
                                itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
                                out(O_ALTFP|O_VERB, NULL);
                                indent_push("  E");
                                if (mark_arrows(fmep, ep2, mark, at_latest_by,
                                    &my_delay, 0) == WAIT_EFFECT) {
                                        retval = WAIT_EFFECT;
                                        if (overall_delay > my_delay)
                                                overall_delay = my_delay;
                                }
                                indent_pop();
                        }
                }
        }
        /* pdelay is written only when there is something to wait for */
        if (retval == WAIT_EFFECT)
                *pdelay = overall_delay;
        return (retval);
}
3768 
3769 static enum fme_state
3770 effects_test(struct fme *fmep, struct event *fault_event,
3771     unsigned long long at_latest_by, unsigned long long *pdelay)
3772 {
3773         struct event *error_event;
3774         enum fme_state return_value = FME_CREDIBLE;
3775         unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3776         unsigned long long my_delay;
3777 
3778         stats_counter_bump(fmep->Ecallcount);
3779         indent_push("  E");
3780         indent();
3781         out(O_ALTFP|O_VERB|O_NONL, "->");
3782         itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3783         out(O_ALTFP|O_VERB, NULL);
3784 
3785         if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
3786             &my_delay, 0) == WAIT_EFFECT) {
3787                 return_value = FME_WAIT;
3788                 if (overall_delay > my_delay)
3789                         overall_delay = my_delay;
3790         }
3791         for (error_event = fmep->observations;
3792             error_event; error_event = error_event->observations) {
3793                 indent();
3794                 out(O_ALTFP|O_VERB|O_NONL, " ");
3795                 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
3796                 if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
3797                         if (error_event->cached_state &
3798                             (PARENT_WAIT|WAIT_EFFECT)) {
3799                                 out(O_ALTFP|O_VERB, " NOT YET triggered");
3800                                 continue;
3801                         }
3802                         return_value = FME_DISPROVED;
3803                         out(O_ALTFP|O_VERB, " NOT triggered");
3804                         break;
3805                 } else {
3806                         out(O_ALTFP|O_VERB, " triggered");
3807                 }
3808         }
3809         if (return_value == FME_DISPROVED) {
3810                 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
3811         } else {
3812                 fault_event->keep_in_tree = 1;
3813                 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
3814         }
3815 
3816         indent();
3817         out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
3818             fme_state2str(return_value));
3819         itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3820         out(O_ALTFP|O_VERB, NULL);
3821         indent_pop();
3822         if (return_value == FME_WAIT)
3823                 *pdelay = overall_delay;
3824         return (return_value);
3825 }
3826 
3827 static enum fme_state
3828 requirements_test(struct fme *fmep, struct event *ep,
3829     unsigned long long at_latest_by, unsigned long long *pdelay)
3830 {
3831         int waiting_events;
3832         int credible_events;
3833         int deferred_events;
3834         enum fme_state return_value = FME_CREDIBLE;
3835         unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3836         unsigned long long arrow_delay;
3837         unsigned long long my_delay;
3838         struct event *ep2;
3839         struct bubble *bp;
3840         struct arrowlist *ap;
3841 
3842         if (ep->cached_state & REQMNTS_CREDIBLE) {
3843                 indent();
3844                 out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
3845                 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3846                 out(O_ALTFP|O_VERB, NULL);
3847                 return (FME_CREDIBLE);
3848         }
3849         if (ep->cached_state & REQMNTS_DISPROVED) {
3850                 indent();
3851                 out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
3852                 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3853                 out(O_ALTFP|O_VERB, NULL);
3854                 return (FME_DISPROVED);
3855         }
3856         if (ep->cached_state & REQMNTS_WAIT) {
3857                 indent();
3858                 *pdelay = ep->cached_delay;
3859                 out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
3860                 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3861                 out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
3862                 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3863                 out(O_ALTFP|O_VERB, NULL);
3864                 return (FME_WAIT);
3865         }
3866         stats_counter_bump(fmep->Rcallcount);
3867         indent_push("  R");
3868         indent();
3869         out(O_ALTFP|O_VERB|O_NONL, "->");
3870         itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3871         out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
3872         ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3873         out(O_ALTFP|O_VERB, NULL);
3874 
3875         if (ep->t == N_EREPORT) {
3876                 if (ep->count == 0) {
3877                         if (fmep->pull >= at_latest_by) {
3878                                 return_value = FME_DISPROVED;
3879                         } else {
3880                                 ep->cached_delay = *pdelay = at_latest_by;
3881                                 return_value = FME_WAIT;
3882                         }
3883                 }
3884 
3885                 indent();
3886                 switch (return_value) {
3887                 case FME_CREDIBLE:
3888                         ep->cached_state |= REQMNTS_CREDIBLE;
3889                         out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
3890                         itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3891                         break;
3892                 case FME_DISPROVED:
3893                         ep->cached_state |= REQMNTS_DISPROVED;
3894                         out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3895                         itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3896                         break;
3897                 case FME_WAIT:
3898                         ep->cached_state |= REQMNTS_WAIT;
3899                         out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
3900                         itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3901                         out(O_ALTFP|O_VERB|O_NONL, " to ");
3902                         ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3903                         break;
3904                 default:
3905                         out(O_DIE, "requirements_test: unexpected fme_state");
3906                         break;
3907                 }
3908                 out(O_ALTFP|O_VERB, NULL);
3909                 indent_pop();
3910 
3911                 return (return_value);
3912         }
3913 
3914         /* this event is not a report, descend the tree */
3915         for (bp = itree_next_bubble(ep, NULL); bp;
3916             bp = itree_next_bubble(ep, bp)) {
3917                 int n;
3918 
3919                 if (bp->t != B_FROM)
3920                         continue;
3921 
3922                 n = bp->nork;
3923 
3924                 credible_events = 0;
3925                 waiting_events = 0;
3926                 deferred_events = 0;
3927                 arrow_delay = TIMEVAL_EVENTUALLY;
3928                 /*
3929                  * n is -1 for 'A' so adjust it.
3930                  * XXX just count up the arrows for now.
3931                  */
3932                 if (n < 0) {
3933                         n = 0;
3934                         for (ap = itree_next_arrow(bp, NULL); ap;
3935                             ap = itree_next_arrow(bp, ap))
3936                                 n++;
3937                         indent();
3938                         out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
3939                 } else {
3940                         indent();
3941                         out(O_ALTFP|O_VERB, " Bubble N=%d", n);
3942                 }
3943 
3944                 if (n == 0)
3945                         continue;
3946                 if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
3947                         for (ap = itree_next_arrow(bp, NULL); ap;
3948                             ap = itree_next_arrow(bp, ap)) {
3949                                 ep2 = ap->arrowp->head->myevent;
3950                                 platform_set_payloadnvp(ep2->nvp);
3951                                 (void) checkconstraints(fmep, ap->arrowp);
3952                                 if (!ap->arrowp->forever_false) {
3953                                         /*
3954                                          * if all arrows are invalidated by the
3955                                          * constraints, then we should elide the
3956                                          * whole bubble to be consistant with
3957                                          * the tree creation time behaviour
3958                                          */
3959                                         bp->mark |= BUBBLE_OK;
3960                                         platform_set_payloadnvp(NULL);
3961                                         break;
3962                                 }
3963                                 platform_set_payloadnvp(NULL);
3964                         }
3965                 }
3966                 for (ap = itree_next_arrow(bp, NULL); ap;
3967                     ap = itree_next_arrow(bp, ap)) {
3968                         ep2 = ap->arrowp->head->myevent;
3969                         if (n <= credible_events)
3970                                 break;
3971 
3972                         ap->arrowp->mark |= REQMNTS_COUNTER;
3973                         if (triggered(fmep, ep2, REQMNTS_COUNTER))
3974                                 /* XXX adding max timevals! */
3975                                 switch (requirements_test(fmep, ep2,
3976                                     at_latest_by + ap->arrowp->maxdelay,
3977                                     &my_delay)) {
3978                                 case FME_DEFERRED:
3979                                         deferred_events++;
3980                                         break;
3981                                 case FME_CREDIBLE:
3982                                         credible_events++;
3983                                         break;
3984                                 case FME_DISPROVED:
3985                                         break;
3986                                 case FME_WAIT:
3987                                         if (my_delay < arrow_delay)
3988                                                 arrow_delay = my_delay;
3989                                         waiting_events++;
3990                                         break;
3991                                 default:
3992                                         out(O_DIE,
3993                                         "Bug in requirements_test.");
3994                                 }
3995                         else
3996                                 deferred_events++;
3997                 }
3998                 if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
3999                         bp->mark |= BUBBLE_ELIDED;
4000                         continue;
4001                 }
4002                 indent();
4003                 out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
4004                     credible_events + deferred_events, waiting_events);
4005                 if (credible_events + deferred_events + waiting_events < n) {
4006                         /* Can never meet requirements */
4007                         ep->cached_state |= REQMNTS_DISPROVED;
4008                         indent();
4009                         out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
4010                         itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4011                         out(O_ALTFP|O_VERB, NULL);
4012                         indent_pop();
4013                         return (FME_DISPROVED);
4014                 }
4015                 if (credible_events + deferred_events < n) {
4016                         /* will have to wait */
4017                         /* wait time is shortest known */
4018                         if (arrow_delay < overall_delay)
4019                                 overall_delay = arrow_delay;
4020                         return_value = FME_WAIT;
4021                 } else if (credible_events < n) {
4022                         if (return_value != FME_WAIT)
4023                                 return_value = FME_DEFERRED;
4024                 }
4025         }
4026 
4027         /*
4028          * don't mark as FME_DEFERRED. If this event isn't reached by another
4029          * path, then this will be considered FME_CREDIBLE. But if it is
4030          * reached by a different path so the K-count is met, then might
4031          * get overridden by FME_WAIT or FME_DISPROVED.
4032          */
4033         if (return_value == FME_WAIT) {
4034                 ep->cached_state |= REQMNTS_WAIT;
4035                 ep->cached_delay = *pdelay = overall_delay;
4036         } else if (return_value == FME_CREDIBLE) {
4037                 ep->cached_state |= REQMNTS_CREDIBLE;
4038         }
4039         indent();
4040         out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
4041             fme_state2str(return_value));
4042         itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4043         out(O_ALTFP|O_VERB, NULL);
4044         indent_pop();
4045         return (return_value);
4046 }
4047 
4048 static enum fme_state
4049 causes_test(struct fme *fmep, struct event *ep,
4050     unsigned long long at_latest_by, unsigned long long *pdelay)
4051 {
4052         unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4053         unsigned long long my_delay;
4054         int credible_results = 0;
4055         int waiting_results = 0;
4056         enum fme_state fstate;
4057         struct event *tail_event;
4058         struct bubble *bp;
4059         struct arrowlist *ap;
4060         int k = 1;
4061 
4062         stats_counter_bump(fmep->Ccallcount);
4063         indent_push("  C");
4064         indent();
4065         out(O_ALTFP|O_VERB|O_NONL, "->");
4066         itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4067         out(O_ALTFP|O_VERB, NULL);
4068 
4069         for (bp = itree_next_bubble(ep, NULL); bp;
4070             bp = itree_next_bubble(ep, bp)) {
4071                 if (bp->t != B_TO)
4072                         continue;
4073                 k = bp->nork;        /* remember the K value */
4074                 for (ap = itree_next_arrow(bp, NULL); ap;
4075                     ap = itree_next_arrow(bp, ap)) {
4076                         int do_not_follow = 0;
4077 
4078                         /*
4079                          * if we get to the same event multiple times
4080                          * only worry about the first one.
4081                          */
4082                         if (ap->arrowp->tail->myevent->cached_state &
4083                             CAUSES_TESTED) {
4084                                 indent();
4085                                 out(O_ALTFP|O_VERB|O_NONL,
4086                                     "  causes test already run for ");
4087                                 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4088                                     ap->arrowp->tail->myevent);
4089                                 out(O_ALTFP|O_VERB, NULL);
4090                                 continue;
4091                         }
4092 
4093                         /*
4094                          * see if false constraint prevents us
4095                          * from traversing this arrow
4096                          */
4097                         platform_set_payloadnvp(ep->nvp);
4098                         if (checkconstraints(fmep, ap->arrowp) == 0)
4099                                 do_not_follow = 1;
4100                         platform_set_payloadnvp(NULL);
4101                         if (do_not_follow) {
4102                                 indent();
4103                                 out(O_ALTFP|O_VERB|O_NONL,
4104                                     "  False arrow from ");
4105                                 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4106                                     ap->arrowp->tail->myevent);
4107                                 out(O_ALTFP|O_VERB, NULL);
4108                                 continue;
4109                         }
4110 
4111                         ap->arrowp->tail->myevent->cached_state |=
4112                             CAUSES_TESTED;
4113                         tail_event = ap->arrowp->tail->myevent;
4114                         fstate = hypothesise(fmep, tail_event, at_latest_by,
4115                             &my_delay);
4116 
4117                         switch (fstate) {
4118                         case FME_WAIT:
4119                                 if (my_delay < overall_delay)
4120                                         overall_delay = my_delay;
4121                                 waiting_results++;
4122                                 break;
4123                         case FME_CREDIBLE:
4124                                 credible_results++;
4125                                 break;
4126                         case FME_DISPROVED:
4127                                 break;
4128                         default:
4129                                 out(O_DIE, "Bug in causes_test");
4130                         }
4131                 }
4132         }
4133         /* compare against K */
4134         if (credible_results + waiting_results < k) {
4135                 indent();
4136                 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
4137                 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4138                 out(O_ALTFP|O_VERB, NULL);
4139                 indent_pop();
4140                 return (FME_DISPROVED);
4141         }
4142         if (waiting_results != 0) {
4143                 *pdelay = overall_delay;
4144                 indent();
4145                 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
4146                 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4147                 out(O_ALTFP|O_VERB|O_NONL, " to ");
4148                 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4149                 out(O_ALTFP|O_VERB, NULL);
4150                 indent_pop();
4151                 return (FME_WAIT);
4152         }
4153         indent();
4154         out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
4155         itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4156         out(O_ALTFP|O_VERB, NULL);
4157         indent_pop();
4158         return (FME_CREDIBLE);
4159 }
4160 
4161 static enum fme_state
4162 hypothesise(struct fme *fmep, struct event *ep,
4163         unsigned long long at_latest_by, unsigned long long *pdelay)
4164 {
4165         enum fme_state rtr, otr;
4166         unsigned long long my_delay;
4167         unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4168 
4169         stats_counter_bump(fmep->Hcallcount);
4170         indent_push("  H");
4171         indent();
4172         out(O_ALTFP|O_VERB|O_NONL, "->");
4173         itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4174         out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
4175         ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4176         out(O_ALTFP|O_VERB, NULL);
4177 
4178         rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
4179         if ((rtr == FME_WAIT) && (my_delay < overall_delay))
4180                 overall_delay = my_delay;
4181         if (rtr != FME_DISPROVED) {
4182                 if (is_problem(ep->t)) {
4183                         otr = effects_test(fmep, ep, at_latest_by, &my_delay);
4184                         if (otr != FME_DISPROVED) {
4185                                 if (fmep->peek == 0 && ep->is_suspect == 0) {
4186                                         ep->suspects = fmep->suspects;
4187                                         ep->is_suspect = 1;
4188                                         fmep->suspects = ep;
4189                                         fmep->nsuspects++;
4190                                 }
4191                         }
4192                 } else
4193                         otr = causes_test(fmep, ep, at_latest_by, &my_delay);
4194                 if ((otr == FME_WAIT) && (my_delay < overall_delay))
4195                         overall_delay = my_delay;
4196                 if ((otr != FME_DISPROVED) &&
4197                     ((rtr == FME_WAIT) || (otr == FME_WAIT)))
4198                         *pdelay = overall_delay;
4199         }
4200         if (rtr == FME_DISPROVED) {
4201                 indent();
4202                 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4203                 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4204                 out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
4205                 indent_pop();
4206                 return (FME_DISPROVED);
4207         }
4208         if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
4209                 indent();
4210                 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4211                 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4212                 out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
4213                 indent_pop();
4214                 return (FME_DISPROVED);
4215         }
4216         if (otr == FME_DISPROVED) {
4217                 indent();
4218                 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4219                 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4220                 out(O_ALTFP|O_VERB, " (causes are not credible)");
4221                 indent_pop();
4222                 return (FME_DISPROVED);
4223         }
4224         if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
4225                 indent();
4226                 out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
4227                 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4228                 out(O_ALTFP|O_VERB|O_NONL, " to ");
4229                 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
4230                 out(O_ALTFP|O_VERB, NULL);
4231                 indent_pop();
4232                 return (FME_WAIT);
4233         }
4234         indent();
4235         out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
4236         itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4237         out(O_ALTFP|O_VERB, NULL);
4238         indent_pop();
4239         return (FME_CREDIBLE);
4240 }
4241 
4242 /*
4243  * fme_istat_load -- reconstitute any persistent istats
4244  */
4245 void
4246 fme_istat_load(fmd_hdl_t *hdl)
4247 {
4248         int sz;
4249         char *sbuf;
4250         char *ptr;
4251 
4252         if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
4253                 out(O_ALTFP, "fme_istat_load: No stats");
4254                 return;
4255         }
4256 
4257         sbuf = alloca(sz);
4258 
4259         fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
4260 
4261         /*
4262          * pick apart the serialized stats
4263          *
4264          * format is:
4265          *      <class-name>, '@', <path>, '\0', <value>, '\0'
4266          * for example:
4267          *      "stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
4268          *
4269          * since this is parsing our own serialized data, any parsing issues
4270          * are fatal, so we check for them all with ASSERT() below.
4271          */
4272         ptr = sbuf;
4273         while (ptr < &sbuf[sz]) {
4274                 char *sepptr;
4275                 struct node *np;
4276                 int val;
4277 
4278                 sepptr = strchr(ptr, '@');
4279                 ASSERT(sepptr != NULL);
4280                 *sepptr = '\0';
4281 
4282                 /* construct the event */
4283                 np = newnode(T_EVENT, NULL, 0);
4284                 np->u.event.ename = newnode(T_NAME, NULL, 0);
4285                 np->u.event.ename->u.name.t = N_STAT;
4286                 np->u.event.ename->u.name.s = stable(ptr);
4287                 np->u.event.ename->u.name.it = IT_ENAME;
4288                 np->u.event.ename->u.name.last = np->u.event.ename;
4289 
4290                 ptr = sepptr + 1;
4291                 ASSERT(ptr < &sbuf[sz]);
4292                 ptr += strlen(ptr);
4293                 ptr++;  /* move past the '\0' separating path from value */
4294                 ASSERT(ptr < &sbuf[sz]);
4295                 ASSERT(isdigit(*ptr));
4296                 val = atoi(ptr);
4297                 ASSERT(val > 0);
4298                 ptr += strlen(ptr);
4299                 ptr++;  /* move past the final '\0' for this entry */
4300 
4301                 np->u.event.epname = pathstring2epnamenp(sepptr + 1);
4302                 ASSERT(np->u.event.epname != NULL);
4303 
4304                 istat_bump(np, val);
4305                 tree_free(np);
4306         }
4307 
4308         istat_save();
4309 }