/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */


/*
 * Ereport-handling routines for memory errors
 */

#include <gmem_mem.h>
#include <gmem_dimm.h>
#include <gmem_page.h>
#include <gmem.h>

#include <strings.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <fm/fmd_api.h>
#include <fm/libtopo.h>
#include <sys/fm/protocol.h>
#include <sys/async.h>
#include <sys/errclassify.h>

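/*
 * Bit masks used by gmem_to_hashed_addr() below to select the
 * physical-address bit fields named in the hashing formula.
 */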
#define OFFBIT          0xFFFFFFFFFFFC07FFULL
#define BIT28_32        0x00000001F0000000ULL
#define BIT13_17        0x000000000003E000ULL
#define BIT18_19        0x00000000000C0000ULL
#define BIT11_12        0x0000000000001800ULL

struct ce_name2type {
        const char *name;
        ce_dispact_t type;
};

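/*
 * FRU nvlist found by the topo walk in find_fault_fru(); it is reset and
 * then consumed by gmem_find_fault_fru() below.
 */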
nvlist_t *fru_nvl;

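/*
 * Map the memory portion of an ereport class name (mem-unk, mem-is,
 * mem-cs, mem-ss) to the corresponding CE disposition type.
 */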
static ce_dispact_t
gmem_mem_name2type(const char *name)
{
        static const struct ce_name2type new[] = {
                { "mem-unk",            CE_DISP_UNKNOWN },
                { "mem-is",             CE_DISP_INTERMITTENT },
                { "mem-cs",             CE_DISP_PERS },
                { "mem-ss",             CE_DISP_STICKY },
                { NULL }
        };
        const struct ce_name2type *names = &new[0];
        const struct ce_name2type *tp;

        for (tp = names; tp->name != NULL; tp++) {
                if (strcasecmp(name, tp->name) == 0)
                        return (tp->type);
        }

        return (CE_DISP_UNKNOWN);
}

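/*
 * topo_walk() callback: compare the hc-list of each "chip" topo node with
 * the hc-list passed in through 'arg'; on a complete match, duplicate that
 * node's FRU into fru_nvl and terminate the walk.
 */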
/*ARGSUSED*/
static int
find_fault_fru(topo_hdl_t *thp, tnode_t *node, void *arg)
{
        nvlist_t *nvl = (nvlist_t *)arg;
        nvlist_t *rsc = NULL, *fru = NULL;
        nvlist_t **hcl, **topo_hcl;
        uint_t n1, n2;
        char *name, *name1, *name2;
        char *id1, *id2;
        int err, i;

        if (topo_node_resource(node, &rsc, &err) < 0)
                return (TOPO_WALK_NEXT);

        err = nvlist_lookup_nvlist_array(rsc, FM_FMRI_HC_LIST, &topo_hcl, &n1);

        if (err != 0) {
                nvlist_free(rsc);
                return (TOPO_WALK_NEXT);
        }

        (void) nvlist_lookup_string(topo_hcl[n1 - 1], FM_FMRI_HC_NAME, &name);
        if (strcmp(name, "chip") != 0) {
                nvlist_free(rsc);
                return (TOPO_WALK_NEXT);
        }

        (void) nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hcl, &n2);

        if (n1 != n2) {
                nvlist_free(rsc);
                return (TOPO_WALK_NEXT);
        }

        for (i = 0; i < n1; i++) {
                (void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_NAME,
                    &name1);
                (void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_ID, &id1);
                (void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, &name2);
                (void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &id2);
                if (strcmp(name1, name2) != 0 || strcmp(id1, id2) != 0) {
                        nvlist_free(rsc);
                        return (TOPO_WALK_NEXT);
                }
        }

        (void) topo_node_fru(node, &fru, NULL, &err);
        if (fru != NULL) {
                (void) nvlist_dup(fru, &fru_nvl, NV_UNIQUE_NAME);
                nvlist_free(fru);
        }
        nvlist_free(rsc);
        return (TOPO_WALK_TERMINATE);
}

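/*
 * Walk the hc-scheme topology looking for the node whose hc-list matches
 * 'nvl' and return a duplicate of that node's FRU, or NULL if no match is
 * found.
 */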
nvlist_t *
gmem_find_fault_fru(fmd_hdl_t *hdl, nvlist_t *nvl)
{
        topo_hdl_t *thp;
        topo_walk_t *twp;
        int err;
        fru_nvl = NULL;

        if ((thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION)) == NULL)
                return (NULL);

        if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC,
            find_fault_fru, nvl, &err)) == NULL) {
                fmd_hdl_topo_rele(hdl, thp);
                return (NULL);
        }

        (void) topo_walk_step(twp, TOPO_WALK_CHILD);
        topo_walk_fini(twp);
        fmd_hdl_topo_rele(hdl, thp);
        return (fru_nvl);
}

/*
 * fault the FRU of the common detector between two DIMMs
 */
void
gmem_gen_datapath_fault(fmd_hdl_t *hdl, nvlist_t *det)
{
        char *name, *id;
        nvlist_t **hcl1, **hcl;
        uint_t n;
        int i, j;
        fmd_case_t *cp;
        nvlist_t *fltlist, *rsrc;
        nvlist_t *fru = NULL;

        if (nvlist_lookup_nvlist_array(det, FM_FMRI_HC_LIST, &hcl1, &n) != 0)
                return;

        for (i = 0; i < n; i++) {
                (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
                if (strcmp(name, "chip") == 0)
                        break;
        }

        n = i + 1;
        hcl = fmd_hdl_zalloc(hdl, sizeof (nvlist_t *) * n, FMD_SLEEP);
        if (hcl == NULL)
                return;

        for (i = 0; i < n; i++) {
                (void) nvlist_alloc(&hcl[i],
                    NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0);
        }

        for (i = 0, j = 0; i < n; i++) {
                (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
                (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_ID, &id);
                (void) nvlist_add_string(hcl[j], FM_FMRI_HC_NAME, name);
                (void) nvlist_add_string(hcl[j], FM_FMRI_HC_ID, id);
                j++;
                if (strcmp(name, "chip") == 0)
                        break;
        }

        if (nvlist_alloc(&rsrc, NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0) != 0) {
                for (i = 0; i < n; i++) {
                        if (hcl[i] != NULL)
                                nvlist_free(hcl[i]);
                }
                fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
                return;
        }

        if (nvlist_add_uint8(rsrc, FM_VERSION, FM_HC_SCHEME_VERSION) != 0 ||
            nvlist_add_string(rsrc, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0 ||
            nvlist_add_string(rsrc, FM_FMRI_HC_ROOT, "") != 0 ||
            nvlist_add_uint32(rsrc, FM_FMRI_HC_LIST_SZ, n) != 0 ||
            nvlist_add_nvlist_array(rsrc, FM_FMRI_HC_LIST, hcl, n) != 0) {
                for (i = 0; i < n; i++) {
                        if (hcl[i] != NULL)
                                nvlist_free(hcl[i]);
                }
                fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
                nvlist_free(rsrc);
                return;
        }


        fru = gmem_find_fault_fru(hdl, rsrc);
        if (fru != NULL) {
                cp = fmd_case_open(hdl, NULL);
                fltlist = fmd_nvl_create_fault(hdl, "fault.memory.datapath",
                    100, fru, fru, fru);
                fmd_case_add_suspect(hdl, cp, fltlist);
                fmd_case_solve(hdl, cp);
                nvlist_free(fru);
        }

        for (i = 0; i < n; i++) {
                if (hcl[i] != NULL)
                        nvlist_free(hcl[i]);
        }

        fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
        nvlist_free(rsrc);
}

/*
 * Formula to convert an unhashed address to a hashed address:
 * PA[17:11] = (PA[32:28] xor PA[17:13]) :: (PA[19:18] xor PA[12:11])
 */
static void
gmem_to_hashed_addr(uint64_t *addr, uint64_t afar)
{
        *addr = (afar & OFFBIT) |
            (((afar & BIT28_32) >> 15) ^ (afar & BIT13_17)) |
            (((afar & BIT18_19) >> 7) ^ (afar & BIT11_12));
}

/*
 * check if a dimm has n CEs that have the same symbol-in-error
 */
int
upos_thresh_check(gmem_dimm_t *dimm, uint16_t upos, uint32_t threshold)
{
        int i;
        gmem_mq_t *ip, *next;
        int count = 0;

        for (i = 0; i < GMEM_MAX_CKWDS; i++) {
                for (ip = gmem_list_next(&dimm->mq_root[i]); ip != NULL;
                    ip = next) {
                        next = gmem_list_next(ip);
                        if (ip->mq_unit_position == upos) {
                                count++;
                                if (count >= threshold)
                                        return (1);
                        }
                }
        }
        return (0);
}

/*
 * check if smaller number of retired pages > 1/16 of larger number of
 * retired pages
 */
int
check_bad_rw_retired_pages(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2)
{
        uint_t sret, lret;
        double ratio;

        sret = lret = 0;

        if (d2->dimm_nretired < d1->dimm_nretired) {
                sret = d2->dimm_nretired;
                lret = d1->dimm_nretired;
        } else if (d2->dimm_nretired > d1->dimm_nretired) {
                sret = d1->dimm_nretired;
                lret = d2->dimm_nretired;
        } else
                return (0);

        ratio = lret * GMEM_MQ_RATIO;

        if (sret > ratio) {
                fmd_hdl_debug(hdl, "sret=%d lret=%d ratio=%.3f",
                    sret, lret, ratio);
                return (1);
        }
        return (0);
}

/*
 * check bad rw on any two DIMMs. The check succeeds if
 * - each DIMM has n CEs with the same symbol-in-error, and
 * - the smaller number of retired pages > 1/16 of the larger number of
 *   retired pages
 */
static int
check_bad_rw_between_dimms(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2,
    uint16_t *rupos)
{
        int i;
        gmem_mq_t *ip, *next;
        uint16_t upos;

        for (i = 0; i < GMEM_MAX_CKWDS; i++) {
                for (ip = gmem_list_next(&d1->mq_root[i]); ip != NULL;
                    ip = next) {
                        next = gmem_list_next(ip);
                        upos = ip->mq_unit_position;
                        if (upos_thresh_check(d1, upos, gmem.gm_nupos)) {
                                if (upos_thresh_check(d2, upos,
                                    gmem.gm_nupos)) {
                                        if (check_bad_rw_retired_pages(hdl,
                                            d1, d2)) {
                                                *rupos = upos;
                                                return (1);
                                        }
                                }
                        }
                }
        }

        return (0);
}

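/*
 * Compare the DIMM that just took a CE against every other known DIMM on
 * the same datapath; if a bad reader/writer pattern is found, fault the
 * shared datapath and remember the symbol in error.
 */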
static void
bad_reader_writer_check(fmd_hdl_t *hdl, nvlist_t *det, gmem_dimm_t *ce_dimm)
{
        gmem_dimm_t *d, *next;
        uint16_t upos;

        for (d = gmem_list_next(&gmem.gm_dimms); d != NULL; d = next) {
                next = gmem_list_next(d);
                if (d == ce_dimm)
                        continue;
                if (!gmem_same_datapath_dimms(hdl, ce_dimm, d))
                        continue;
                if (check_bad_rw_between_dimms(hdl, ce_dimm, d, &upos)) {
                        gmem_gen_datapath_fault(hdl, det);
                        gmem_save_symbol_error(hdl, ce_dimm, upos);
                        fmd_hdl_debug(hdl,
                            "check_bad_rw_dimms succeeded: %s %s\n",
                            ce_dimm->dimm_serial, d->dimm_serial);
                        return;
                }
        }
}

/*
 * rule 5a checking. The check succeeds if
 * - nretired >= 512, or
 * - nretired >= 128 and (addr_hi - addr_low) / (nretired - 1) > 512KB
 */
static void
ce_thresh_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
{
        nvlist_t *flt, *rsrc;
        fmd_case_t *cp;
        uint_t nret;
        uint64_t delta_addr = 0;

        if (dimm->dimm_flags & GMEM_F_FAULTING)
                return;

        nret = dimm->dimm_nretired;

        if (nret < gmem.gm_low_ce_thresh)
                return;

        if (dimm->dimm_phys_addr_hi >= dimm->dimm_phys_addr_low)
                delta_addr =
                    (dimm->dimm_phys_addr_hi - dimm->dimm_phys_addr_low) /
                    (nret - 1);

        if (nret >= gmem.gm_max_retired_pages || delta_addr > GMEM_MQ_512KB) {

                fmd_hdl_debug(hdl, "ce_thresh_check succeeded nret=%d", nret);
                dimm->dimm_flags |= GMEM_F_FAULTING;
                gmem_dimm_dirty(hdl, dimm);

                cp = fmd_case_open(hdl, NULL);
                rsrc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
                flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_PAGES,
                    GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsrc);
                fmd_case_add_suspect(hdl, cp, flt);
                fmd_case_solve(hdl, cp);
                if (rsrc != NULL)
                        nvlist_free(rsrc);
        }
}

/*
 * rule 5b checking. The check succeeds if more than 120
 * non-intermittent CEs are reported against one symbol
 * position of one afar in 72 hours
 */
static void
mq_5b_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
{
        nvlist_t *flt, *rsrc;
        fmd_case_t *cp;
        gmem_mq_t *ip, *next;
        int cw;

        for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
                for (ip = gmem_list_next(&dimm->mq_root[cw]);
                    ip != NULL; ip = next) {
                        next = gmem_list_next(ip);
                        if (ip->mq_dupce_count >= gmem.gm_dupce) {
                                fmd_hdl_debug(hdl,
                                    "mq_5b_check succeeded: duplicate CE=%d",
                                    ip->mq_dupce_count);
                                cp = fmd_case_open(hdl, NULL);
                                rsrc = gmem_find_dimm_rsc(hdl,
                                    dimm->dimm_serial);
                                flt = fmd_nvl_create_fault(hdl,
                                    GMEM_FAULT_DIMM_PAGES, GMEM_FLTMAXCONF,
                                    NULL, gmem_dimm_fru(dimm), rsrc);
                                dimm->dimm_flags |= GMEM_F_FAULTING;
                                gmem_dimm_dirty(hdl, dimm);
                                fmd_case_add_suspect(hdl, cp, flt);
                                fmd_case_solve(hdl, cp);
                                if (rsrc != NULL)
                                        nvlist_free(rsrc);
                                return;
                        }
                }
        }
}

/*
 * delete the expired duplicate CE time stamps
 */
static void
mq_prune_dup(fmd_hdl_t *hdl, gmem_mq_t *ip, uint64_t now)
{
        tstamp_t *tsp, *next;

        for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
            tsp = next) {
                next = gmem_list_next(tsp);
                if (tsp->tstamp < now - GMEM_MQ_TIMELIM) {
                        gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
                        fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
                        ip->mq_dupce_count--;
                }
        }
}

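/*
 * Refresh an existing index block when a duplicate CE arrives: update its
 * timestamp and event pointer, re-create the anchoring SERD engine, and
 * record one more duplicate-CE timestamp.
 */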
static void
mq_update(fmd_hdl_t *hdl, fmd_event_t *ep, gmem_mq_t *ip, uint64_t now)
{
        tstamp_t *tsp;

        ip->mq_tstamp = now;
        ip->mq_ep = ep;
        if (fmd_serd_exists(hdl, ip->mq_serdnm))
                fmd_serd_destroy(hdl, ip->mq_serdnm);

        fmd_serd_create(hdl, ip->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
        (void) fmd_serd_record(hdl, ip->mq_serdnm, ep);

        tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
        tsp->tstamp = now;
        gmem_list_append(&ip->mq_dupce_tstamp, tsp);
        ip->mq_dupce_count++;
}

/*
 * Create a fresh index block for MQSC CE correlation.
 */
gmem_mq_t *
mq_create(fmd_hdl_t *hdl, fmd_event_t *ep,
    uint64_t afar, uint16_t upos, uint16_t ckwd, uint64_t now)
{
        gmem_mq_t *cp;
        tstamp_t *tsp;

        cp = fmd_hdl_zalloc(hdl, sizeof (gmem_mq_t), FMD_SLEEP);
        cp->mq_tstamp = now;
        cp->mq_ckwd = ckwd;
        cp->mq_phys_addr = afar;
        cp->mq_unit_position = upos;
        cp->mq_ep = ep;
        cp->mq_serdnm =
            gmem_mq_serdnm_create(hdl, "mq", afar, ckwd, upos);

        tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
        tsp->tstamp = now;
        gmem_list_append(&cp->mq_dupce_tstamp, tsp);
        cp->mq_dupce_count = 1;

        /*
         * Create SERD to keep this event from being removed
         * by fmd which may not know there is an event pointer
         * saved here. This SERD is *never* meant to fire.
         */
        if (fmd_serd_exists(hdl, cp->mq_serdnm))
                fmd_serd_destroy(hdl, cp->mq_serdnm);

        fmd_serd_create(hdl, cp->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
        (void) fmd_serd_record(hdl, cp->mq_serdnm, ep);

        return (cp);
}

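/*
 * Tear down an index block: destroy its anchoring SERD engine, free its
 * duplicate-CE timestamps, unlink it from the list and free it.  Returns
 * the next block in the list.
 */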
gmem_mq_t *
mq_destroy(fmd_hdl_t *hdl, gmem_list_t *lp, gmem_mq_t *ip)
{
        gmem_mq_t *jp = gmem_list_next(ip);
        tstamp_t *tsp, *next;

        if (ip->mq_serdnm != NULL) {
                if (fmd_serd_exists(hdl, ip->mq_serdnm))
                        fmd_serd_destroy(hdl, ip->mq_serdnm);
                fmd_hdl_strfree(hdl, ip->mq_serdnm);
                ip->mq_serdnm = NULL;
        }

        for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
            tsp = next) {
                next = gmem_list_next(tsp);
                gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
                fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
        }

        gmem_list_delete(lp, &ip->mq_l);
        fmd_hdl_free(hdl, ip, sizeof (gmem_mq_t));

        return (jp);
}

/*
 * Add an index block for a new CE, sorted
 * a) by ascending unit position
 * b) order of arrival (~= time order)
 */
void
mq_add(fmd_hdl_t *hdl, gmem_dimm_t *dimm, fmd_event_t *ep,
    uint64_t afar, uint16_t unit_position, uint16_t ckwd,
    uint64_t now)
{
        gmem_mq_t *ip, *jp;
        int cw = (int)ckwd;

        for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
                if (ip->mq_unit_position > unit_position) {
                        /* list is in unit position order */
                        break;
                } else if (ip->mq_unit_position == unit_position &&
                    ip->mq_phys_addr == afar) {
                        /*
                         * Found a duplicate cw, unit_position and afar;
                         * update the existing mq_t in place (refresh its
                         * timestamp, SERD engine and duplicate count)
                         * rather than adding a new node.
                         */
                        mq_update(hdl, ep, ip, now);
                        return;
                } else {
                        ip = gmem_list_next(ip);
                }
        }

        jp = mq_create(hdl, ep, afar, unit_position, cw, now);
        if (ip == NULL)
                gmem_list_append(&dimm->mq_root[cw], jp);
        else
                gmem_list_insert_before(&dimm->mq_root[cw], ip, jp);
}

/*
 * Prune the MQSC index lists (one for each checkword), by deleting
 * outdated index blocks from each list.
 */

void
mq_prune(fmd_hdl_t *hdl, gmem_dimm_t *dimm, uint64_t now)
{
        gmem_mq_t *ip;
        int cw;

        for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
                for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
                        if (ip->mq_tstamp < now - GMEM_MQ_TIMELIM) {
                                /*
                                 * This event has timed out - delete the
                                 * mq block as well as serd for the event.
                                 */
                                ip = mq_destroy(hdl, &dimm->mq_root[cw], ip);
                        } else {
                                mq_prune_dup(hdl, ip, now);
                                /* still within the time limit; keep it */
                                ip = gmem_list_next(ip);
                        }
                } /* per checkword */
        } /* cw = 0...3 */
}

/*
 * Check the MQSC index lists (one for each checkword) by making a
 * complete pass through each list, checking if the criteria for
 * Rule 4A has been met.  Rule 4A checking is done for each checkword.
 *
 * Rule 4A: fault a DIMM "whenever Solaris reports two or more CEs from
 * two or more different physical addresses on each of two or more different
 * bit positions from the same DIMM within 72 hours of each other, and all
 * the addresses are in the same relative checkword (that is, the AFARs
 * are all the same modulo 64).  [Note: This means at least 4 CEs; two
 * from one bit position, with unique addresses, and two from another,
 * also with unique addresses, and the lower 6 bits of all the addresses
 * are the same.]"
 */

void
mq_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
{
        int upos_pairs, curr_upos, cw, i, j;
        nvlist_t *flt, *rsc;
        typedef struct upos_pair {
                int upos;
                gmem_mq_t *mq1;
                gmem_mq_t *mq2;
        } upos_pair_t;
        upos_pair_t upos_array[16]; /* max per cw = 2, * 8 cw's */
        gmem_mq_t *ip;

        /*
         * Each upos_array[] member represents a pair of CEs for the same
         * unit position (symbol) which is a 4 bit nibble.
         * MQSC rule 4 requires pairs of CEs from the same symbol (same DIMM
         * for rule 4A, and same DRAM for rule 4B) for a violation - this
         * is why CE pairs are tracked.
         */
        upos_pairs = 0;
        upos_array[0].mq1 = NULL;

        for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
                i = upos_pairs;
                curr_upos = -1;

                /*
                 * mq_root[] is an array of cumulative lists of CEs
                 * indexed by checkword where the list is in unit position
                 * order. Loop through checking for duplicate unit position
                 * entries (filled in at mq_create()).
                 * The upos_array[] is filled in each time a duplicate
                 * unit position is found; the first time through the loop
                 * of a unit position sets curr_upos but does not fill in
                 * upos_array[] until the second symbol is found.
                 */
                for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL;
                    ip = gmem_list_next(ip)) {
                        if (curr_upos != ip->mq_unit_position) {
                                /* Set initial current position */
                                curr_upos = ip->mq_unit_position;
                        } else if (i > upos_pairs &&
                            curr_upos == upos_array[i-1].upos) {
                                /*
                                 * Only keep track of CE pairs; skip
                                 * triples, quads, etc...
                                 */
                                continue;
                        } else if (upos_array[i].mq1 == NULL) {
                                /* Have a pair. Add to upos_array[] */
                                fmd_hdl_debug(hdl, "pair:upos=%d",
                                    curr_upos);
                                upos_array[i].upos = curr_upos;
                                upos_array[i].mq1 = gmem_list_prev(ip);
                                upos_array[i].mq2 = ip;
                                upos_array[++i].mq1 = NULL;
                        }
                }
                if (i - upos_pairs >= 2) {
                        /* Rule 4A violation */
                        rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
                        flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_4A,
                            GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsc);
                        for (j = upos_pairs; j < i; j++) {
                                fmd_case_add_ereport(hdl,
                                    dimm->dimm_case.cc_cp,
                                    upos_array[j].mq1->mq_ep);
                                fmd_case_add_ereport(hdl,
                                    dimm->dimm_case.cc_cp,
                                    upos_array[j].mq2->mq_ep);
                        }
                        dimm->dimm_flags |= GMEM_F_FAULTING;
                        gmem_dimm_dirty(hdl, dimm);
                        fmd_case_add_suspect(hdl, dimm->dimm_case.cc_cp, flt);
                        fmd_case_solve(hdl, dimm->dimm_case.cc_cp);
                        if (rsc != NULL)
                                nvlist_free(rsc);
                        return;
                }
                upos_pairs = i;
                assert(upos_pairs < 16);
        }
}

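/*
 * Main correctable-error ereport handler: validate the ereport payload,
 * look up or create the DIMM and page state, run the MQSC rule checks
 * (bad reader/writer, 4A, 5B), and feed persistent and sticky CEs into
 * page retirement and the DIMM page-retire threshold check.
 */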
/*ARGSUSED*/
gmem_evdisp_t
gmem_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
{
        uint16_t symbol_pos, cw;
        uint64_t phyaddr, offset, addr;
        uint32_t filter_ratio = 0;
        gmem_dimm_t *dimm;
        gmem_page_t *page;
        nvlist_t *fru = NULL;
        nvlist_t *topo_rsc = NULL;
        nvlist_t *rsrc, *det;
        const char *uuid;
        ce_dispact_t type;
        boolean_t diagnose;
        char *sn;
        int err, rc;
        uint64_t *now;
        uint_t nelem;
        int skip_error = 0;

        err = nvlist_lookup_boolean_value(nvl, GMEM_ERPT_PAYLOAD_DIAGNOSE,
            &diagnose);
        if (err != 0 || diagnose == 0)
                return (GMEM_EVD_UNUSED);

        if ((nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_PHYSADDR,
            &phyaddr) != 0) ||
            (nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_OFFSET,
            &offset) != 0)) {
                fmd_hdl_debug(hdl, "Can't get page phyaddr or offset");
                return (GMEM_EVD_BAD);
        }

        fmd_hdl_debug(hdl, "phyaddr %llx offset %llx", phyaddr, offset);

        if ((page = gmem_page_lookup(phyaddr)) != NULL &&
            page->page_case.cc_cp != NULL &&
            fmd_case_solved(hdl, page->page_case.cc_cp))
                return (GMEM_EVD_REDUND);

        if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_RESOURCE,
            &rsrc) != 0 ||
            nvlist_lookup_string(rsrc, FM_FMRI_HC_SERIAL_ID, &sn) != 0) {
                fmd_hdl_debug(hdl, "Can't get dimm serial\n");
                return (GMEM_EVD_BAD);
        }

        fmd_hdl_debug(hdl, "serial %s", sn);

        if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_DETECTOR, &det) != 0)
                return (GMEM_EVD_BAD);

        /*
         * Find dimm fru by serial number.
         */
        fru = gmem_find_dimm_fru(hdl, sn);

        if (fru == NULL) {
                fmd_hdl_debug(hdl, "Dimm is not present\n");
                return (GMEM_EVD_UNUSED);
        }

        if ((dimm = gmem_dimm_lookup(hdl, fru)) == NULL &&
            (dimm = gmem_dimm_create(hdl, fru, det)) == NULL) {
                nvlist_free(fru);
                return (GMEM_EVD_UNUSED);
        }

        if (dimm->dimm_case.cc_cp == NULL) {
                dimm->dimm_case.cc_cp = gmem_case_create(hdl,
                    &dimm->dimm_header, GMEM_PTR_DIMM_CASE, &uuid);
        }

        /*
         * Add to MQSC correlation lists all CEs which pass validity
         * checks above. If there is no symbol_pos & relative ckword
         * in the ereport, skip rule 4A checking.
         */

        err = nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_SYMBOLPOS,
            &symbol_pos);
        err |= nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_CKW, &cw);

        if (err == 0) {
                fmd_hdl_debug(hdl, "symbol_pos=%d cw=%d", symbol_pos, cw);

                if (nvlist_lookup_uint64_array(nvl,
                    "__tod", &now, &nelem) == 0) {
                        skip_error = gmem_check_symbol_error(hdl, dimm,
                            symbol_pos);

                        if (!skip_error ||
                            !(dimm->dimm_flags & GMEM_F_FAULTING))
                                mq_add(hdl, dimm, ep, phyaddr, symbol_pos,
                                    cw, *now);

                        mq_prune(hdl, dimm, *now);

                        if (!skip_error)
                                bad_reader_writer_check(hdl, det, dimm);
                        if (!(dimm->dimm_flags & GMEM_F_FAULTING)) {
                                mq_check(hdl, dimm);
                                mq_5b_check(hdl, dimm);
                        }
                }
        }

        type = gmem_mem_name2type(strstr(class, "mem"));

        switch (type) {
        case CE_DISP_UNKNOWN:
                GMEM_STAT_BUMP(ce_unknown);
                nvlist_free(fru);
                return (GMEM_EVD_UNUSED);
        case CE_DISP_INTERMITTENT:
                GMEM_STAT_BUMP(ce_interm);
                nvlist_free(fru);
                return (GMEM_EVD_UNUSED);
        case CE_DISP_PERS:
                GMEM_STAT_BUMP(ce_clearable_persis);
                break;
        case CE_DISP_STICKY:
                GMEM_STAT_BUMP(ce_sticky);
                break;
        default:
                nvlist_free(fru);
                return (GMEM_EVD_BAD);
        }

        if (gmem_check_symbol_error(hdl, dimm, symbol_pos)) {
                nvlist_free(fru);
                return (GMEM_EVD_REDUND);
        }

        if (page == NULL) {
                page = gmem_page_create(hdl, fru, phyaddr, offset);
                if (page == NULL) {
                        nvlist_free(fru);
                        return (GMEM_EVD_UNUSED);
                }
        }

        nvlist_free(fru);

        if (page->page_case.cc_cp == NULL) {
                page->page_case.cc_cp = gmem_case_create(hdl,
                    &page->page_header, GMEM_PTR_PAGE_CASE, &uuid);
        }

        switch (type) {
        case CE_DISP_PERS:
                fmd_hdl_debug(hdl, "adding persistent event to CE serd");
                if (page->page_case.cc_serdnm == NULL)
                        gmem_page_serd_create(hdl, page, nvl);

                filter_ratio = gmem_get_serd_filter_ratio(nvl);

                fmd_hdl_debug(hdl, "filter_ratio %d\n", filter_ratio);

                if (gmem_serd_record(hdl, page->page_case.cc_serdnm,
                    filter_ratio, ep) == FMD_B_FALSE) {
                        return (GMEM_EVD_OK); /* engine hasn't fired */
                }

                fmd_hdl_debug(hdl, "ce page serd fired\n");
                fmd_case_add_serd(hdl, page->page_case.cc_cp,
                    page->page_case.cc_serdnm);
                fmd_serd_reset(hdl, page->page_case.cc_serdnm);
                break;  /* to retire */

        case CE_DISP_STICKY:
                fmd_case_add_ereport(hdl, page->page_case.cc_cp, ep);
                break;  /* to retire */
        }

        topo_rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
        rc = gmem_page_fault(hdl, gmem_dimm_fru(dimm), topo_rsc,
            ep, phyaddr, offset);

        if (rc) {
                gmem_to_hashed_addr(&addr, phyaddr);

                if (addr > dimm->dimm_phys_addr_hi)
                        dimm->dimm_phys_addr_hi = addr;
                if (addr < dimm->dimm_phys_addr_low)
                        dimm->dimm_phys_addr_low = addr;

                dimm->dimm_nretired++;
                dimm->dimm_retstat.fmds_value.ui64++;
                gmem_dimm_dirty(hdl, dimm);
                ce_thresh_check(hdl, dimm);
        }
        return (GMEM_EVD_OK);
}

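/*
 * Wrapper used to discard a DIMM's in-memory state.
 */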
void
gmem_dimm_close(fmd_hdl_t *hdl, void *arg)
{
        gmem_dimm_destroy(hdl, arg);
}