/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */


/*
 * Ereport-handling routines for memory errors
 */

#include <gmem_mem.h>
#include <gmem_dimm.h>
#include <gmem_page.h>
#include <gmem.h>

#include <strings.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <fm/fmd_api.h>
#include <fm/libtopo.h>
#include <sys/fm/protocol.h>
#include <sys/async.h>
#include <sys/errclassify.h>

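/*
 * Bit-field masks used by gmem_to_hashed_addr() below: OFFBIT clears
 * PA[17:11], and each BITx_y mask selects the physical-address bit
 * range named by its suffix.
 */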
#define OFFBIT          0xFFFFFFFFFFFC07FFULL
#define BIT28_32        0x00000001F0000000ULL
#define BIT13_17        0x000000000003E000ULL
#define BIT18_19        0x00000000000C0000ULL
#define BIT11_12        0x0000000000001800ULL

struct ce_name2type {
        const char *name;
        ce_dispact_t type;
};

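/* FRU found by find_fault_fru(); set as a side effect of the topo walk */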
nvlist_t *fru_nvl;

static ce_dispact_t
gmem_mem_name2type(const char *name)
{
        static const struct ce_name2type new[] = {
                { "mem-unk",            CE_DISP_UNKNOWN },
                { "mem-is",             CE_DISP_INTERMITTENT },
                { "mem-cs",             CE_DISP_PERS },
                { "mem-ss",             CE_DISP_STICKY },
                { NULL }
        };
        const struct ce_name2type *names = &new[0];
        const struct ce_name2type *tp;

        for (tp = names; tp->name != NULL; tp++) {
                if (strcasecmp(name, tp->name) == 0)
                        return (tp->type);
        }

        return (CE_DISP_UNKNOWN);
}

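/*
 * Topo walk callback: compare the hc-list of this node's resource with the
 * hc-list passed in arg; on a full match (ending at the "chip" node), save
 * a duplicate of the node's FRU in fru_nvl and terminate the walk.
 */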
/*ARGSUSED*/
static int
find_fault_fru(topo_hdl_t *thp, tnode_t *node, void *arg)
{
        nvlist_t *nvl = (nvlist_t *)arg;
        nvlist_t *rsc = NULL, *fru = NULL;
        nvlist_t **hcl, **topo_hcl;
        uint_t n1, n2;
        char *name, *name1, *name2;
        char *id1, *id2;
        int err, i;

        if (topo_node_resource(node, &rsc, &err) < 0)
                return (TOPO_WALK_NEXT);

        err = nvlist_lookup_nvlist_array(rsc, FM_FMRI_HC_LIST, &topo_hcl, &n1);

        if (err != 0) {
                nvlist_free(rsc);
                return (TOPO_WALK_NEXT);
        }

        (void) nvlist_lookup_string(topo_hcl[n1 - 1], FM_FMRI_HC_NAME, &name);
        if (strcmp(name, "chip") != 0) {
                nvlist_free(rsc);
                return (TOPO_WALK_NEXT);
        }

        (void) nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hcl, &n2);

        if (n1 != n2) {
                nvlist_free(rsc);
                return (TOPO_WALK_NEXT);
        }

        for (i = 0; i < n1; i++) {
                (void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_NAME,
                    &name1);
                (void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_ID, &id1);
                (void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, &name2);
                (void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &id2);
                if (strcmp(name1, name2) != 0 || strcmp(id1, id2) != 0) {
                        nvlist_free(rsc);
                        return (TOPO_WALK_NEXT);
                }
        }

        (void) topo_node_fru(node, &fru, NULL, &err);
        if (fru != NULL) {
                (void) nvlist_dup(fru, &fru_nvl, NV_UNIQUE_NAME);
                nvlist_free(fru);
        }
        nvlist_free(rsc);
        return (TOPO_WALK_TERMINATE);
}

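/*
 * Walk the hc topology looking for the node whose resource matches nvl;
 * return a duplicate of that node's FRU nvlist, or NULL if none is found.
 */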
nvlist_t *
gmem_find_fault_fru(fmd_hdl_t *hdl, nvlist_t *nvl)
{
        topo_hdl_t *thp;
        topo_walk_t *twp;
        int err;

        fru_nvl = NULL;

        if ((thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION)) == NULL)
                return (NULL);

        if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC,
            find_fault_fru, nvl, &err)) == NULL) {
                fmd_hdl_topo_rele(hdl, thp);
                return (NULL);
        }

        (void) topo_walk_step(twp, TOPO_WALK_CHILD);
        topo_walk_fini(twp);
        fmd_hdl_topo_rele(hdl, thp);
        return (fru_nvl);
}

/*
 * fault the FRU of the common detector between two DIMMs
 */
void
gmem_gen_datapath_fault(fmd_hdl_t *hdl, nvlist_t *det)
{
        char *name, *id;
        nvlist_t **hcl1, **hcl;
        uint_t n;
        int i, j;
        fmd_case_t *cp;
        nvlist_t *fltlist, *rsrc;
        nvlist_t *fru = NULL;

        if (nvlist_lookup_nvlist_array(det, FM_FMRI_HC_LIST, &hcl1, &n) != 0)
                return;

        for (i = 0; i < n; i++) {
                (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
                if (strcmp(name, "chip") == 0)
                        break;
        }

        if (i == n)
                return; /* no chip node in the detector FMRI */

        n = i + 1;
        hcl = fmd_hdl_zalloc(hdl, sizeof (nvlist_t *) * n, FMD_SLEEP);
        if (hcl == NULL)
                return;

        for (i = 0; i < n; i++) {
                (void) nvlist_alloc(&hcl[i],
                    NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0);
        }

        for (i = 0, j = 0; i < n; i++) {
                (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
                (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_ID, &id);
                (void) nvlist_add_string(hcl[j], FM_FMRI_HC_NAME, name);
                (void) nvlist_add_string(hcl[j], FM_FMRI_HC_ID, id);
                j++;
                if (strcmp(name, "chip") == 0)
                        break;
        }

        if (nvlist_alloc(&rsrc, NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0) != 0) {
                for (i = 0; i < n; i++) {
                        nvlist_free(hcl[i]);
                }
                fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
                return;
        }

        if (nvlist_add_uint8(rsrc, FM_VERSION, FM_HC_SCHEME_VERSION) != 0 ||
            nvlist_add_string(rsrc, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0 ||
            nvlist_add_string(rsrc, FM_FMRI_HC_ROOT, "") != 0 ||
            nvlist_add_uint32(rsrc, FM_FMRI_HC_LIST_SZ, n) != 0 ||
            nvlist_add_nvlist_array(rsrc, FM_FMRI_HC_LIST, hcl, n) != 0) {
                for (i = 0; i < n; i++) {
                        nvlist_free(hcl[i]);
                }
                fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
                nvlist_free(rsrc);
                return;
        }

        fru = gmem_find_fault_fru(hdl, rsrc);
        if (fru != NULL) {
                cp = fmd_case_open(hdl, NULL);
                fltlist = fmd_nvl_create_fault(hdl, "fault.memory.datapath",
                    100, fru, fru, fru);
                fmd_case_add_suspect(hdl, cp, fltlist);
                fmd_case_solve(hdl, cp);
                nvlist_free(fru);
        }

        for (i = 0; i < n; i++) {
                nvlist_free(hcl[i]);
        }

        fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
        nvlist_free(rsrc);
}

/*
 * formula to convert an unhashed address to a hashed address
 * PA[17:11] = (PA[32:28] xor PA[17:13]) :: (PA[19:18] xor PA[12:11])
 */
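/*
 * The shifts below line the hashed fields up: BIT28_32 >> 15 maps PA[32:28]
 * onto PA[17:13], BIT18_19 >> 7 maps PA[19:18] onto PA[12:11], and OFFBIT
 * preserves every bit outside PA[17:11].
 */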
static void
gmem_to_hashed_addr(uint64_t *addr, uint64_t afar)
{
        *addr = (afar & OFFBIT) | ((afar & BIT28_32) >> 15) ^ (afar & BIT13_17)
            | ((afar & BIT18_19) >> 7) ^ (afar & BIT11_12);
}

/*
 * check if a dimm has n CEs that have the same symbol-in-error
 */
int
upos_thresh_check(gmem_dimm_t *dimm, uint16_t upos, uint32_t threshold)
{
        int i;
        gmem_mq_t *ip, *next;
        int count = 0;

        for (i = 0; i < GMEM_MAX_CKWDS; i++) {
                for (ip = gmem_list_next(&dimm->mq_root[i]); ip != NULL;
                    ip = next) {
                        next = gmem_list_next(ip);
                        if (ip->mq_unit_position == upos) {
                                count++;
                                if (count >= threshold)
                                        return (1);
                        }
                }
        }
        return (0);
}

/*
 * check if the smaller number of retired pages is greater than 1/16 of the
 * larger number of retired pages
 */
int
check_bad_rw_retired_pages(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2)
{
        uint_t sret, lret;
        double ratio;

        sret = lret = 0;

        if (d2->dimm_nretired < d1->dimm_nretired) {
                sret = d2->dimm_nretired;
                lret = d1->dimm_nretired;
        } else if (d2->dimm_nretired > d1->dimm_nretired) {
                sret = d1->dimm_nretired;
                lret = d2->dimm_nretired;
        } else
                return (0);

        ratio = lret * GMEM_MQ_RATIO;

        if (sret > ratio) {
                fmd_hdl_debug(hdl, "sret=%d lret=%d ratio=%.3f",
                    sret, lret, ratio);
                return (1);
        }
        return (0);
}

/*
 * check bad rw on any two DIMMs. The check succeeds if
 * - each DIMM has n CEs which have the same symbol-in-error,
 * - the smaller number of retired pages > 1/16 of the larger number of
 *   retired pages
 */
static int
check_bad_rw_between_dimms(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2,
    uint16_t *rupos)
{
        int i;
        gmem_mq_t *ip, *next;
        uint16_t upos;

        for (i = 0; i < GMEM_MAX_CKWDS; i++) {
                for (ip = gmem_list_next(&d1->mq_root[i]); ip != NULL;
                    ip = next) {
                        next = gmem_list_next(ip);
                        upos = ip->mq_unit_position;
                        if (upos_thresh_check(d1, upos, gmem.gm_nupos)) {
                                if (upos_thresh_check(d2, upos,
                                    gmem.gm_nupos)) {
                                        if (check_bad_rw_retired_pages(hdl,
                                            d1, d2)) {
                                                *rupos = upos;
                                                return (1);
                                        }
                                }
                        }
                }
        }

        return (0);
}

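/*
 * Look for another DIMM on the same datapath as ce_dimm that shows the
 * bad reader/writer pattern; if one is found, fault the shared datapath
 * and remember the symbol-in-error.
 */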
static void
bad_reader_writer_check(fmd_hdl_t *hdl, nvlist_t *det, gmem_dimm_t *ce_dimm)
{
        gmem_dimm_t *d, *next;
        uint16_t upos;

        for (d = gmem_list_next(&gmem.gm_dimms); d != NULL; d = next) {
                next = gmem_list_next(d);
                if (d == ce_dimm)
                        continue;
                if (!gmem_same_datapath_dimms(hdl, ce_dimm, d))
                        continue;
                if (check_bad_rw_between_dimms(hdl, ce_dimm, d, &upos)) {
                        gmem_gen_datapath_fault(hdl, det);
                        gmem_save_symbol_error(hdl, ce_dimm, upos);
                        fmd_hdl_debug(hdl,
                            "check_bad_rw_dimms succeeded: %s %s\n",
                            ce_dimm->dimm_serial, d->dimm_serial);
                        return;
                }
        }
}

/*
 * rule 5a checking. The check succeeds if
 * - nretired >= 512
 * - nretired >= 128 and (addr_hi - addr_low) / (nretired - 1) > 512KB
 */
static void
ce_thresh_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
{
        nvlist_t *flt, *rsrc;
        fmd_case_t *cp;
        uint_t nret;
        uint64_t delta_addr = 0;

        if (dimm->dimm_flags & GMEM_F_FAULTING)
                return;

        nret = dimm->dimm_nretired;

        if (nret < gmem.gm_low_ce_thresh)
                return;

        if (dimm->dimm_phys_addr_hi >= dimm->dimm_phys_addr_low)
                delta_addr =
                    (dimm->dimm_phys_addr_hi - dimm->dimm_phys_addr_low) /
                    (nret - 1);

        if (nret >= gmem.gm_max_retired_pages || delta_addr > GMEM_MQ_512KB) {

                fmd_hdl_debug(hdl, "ce_thresh_check succeeded nret=%d", nret);
                dimm->dimm_flags |= GMEM_F_FAULTING;
                gmem_dimm_dirty(hdl, dimm);

                cp = fmd_case_open(hdl, NULL);
                rsrc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
                flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_PAGES,
                    GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsrc);
                fmd_case_add_suspect(hdl, cp, flt);
                fmd_case_solve(hdl, cp);
                nvlist_free(rsrc);
        }
}

/*
 * rule 5b checking. The check succeeds if more than 120
 * non-intermittent CEs are reported against one symbol
 * position of one afar in 72 hours
 */
static void
mq_5b_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
{
        nvlist_t *flt, *rsrc;
        fmd_case_t *cp;
        gmem_mq_t *ip, *next;
        int cw;

        for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
                for (ip = gmem_list_next(&dimm->mq_root[cw]);
                    ip != NULL; ip = next) {
                        next = gmem_list_next(ip);
                        if (ip->mq_dupce_count >= gmem.gm_dupce) {
                                fmd_hdl_debug(hdl,
                                    "mq_5b_check succeeded: duplicate CE=%d",
                                    ip->mq_dupce_count);
                                cp = fmd_case_open(hdl, NULL);
                                rsrc = gmem_find_dimm_rsc(hdl,
                                    dimm->dimm_serial);
                                flt = fmd_nvl_create_fault(hdl,
                                    GMEM_FAULT_DIMM_PAGES, GMEM_FLTMAXCONF,
                                    NULL, gmem_dimm_fru(dimm), rsrc);
                                dimm->dimm_flags |= GMEM_F_FAULTING;
                                gmem_dimm_dirty(hdl, dimm);
                                fmd_case_add_suspect(hdl, cp, flt);
                                fmd_case_solve(hdl, cp);
                                nvlist_free(rsrc);
                                return;
                        }
                }
        }
}

/*
 * delete the expired duplicate CE time stamps
 */
static void
mq_prune_dup(fmd_hdl_t *hdl, gmem_mq_t *ip, uint64_t now)
{
        tstamp_t *tsp, *next;

        for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
            tsp = next) {
                next = gmem_list_next(tsp);
                if (tsp->tstamp < now - GMEM_MQ_TIMELIM) {
                        gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
                        fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
                        ip->mq_dupce_count--;
                }
        }
}

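/*
 * Refresh an existing index block when a duplicate CE arrives: update its
 * timestamp and event pointer, recreate its SERD engine to re-anchor the
 * event, and record a new duplicate-CE timestamp.
 */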
static void
mq_update(fmd_hdl_t *hdl, fmd_event_t *ep, gmem_mq_t *ip, uint64_t now)
{
        tstamp_t *tsp;

        ip->mq_tstamp = now;
        ip->mq_ep = ep;
        if (fmd_serd_exists(hdl, ip->mq_serdnm))
                fmd_serd_destroy(hdl, ip->mq_serdnm);

        fmd_serd_create(hdl, ip->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
        (void) fmd_serd_record(hdl, ip->mq_serdnm, ep);

        tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
        tsp->tstamp = now;
        gmem_list_append(&ip->mq_dupce_tstamp, tsp);
        ip->mq_dupce_count++;
}

/*
 * Create a fresh index block for MQSC CE correlation.
 */
gmem_mq_t *
mq_create(fmd_hdl_t *hdl, fmd_event_t *ep,
    uint64_t afar, uint16_t upos, uint16_t ckwd, uint64_t now)
{
        gmem_mq_t *cp;
        tstamp_t *tsp;

        cp = fmd_hdl_zalloc(hdl, sizeof (gmem_mq_t), FMD_SLEEP);
        cp->mq_tstamp = now;
        cp->mq_ckwd = ckwd;
        cp->mq_phys_addr = afar;
        cp->mq_unit_position = upos;
        cp->mq_ep = ep;
        cp->mq_serdnm =
            gmem_mq_serdnm_create(hdl, "mq", afar, ckwd, upos);

        tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
        tsp->tstamp = now;
        gmem_list_append(&cp->mq_dupce_tstamp, tsp);
        cp->mq_dupce_count = 1;

        /*
         * Create SERD to keep this event from being removed
         * by fmd which may not know there is an event pointer
         * saved here. This SERD is *never* meant to fire.
         */
        if (fmd_serd_exists(hdl, cp->mq_serdnm))
                fmd_serd_destroy(hdl, cp->mq_serdnm);

        fmd_serd_create(hdl, cp->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
        (void) fmd_serd_record(hdl, cp->mq_serdnm, ep);

        return (cp);
}

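/*
 * Free an index block and everything it owns (SERD engine, duplicate-CE
 * timestamps), unlink it from its list, and return the next list element.
 */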
gmem_mq_t *
mq_destroy(fmd_hdl_t *hdl, gmem_list_t *lp, gmem_mq_t *ip)
{
        gmem_mq_t *jp = gmem_list_next(ip);
        tstamp_t *tsp, *next;

        if (ip->mq_serdnm != NULL) {
                if (fmd_serd_exists(hdl, ip->mq_serdnm))
                        fmd_serd_destroy(hdl, ip->mq_serdnm);
                fmd_hdl_strfree(hdl, ip->mq_serdnm);
                ip->mq_serdnm = NULL;
        }

        for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
            tsp = next) {
                next = gmem_list_next(tsp);
                gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
                fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
        }

        gmem_list_delete(lp, &ip->mq_l);
        fmd_hdl_free(hdl, ip, sizeof (gmem_mq_t));

        return (jp);
}


/*
 * Add an index block for a new CE, sorted
 * a) by ascending unit position
 * b) order of arrival (~= time order)
 */
void
mq_add(fmd_hdl_t *hdl, gmem_dimm_t *dimm, fmd_event_t *ep,
    uint64_t afar, uint16_t unit_position, uint16_t ckwd,
    uint64_t now)
{
        gmem_mq_t *ip, *jp;
        int cw = (int)ckwd;

        for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
                if (ip->mq_unit_position > unit_position) {
                        /* list is in unit position order */
                        break;
                } else if (ip->mq_unit_position == unit_position &&
                    ip->mq_phys_addr == afar) {
                        /*
                         * Found a duplicate cw, unit_position and afar.
                         * Update the existing mq_t in place (timestamp,
                         * SERD and duplicate-CE bookkeeping) instead of
                         * adding a new node.
                         */
                        mq_update(hdl, ep, ip, now);
                        return;
                } else {
                        ip = gmem_list_next(ip);
                }
        }

        jp = mq_create(hdl, ep, afar, unit_position, cw, now);
        if (ip == NULL)
                gmem_list_append(&dimm->mq_root[cw], jp);
        else
                gmem_list_insert_before(&dimm->mq_root[cw], ip, jp);
}

/*
 * Prune the MQSC index lists (one for each checkword), by deleting
 * outdated index blocks from each list.
 */

void
mq_prune(fmd_hdl_t *hdl, gmem_dimm_t *dimm, uint64_t now)
{
        gmem_mq_t *ip;
        int cw;

        for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
                for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
                        if (ip->mq_tstamp < now - GMEM_MQ_TIMELIM) {
                                /*
                                 * This event has timed out - delete the
                                 * mq block as well as the serd for the event.
                                 */
                                ip = mq_destroy(hdl, &dimm->mq_root[cw], ip);
                        } else {
                                /* not timed out; prune expired duplicates */
                                mq_prune_dup(hdl, ip, now);
                                ip = gmem_list_next(ip);
                        }
                } /* per checkword */
        } /* cw = 0...3 */
}

/*
 * Check the MQSC index lists (one for each checkword) by making a
 * complete pass through each list, checking if the criteria for
 * Rule 4A have been met.  Rule 4A checking is done for each checkword.
 *
 * Rule 4A: fault a DIMM "whenever Solaris reports two or more CEs from
 * two or more different physical addresses on each of two or more different
 * bit positions from the same DIMM within 72 hours of each other, and all
 * the addresses are in the same relative checkword (that is, the AFARs
 * are all the same modulo 64).  [Note: This means at least 4 CEs; two
 * from one bit position, with unique addresses, and two from another,
 * also with unique addresses, and the lower 6 bits of all the addresses
 * are the same.]"
 */

void
mq_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
{
        int upos_pairs, curr_upos, cw, i, j;
        nvlist_t *flt, *rsc;
        typedef struct upos_pair {
                int upos;
                gmem_mq_t *mq1;
                gmem_mq_t *mq2;
        } upos_pair_t;
        upos_pair_t upos_array[16]; /* max per cw = 2, * 8 cw's */
        gmem_mq_t *ip;

        /*
         * Each upos_array[] member represents a pair of CEs for the same
         * unit position (symbol) which is a 4 bit nibble.
         * MQSC rule 4 requires pairs of CEs from the same symbol (same DIMM
         * for rule 4A, and same DRAM for rule 4B) for a violation - this
         * is why CE pairs are tracked.
         */
        upos_pairs = 0;
        upos_array[0].mq1 = NULL;

        for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
                i = upos_pairs;
                curr_upos = -1;

                /*
                 * mq_root[] is an array of cumulative lists of CEs
                 * indexed by checkword where the list is in unit position
                 * order. Loop through checking for duplicate unit position
                 * entries (filled in at mq_create()).
                 * The upos_array[] is filled in each time a duplicate
                 * unit position is found; the first time through the loop
                 * of a unit position sets curr_upos but does not fill in
                 * upos_array[] until the second symbol is found.
                 */
                for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL;
                    ip = gmem_list_next(ip)) {
                        if (curr_upos != ip->mq_unit_position) {
                                /* Set initial current position */
                                curr_upos = ip->mq_unit_position;
                        } else if (i > upos_pairs &&
                            curr_upos == upos_array[i-1].upos) {
                                /*
                                 * Only keep track of CE pairs; skip
                                 * triples, quads, etc...
                                 */
                                continue;
                        } else if (upos_array[i].mq1 == NULL) {
                                /* Have a pair. Add to upos_array[] */
                                fmd_hdl_debug(hdl, "pair:upos=%d",
                                    curr_upos);
                                upos_array[i].upos = curr_upos;
                                upos_array[i].mq1 = gmem_list_prev(ip);
                                upos_array[i].mq2 = ip;
                                upos_array[++i].mq1 = NULL;
                        }
                }
                if (i - upos_pairs >= 2) {
                        /* Rule 4A violation */
                        rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
                        flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_4A,
                            GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsc);
                        for (j = upos_pairs; j < i; j++) {
                                fmd_case_add_ereport(hdl,
                                    dimm->dimm_case.cc_cp,
                                    upos_array[j].mq1->mq_ep);
                                fmd_case_add_ereport(hdl,
                                    dimm->dimm_case.cc_cp,
                                    upos_array[j].mq2->mq_ep);
                        }
                        dimm->dimm_flags |= GMEM_F_FAULTING;
                        gmem_dimm_dirty(hdl, dimm);
                        fmd_case_add_suspect(hdl, dimm->dimm_case.cc_cp, flt);
                        fmd_case_solve(hdl, dimm->dimm_case.cc_cp);
                        nvlist_free(rsc);
                        return;
                }
                upos_pairs = i;
                assert(upos_pairs < 16);
        }
}

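/*
 * Main correctable-error ereport handler: validate the ereport payload,
 * look up (or create) the DIMM and page state, run the MQSC rule checks,
 * feed the page SERD engine, and retire the page when it fires.
 */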
/*ARGSUSED*/
gmem_evdisp_t
gmem_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
{
        uint16_t symbol_pos, cw;
        uint64_t phyaddr, offset, addr;
        uint32_t filter_ratio = 0;
        gmem_dimm_t *dimm;
        gmem_page_t *page;
        nvlist_t *fru = NULL;
        nvlist_t *topo_rsc = NULL;
        nvlist_t *rsrc, *det;
        const char *uuid;
        ce_dispact_t type;
        boolean_t diagnose;
        char *sn;
        int err, rc;
        uint64_t *now;
        uint_t nelem;
        int skip_error = 0;

        err = nvlist_lookup_boolean_value(nvl, GMEM_ERPT_PAYLOAD_DIAGNOSE,
            &diagnose);
        if (err != 0 || diagnose == 0)
                return (GMEM_EVD_UNUSED);

        if ((nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_PHYSADDR,
            &phyaddr) != 0) ||
            (nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_OFFSET,
            &offset) != 0)) {
                fmd_hdl_debug(hdl, "Can't get page phyaddr or offset");
                return (GMEM_EVD_BAD);
        }

        fmd_hdl_debug(hdl, "phyaddr %llx offset %llx", phyaddr, offset);

        if ((page = gmem_page_lookup(phyaddr)) != NULL &&
            page->page_case.cc_cp != NULL &&
            fmd_case_solved(hdl, page->page_case.cc_cp))
                return (GMEM_EVD_REDUND);

        if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_RESOURCE,
            &rsrc) != 0 ||
            nvlist_lookup_string(rsrc, FM_FMRI_HC_SERIAL_ID, &sn) != 0) {
                fmd_hdl_debug(hdl, "Can't get dimm serial\n");
                return (GMEM_EVD_BAD);
        }

        fmd_hdl_debug(hdl, "serial %s", sn);

        if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_DETECTOR, &det) != 0)
                return (GMEM_EVD_BAD);

        /*
         * Find dimm fru by serial number.
         */
        fru = gmem_find_dimm_fru(hdl, sn);

        if (fru == NULL) {
                fmd_hdl_debug(hdl, "Dimm is not present\n");
                return (GMEM_EVD_UNUSED);
        }

        if ((dimm = gmem_dimm_lookup(hdl, fru)) == NULL &&
            (dimm = gmem_dimm_create(hdl, fru, det)) == NULL) {
                nvlist_free(fru);
                return (GMEM_EVD_UNUSED);
        }

        if (dimm->dimm_case.cc_cp == NULL) {
                dimm->dimm_case.cc_cp = gmem_case_create(hdl,
                    &dimm->dimm_header, GMEM_PTR_DIMM_CASE, &uuid);
        }

        /*
         * Add to MQSC correlation lists all CEs which pass validity
         * checks above. If there is no symbol_pos & relative ckword
         * in the ereport, skip rule 4A checking.
         */

        err = nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_SYMBOLPOS,
            &symbol_pos);
        err |= nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_CKW, &cw);

        if (err == 0) {
                fmd_hdl_debug(hdl, "symbol_pos=%d cw=%d", symbol_pos, cw);

                if (nvlist_lookup_uint64_array(nvl,
                    "__tod", &now, &nelem) == 0) {
                        skip_error = gmem_check_symbol_error(hdl, dimm,
                            symbol_pos);

                        if (!skip_error ||
                            !(dimm->dimm_flags & GMEM_F_FAULTING))
                                mq_add(hdl, dimm, ep, phyaddr, symbol_pos,
                                    cw, *now);

                        mq_prune(hdl, dimm, *now);

                        if (!skip_error)
                                bad_reader_writer_check(hdl, det, dimm);
                        if (!(dimm->dimm_flags & GMEM_F_FAULTING)) {
                                mq_check(hdl, dimm);
                                mq_5b_check(hdl, dimm);
                        }
                }
        }

        type = gmem_mem_name2type(strstr(class, "mem"));

        switch (type) {
        case CE_DISP_UNKNOWN:
                GMEM_STAT_BUMP(ce_unknown);
                nvlist_free(fru);
                return (GMEM_EVD_UNUSED);
        case CE_DISP_INTERMITTENT:
                GMEM_STAT_BUMP(ce_interm);
                nvlist_free(fru);
                return (GMEM_EVD_UNUSED);
        case CE_DISP_PERS:
                GMEM_STAT_BUMP(ce_clearable_persis);
                break;
        case CE_DISP_STICKY:
                GMEM_STAT_BUMP(ce_sticky);
                break;
        default:
                nvlist_free(fru);
                return (GMEM_EVD_BAD);
        }

        if (gmem_check_symbol_error(hdl, dimm, symbol_pos)) {
                nvlist_free(fru);
                return (GMEM_EVD_REDUND);
        }

        if (page == NULL) {
                page = gmem_page_create(hdl, fru, phyaddr, offset);
                if (page == NULL) {
                        nvlist_free(fru);
                        return (GMEM_EVD_UNUSED);
                }
        }

        nvlist_free(fru);

        if (page->page_case.cc_cp == NULL) {
                page->page_case.cc_cp = gmem_case_create(hdl,
                    &page->page_header, GMEM_PTR_PAGE_CASE, &uuid);
        }

        switch (type) {
        case CE_DISP_PERS:
                fmd_hdl_debug(hdl, "adding persistent event to CE serd");
                if (page->page_case.cc_serdnm == NULL)
                        gmem_page_serd_create(hdl, page, nvl);

                filter_ratio = gmem_get_serd_filter_ratio(nvl);

                fmd_hdl_debug(hdl, "filter_ratio %d\n", filter_ratio);

                if (gmem_serd_record(hdl, page->page_case.cc_serdnm,
                    filter_ratio, ep) == FMD_B_FALSE) {
                        return (GMEM_EVD_OK); /* engine hasn't fired */
                }

                fmd_hdl_debug(hdl, "ce page serd fired\n");
                fmd_case_add_serd(hdl, page->page_case.cc_cp,
                    page->page_case.cc_serdnm);
                fmd_serd_reset(hdl, page->page_case.cc_serdnm);
                break;  /* to retire */

        case CE_DISP_STICKY:
                fmd_case_add_ereport(hdl, page->page_case.cc_cp, ep);
                break;  /* to retire */
        }

        topo_rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
        rc = gmem_page_fault(hdl, gmem_dimm_fru(dimm), topo_rsc,
            ep, phyaddr, offset);

        if (rc) {
                gmem_to_hashed_addr(&addr, phyaddr);

                if (addr > dimm->dimm_phys_addr_hi)
                        dimm->dimm_phys_addr_hi = addr;
                if (addr < dimm->dimm_phys_addr_low)
                        dimm->dimm_phys_addr_low = addr;

                dimm->dimm_nretired++;
                dimm->dimm_retstat.fmds_value.ui64++;
                gmem_dimm_dirty(hdl, dimm);
                ce_thresh_check(hdl, dimm);
        }
        return (GMEM_EVD_OK);
}

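/*
 * Release the per-DIMM state; arg is the gmem_dimm_t whose case is closing.
 */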
void
gmem_dimm_close(fmd_hdl_t *hdl, void *arg)
{
        gmem_dimm_destroy(hdl, arg);
}