1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /*
  26  * Support routines for DIMMs.
  27  */
  28 
  29 #include <cmd_mem.h>
  30 #include <limits.h>
  31 #include <cmd_dimm.h>
  32 #include <cmd_bank.h>
  33 #include <cmd.h>
  34 
  35 #include <errno.h>
  36 #include <string.h>
  37 #include <strings.h>
  38 #include <fcntl.h>
  39 #include <unistd.h>
  40 #include <fm/fmd_api.h>
  41 #include <sys/fm/protocol.h>
  42 #include <sys/mem.h>
  43 #include <sys/nvpair.h>
  44 #ifdef sun4v
  45 #include <cmd_hc_sun4v.h>
  46 #include <cmd_branch.h>
  47 #endif /* sun4v */
  48 
  49 /*
  50  * Some errors (RxE/FRx pairs) don't have accurate DIMM (resource) FMRIs,
  51  * because sufficient information was unavailable prior to correlation.
  52  * When the DE completes the pair, it uses this routine to retrieve the
  53  * correct FMRI.
  54  */
  55 nvlist_t *
  56 cmd_dimm_fmri_derive(fmd_hdl_t *hdl, uint64_t afar, uint16_t synd,
  57     uint64_t afsr)
  58 {
  59         nvlist_t *fmri;
  60 
  61         if ((fmri = cmd_mem_fmri_derive(hdl, afar, afsr, synd)) == NULL)
  62                 return (NULL);
  63 
  64         if (fmd_nvl_fmri_expand(hdl, fmri) < 0) {
  65                 nvlist_free(fmri);
  66                 return (NULL);
  67         }
  68 
  69         return (fmri);
  70 }
  71 
  72 nvlist_t *
  73 cmd_dimm_fru(cmd_dimm_t *dimm)
  74 {
  75         return (dimm->dimm_asru_nvl);
  76 }
  77 
  78 nvlist_t *
  79 cmd_dimm_create_fault(fmd_hdl_t *hdl, cmd_dimm_t *dimm, const char *fltnm,
  80     uint_t cert)
  81 {
  82 #ifdef sun4v
  83         nvlist_t *flt, *nvlfru;
  84         /*
  85          * Do NOT issue hc scheme FRU FMRIs for ultraSPARC-T1 platforms.
  86          * The SP will misinterpret the FRU. Instead, reuse the ASRU FMRI
  87          *
  88          * Use the BR string as a distinguisher. BR (branch) is only
  89          * present in ultraSPARC-T2/T2plus DIMM unums
  90          */
  91         if (strstr(dimm->dimm_unum, "BR") == NULL) {
  92                 flt = cmd_nvl_create_fault(hdl, fltnm, cert,
  93                     dimm->dimm_asru_nvl, dimm->dimm_asru_nvl, NULL);
  94         } else {
  95                 nvlfru = cmd_mem2hc(hdl, dimm->dimm_asru_nvl);
  96                 flt = cmd_nvl_create_fault(hdl, fltnm, cert,
  97                     dimm->dimm_asru_nvl, nvlfru, NULL);
  98                 if (nvlfru != NULL)
  99                         nvlist_free(nvlfru);
 100         }
 101         return (cmd_fault_add_location(hdl, flt, dimm->dimm_unum));
 102 #else
 103         return (cmd_nvl_create_fault(hdl, fltnm, cert, dimm->dimm_asru_nvl,
 104             dimm->dimm_asru_nvl, NULL));
 105 #endif /* sun4v */
 106 }
 107 
 108 static void
 109 cmd_dimm_free(fmd_hdl_t *hdl, cmd_dimm_t *dimm, int destroy)
 110 {
 111         cmd_case_t *cc = &dimm->dimm_case;
 112         int i;
 113         cmd_mq_t *q;
 114         tstamp_t  *tsp, *next;
 115 
 116 #ifdef sun4v
 117         cmd_branch_t *branch;
 118 #endif
 119         if (cc->cc_cp != NULL) {
 120                 cmd_case_fini(hdl, cc->cc_cp, destroy);
 121                 if (cc->cc_serdnm != NULL) {
 122                         if (fmd_serd_exists(hdl, cc->cc_serdnm) &&
 123                             destroy)
 124                                 fmd_serd_destroy(hdl, cc->cc_serdnm);
 125                         fmd_hdl_strfree(hdl, cc->cc_serdnm);
 126                 }
 127         }
 128 
 129         for (i = 0; i < CMD_MAX_CKWDS; i++) {
 130                 while ((q = cmd_list_next(&dimm->mq_root[i])) != NULL) {
 131                         if (q->mq_serdnm != NULL) {
 132                                 if (fmd_serd_exists(hdl, q->mq_serdnm)) {
 133                                         fmd_serd_destroy(hdl, q->mq_serdnm);
 134                                 }
 135                                 fmd_hdl_strfree(hdl, q->mq_serdnm);
 136                                 q->mq_serdnm = NULL;
 137                         }
 138 
 139                         for (tsp = cmd_list_next(&q->mq_dupce_tstamp);
 140                             tsp != NULL; tsp = next) {
 141                                 next = cmd_list_next(tsp);
 142                                 cmd_list_delete(&q->mq_dupce_tstamp,
 143                                     &tsp->ts_l);
 144                                 fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
 145                         }
 146 
 147                         cmd_list_delete(&dimm->mq_root[i], q);
 148                         fmd_hdl_free(hdl, q, sizeof (cmd_mq_t));
 149                 }
 150         }
 151 
 152         if (dimm->dimm_bank != NULL)
 153                 cmd_bank_remove_dimm(hdl, dimm->dimm_bank, dimm);
 154 
 155 #ifdef sun4v
 156         branch = cmd_branch_lookup_by_unum(hdl, dimm->dimm_unum);
 157         if (branch != NULL)
 158                 cmd_branch_remove_dimm(hdl, branch, dimm);
 159 #endif
 160 
 161         cmd_fmri_fini(hdl, &dimm->dimm_asru, destroy);
 162 
 163         if (destroy)
 164                 fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname);
 165 
 166         cmd_list_delete(&cmd.cmd_dimms, dimm);
 167         fmd_hdl_free(hdl, dimm, sizeof (cmd_dimm_t));
 168 }
 169 
 170 void
 171 cmd_dimm_destroy(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
 172 {
 173 
 174         fmd_stat_destroy(hdl, 1, &(dimm->dimm_retstat));
 175         cmd_dimm_free(hdl, dimm, FMD_B_TRUE);
 176 }
 177 
 178 static cmd_dimm_t *
 179 dimm_lookup_by_unum(const char *unum)
 180 {
 181         cmd_dimm_t *dimm;
 182 
 183         for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL;
 184             dimm = cmd_list_next(dimm)) {
 185                 if (strcmp(dimm->dimm_unum, unum) == 0)
 186                         return (dimm);
 187         }
 188 
 189         return (NULL);
 190 }
 191 
 192 static void
 193 dimm_attach_to_bank(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
 194 {
 195         cmd_bank_t *bank;
 196 
 197         for (bank = cmd_list_next(&cmd.cmd_banks); bank != NULL;
 198             bank = cmd_list_next(bank)) {
 199                 if (fmd_nvl_fmri_contains(hdl, bank->bank_asru_nvl,
 200                     dimm->dimm_asru_nvl)) {
 201                         cmd_bank_add_dimm(hdl, bank, dimm);
 202                         return;
 203                 }
 204         }
 205 }
 206 
 207 cmd_dimm_t *
 208 cmd_dimm_create(fmd_hdl_t *hdl, nvlist_t *asru)
 209 {
 210         cmd_dimm_t *dimm;
 211         const char *unum;
 212         nvlist_t *fmri;
 213         size_t nserids = 0;
 214         char **serids = NULL;
 215 
 216         if (!fmd_nvl_fmri_present(hdl, asru)) {
 217                 fmd_hdl_debug(hdl, "dimm_lookup: discarding old ereport\n");
 218                 return (NULL);
 219         }
 220 
 221         if ((unum = cmd_fmri_get_unum(asru)) == NULL) {
 222                 CMD_STAT_BUMP(bad_mem_asru);
 223                 return (NULL);
 224         }
 225 
 226 #ifdef sun4v
 227         if (nvlist_lookup_string_array(asru, FM_FMRI_HC_SERIAL_ID, &serids,
 228             &nserids) != 0) {
 229                 fmd_hdl_debug(hdl, "sun4v mem: FMRI does not"
 230                     " have serial_ids\n");
 231                 CMD_STAT_BUMP(bad_mem_asru);
 232                 return (NULL);
 233         }
 234 #endif
 235         fmri = cmd_mem_fmri_create(unum, serids, nserids);
 236         if (fmd_nvl_fmri_expand(hdl, fmri) < 0) {
 237                 CMD_STAT_BUMP(bad_mem_asru);
 238                 nvlist_free(fmri);
 239                 return (NULL);
 240         }
 241 
 242         fmd_hdl_debug(hdl, "dimm_create: creating new DIMM %s\n", unum);
 243         CMD_STAT_BUMP(dimm_creat);
 244 
 245         dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
 246         dimm->dimm_nodetype = CMD_NT_DIMM;
 247         dimm->dimm_version = CMD_DIMM_VERSION;
 248         dimm->dimm_phys_addr_low = ULLONG_MAX;
 249         dimm->dimm_phys_addr_hi = 0;
 250         dimm->dimm_syl_error = USHRT_MAX;
 251 
 252         cmd_bufname(dimm->dimm_bufname, sizeof (dimm->dimm_bufname), "dimm_%s",
 253             unum);
 254         cmd_fmri_init(hdl, &dimm->dimm_asru, fmri, "dimm_asru_%s", unum);
 255 
 256         nvlist_free(fmri);
 257 
 258         (void) nvlist_lookup_string(dimm->dimm_asru_nvl, FM_FMRI_MEM_UNUM,
 259             (char **)&dimm->dimm_unum);
 260 
 261         dimm_attach_to_bank(hdl, dimm);
 262 
 263         cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat, dimm->dimm_unum, 0,
 264             CMD_DIMM_STAT_PREFIX);
 265 
 266         cmd_list_append(&cmd.cmd_dimms, dimm);
 267         cmd_dimm_dirty(hdl, dimm);
 268 
 269         return (dimm);
 270 }
 271 
 272 cmd_dimm_t *
 273 cmd_dimm_lookup(fmd_hdl_t *hdl, nvlist_t *asru)
 274 {
 275         cmd_dimm_t *dimm;
 276         const char *unum;
 277 
 278         if ((unum = cmd_fmri_get_unum(asru)) == NULL) {
 279                 CMD_STAT_BUMP(bad_mem_asru);
 280                 return (NULL);
 281         }
 282 
 283         dimm = dimm_lookup_by_unum(unum);
 284 
 285         if (dimm != NULL && !fmd_nvl_fmri_present(hdl, dimm->dimm_asru_nvl)) {
 286                 /*
 287                  * The DIMM doesn't exist anymore, so we need to delete the
 288                  * state structure, which is now out of date.  The containing
 289                  * bank (if any) is also out of date, so blow it away too.
 290                  */
 291                 fmd_hdl_debug(hdl, "dimm_lookup: discarding old dimm\n");
 292 
 293                 if (dimm->dimm_bank != NULL)
 294                         cmd_bank_destroy(hdl, dimm->dimm_bank);
 295                 cmd_dimm_destroy(hdl, dimm);
 296 
 297                 return (NULL);
 298         }
 299 
 300         return (dimm);
 301 }
 302 
 303 static cmd_dimm_t *
 304 dimm_v0tov2(fmd_hdl_t *hdl, cmd_dimm_0_t *old, size_t oldsz)
 305 {
 306         cmd_dimm_t *new;
 307 
 308         if (oldsz != sizeof (cmd_dimm_0_t)) {
 309                 fmd_hdl_abort(hdl, "size of state doesn't match size of "
 310                     "version 0 state (%u bytes).\n", sizeof (cmd_dimm_0_t));
 311         }
 312 
 313         new = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
 314         new->dimm_header = old->dimm0_header;
 315         new->dimm_version = CMD_DIMM_VERSION;
 316         new->dimm_asru = old->dimm0_asru;
 317         new->dimm_nretired = old->dimm0_nretired;
 318         new->dimm_phys_addr_hi = 0;
 319         new->dimm_phys_addr_low = ULLONG_MAX;
 320 
 321         fmd_hdl_free(hdl, old, oldsz);
 322         return (new);
 323 }
 324 
 325 static cmd_dimm_t *
 326 dimm_v1tov2(fmd_hdl_t *hdl, cmd_dimm_1_t *old, size_t oldsz)
 327 {
 328 
 329         cmd_dimm_t *new;
 330 
 331         if (oldsz != sizeof (cmd_dimm_1_t)) {
 332                 fmd_hdl_abort(hdl, "size of state doesn't match size of "
 333                     "version 1 state (%u bytes).\n", sizeof (cmd_dimm_1_t));
 334         }
 335 
 336         new = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
 337 
 338         new->dimm_header = old->dimm1_header;
 339         new->dimm_version = CMD_DIMM_VERSION;
 340         new->dimm_asru = old->dimm1_asru;
 341         new->dimm_nretired = old->dimm1_nretired;
 342         new->dimm_flags = old->dimm1_flags;
 343         new->dimm_phys_addr_hi = 0;
 344         new->dimm_phys_addr_low = ULLONG_MAX;
 345 
 346         fmd_hdl_free(hdl, old, oldsz);
 347         return (new);
 348 }
 349 
 350 static cmd_dimm_t *
 351 dimm_wrapv2(fmd_hdl_t *hdl, cmd_dimm_pers_t *pers, size_t psz)
 352 {
 353         cmd_dimm_t *dimm;
 354 
 355         if (psz != sizeof (cmd_dimm_pers_t)) {
 356                 fmd_hdl_abort(hdl, "size of state doesn't match size of "
 357                     "version 1 state (%u bytes).\n", sizeof (cmd_dimm_pers_t));
 358         }
 359 
 360         dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
 361         bcopy(pers, dimm, sizeof (cmd_dimm_pers_t));
 362         fmd_hdl_free(hdl, pers, psz);
 363         return (dimm);
 364 }
 365 
 366 void *
 367 cmd_dimm_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr)
 368 {
 369         cmd_dimm_t *dimm;
 370 
 371         for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL;
 372             dimm = cmd_list_next(dimm)) {
 373                 if (strcmp(dimm->dimm_bufname, ptr->ptr_name) == 0)
 374                         break;
 375         }
 376 
 377         if (dimm == NULL) {
 378                 int migrated = 0;
 379                 size_t dimmsz;
 380 
 381                 fmd_hdl_debug(hdl, "restoring dimm from %s\n", ptr->ptr_name);
 382 
 383                 if ((dimmsz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) {
 384                         fmd_hdl_abort(hdl, "dimm referenced by case %s does "
 385                             "not exist in saved state\n",
 386                             fmd_case_uuid(hdl, cp));
 387                 } else if (dimmsz > CMD_DIMM_MAXSIZE ||
 388                     dimmsz < CMD_DIMM_MINSIZE) {
 389                         fmd_hdl_abort(hdl,
 390                             "dimm buffer referenced by case %s "
 391                             "is out of bounds (is %u bytes, max %u, min %u)\n",
 392                             fmd_case_uuid(hdl, cp), dimmsz,
 393                             CMD_DIMM_MAXSIZE, CMD_DIMM_MINSIZE);
 394                 }
 395 
 396                 if ((dimm = cmd_buf_read(hdl, NULL, ptr->ptr_name,
 397                     dimmsz)) == NULL) {
 398                         fmd_hdl_abort(hdl, "failed to read dimm buf %s",
 399                             ptr->ptr_name);
 400                 }
 401 
 402                 fmd_hdl_debug(hdl, "found %d in version field\n",
 403                     dimm->dimm_version);
 404 
 405                 if (CMD_DIMM_VERSIONED(dimm)) {
 406                         switch (dimm->dimm_version) {
 407                         case CMD_DIMM_VERSION_1:
 408                                 dimm = dimm_v1tov2(hdl, (cmd_dimm_1_t *)dimm,
 409                                     dimmsz);
 410                                 break;
 411                         case CMD_DIMM_VERSION_2:
 412                                 dimm = dimm_wrapv2(hdl, (cmd_dimm_pers_t *)dimm,
 413                                     dimmsz);
 414                                 break;
 415                         default:
 416                                 fmd_hdl_abort(hdl, "unknown version (found %d) "
 417                                     "for dimm state referenced by case %s.\n",
 418                                     dimm->dimm_version, fmd_case_uuid(hdl, cp));
 419                                 break;
 420                         }
 421                 } else {
 422                         dimm = dimm_v0tov2(hdl, (cmd_dimm_0_t *)dimm, dimmsz);
 423                         migrated = 1;
 424                 }
 425 
 426                 if (migrated) {
 427                         CMD_STAT_BUMP(dimm_migrat);
 428                         cmd_dimm_dirty(hdl, dimm);
 429                 }
 430 
 431                 cmd_fmri_restore(hdl, &dimm->dimm_asru);
 432 
 433                 if ((errno = nvlist_lookup_string(dimm->dimm_asru_nvl,
 434                     FM_FMRI_MEM_UNUM, (char **)&dimm->dimm_unum)) != 0)
 435                         fmd_hdl_abort(hdl, "failed to retrieve unum from asru");
 436 
 437                 dimm_attach_to_bank(hdl, dimm);
 438 
 439                 cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat,
 440                     dimm->dimm_unum, dimm->dimm_nretired, CMD_DIMM_STAT_PREFIX);
 441 
 442                 cmd_list_append(&cmd.cmd_dimms, dimm);
 443         }
 444 
 445         switch (ptr->ptr_subtype) {
 446         case BUG_PTR_DIMM_CASE:
 447                 fmd_hdl_debug(hdl, "recovering from out of order dimm ptr\n");
 448                 cmd_case_redirect(hdl, cp, CMD_PTR_DIMM_CASE);
 449                 /*FALLTHROUGH*/
 450         case CMD_PTR_DIMM_CASE:
 451                 cmd_mem_case_restore(hdl, &dimm->dimm_case, cp, "dimm",
 452                     dimm->dimm_unum);
 453                 break;
 454         default:
 455                 fmd_hdl_abort(hdl, "invalid %s subtype %d\n",
 456                     ptr->ptr_name, ptr->ptr_subtype);
 457         }
 458 
 459         return (dimm);
 460 }
 461 
 462 void
 463 cmd_dimm_validate(fmd_hdl_t *hdl)
 464 {
 465         cmd_dimm_t *dimm, *next;
 466 
 467         for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL; dimm = next) {
 468                 next = cmd_list_next(dimm);
 469 
 470                 if (!fmd_nvl_fmri_present(hdl, dimm->dimm_asru_nvl))
 471                         cmd_dimm_destroy(hdl, dimm);
 472         }
 473 }
 474 
 475 void
 476 cmd_dimm_dirty(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
 477 {
 478         if (fmd_buf_size(hdl, NULL, dimm->dimm_bufname) !=
 479             sizeof (cmd_dimm_pers_t))
 480                 fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname);
 481 
 482         /* No need to rewrite the FMRIs in the dimm - they don't change */
 483         fmd_buf_write(hdl, NULL, dimm->dimm_bufname, &dimm->dimm_pers,
 484             sizeof (cmd_dimm_pers_t));
 485 }
 486 
 487 void
 488 cmd_dimm_gc(fmd_hdl_t *hdl)
 489 {
 490         cmd_dimm_validate(hdl);
 491 }
 492 
 493 void
 494 cmd_dimm_fini(fmd_hdl_t *hdl)
 495 {
 496         cmd_dimm_t *dimm;
 497 
 498         while ((dimm = cmd_list_next(&cmd.cmd_dimms)) != NULL)
 499                 cmd_dimm_free(hdl, dimm, FMD_B_FALSE);
 500 }
 501 
 502 
 503 void
 504 cmd_dimm_save_symbol_error(cmd_dimm_t *dimm, uint16_t upos)
 505 {
 506         cmd_dimm_t *d = NULL, *next = NULL;
 507 
 508         for (d = cmd_list_next(&cmd.cmd_dimms); d != NULL; d = next) {
 509                 next = cmd_list_next(d);
 510                 if (cmd_same_datapath_dimms(dimm, d))
 511                         d->dimm_syl_error = upos;
 512         }
 513 }
 514 
 515 int
 516 cmd_dimm_check_symbol_error(cmd_dimm_t *dimm, uint16_t synd)
 517 {
 518         int upos;
 519         cmd_dimm_t *d, *next;
 520 
 521         if ((upos = cmd_synd2upos(synd)) < 0)
 522                 return (0);
 523 
 524         for (d = cmd_list_next(&cmd.cmd_dimms); d != NULL; d = next) {
 525                 next = cmd_list_next(d);
 526                 if (cmd_same_datapath_dimms(dimm, d) &&
 527                     (d->dimm_syl_error == upos))
 528                         return (1);
 529         }
 530 
 531         return (0);
 532 }