1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Support routines for DIMMs. 27 */ 28 29 #include <cmd_mem.h> 30 #include <limits.h> 31 #include <cmd_dimm.h> 32 #include <cmd_bank.h> 33 #include <cmd.h> 34 35 #include <errno.h> 36 #include <string.h> 37 #include <strings.h> 38 #include <fcntl.h> 39 #include <unistd.h> 40 #include <fm/fmd_api.h> 41 #include <sys/fm/protocol.h> 42 #include <sys/mem.h> 43 #include <sys/nvpair.h> 44 #ifdef sun4v 45 #include <cmd_hc_sun4v.h> 46 #include <cmd_branch.h> 47 #endif /* sun4v */ 48 49 /* 50 * Some errors (RxE/FRx pairs) don't have accurate DIMM (resource) FMRIs, 51 * because sufficient information was unavailable prior to correlation. 52 * When the DE completes the pair, it uses this routine to retrieve the 53 * correct FMRI. 54 */ 55 nvlist_t * 56 cmd_dimm_fmri_derive(fmd_hdl_t *hdl, uint64_t afar, uint16_t synd, 57 uint64_t afsr) 58 { 59 nvlist_t *fmri; 60 61 if ((fmri = cmd_mem_fmri_derive(hdl, afar, afsr, synd)) == NULL) 62 return (NULL); 63 64 if (fmd_nvl_fmri_expand(hdl, fmri) < 0) { 65 nvlist_free(fmri); 66 return (NULL); 67 } 68 69 return (fmri); 70 } 71 72 nvlist_t * 73 cmd_dimm_fru(cmd_dimm_t *dimm) 74 { 75 return (dimm->dimm_asru_nvl); 76 } 77 78 nvlist_t * 79 cmd_dimm_create_fault(fmd_hdl_t *hdl, cmd_dimm_t *dimm, const char *fltnm, 80 uint_t cert) 81 { 82 #ifdef sun4v 83 nvlist_t *flt, *nvlfru; 84 /* 85 * Do NOT issue hc scheme FRU FMRIs for ultraSPARC-T1 platforms. 86 * The SP will misinterpret the FRU. Instead, reuse the ASRU FMRI 87 * 88 * Use the BR string as a distinguisher. BR (branch) is only 89 * present in ultraSPARC-T2/T2plus DIMM unums 90 */ 91 if (strstr(dimm->dimm_unum, "BR") == NULL) { 92 flt = cmd_nvl_create_fault(hdl, fltnm, cert, 93 dimm->dimm_asru_nvl, dimm->dimm_asru_nvl, NULL); 94 } else { 95 nvlfru = cmd_mem2hc(hdl, dimm->dimm_asru_nvl); 96 flt = cmd_nvl_create_fault(hdl, fltnm, cert, 97 dimm->dimm_asru_nvl, nvlfru, NULL); 98 if (nvlfru != NULL) 99 nvlist_free(nvlfru); 100 } 101 return (cmd_fault_add_location(hdl, flt, dimm->dimm_unum)); 102 #else 103 return (cmd_nvl_create_fault(hdl, fltnm, cert, dimm->dimm_asru_nvl, 104 dimm->dimm_asru_nvl, NULL)); 105 #endif /* sun4v */ 106 } 107 108 static void 109 cmd_dimm_free(fmd_hdl_t *hdl, cmd_dimm_t *dimm, int destroy) 110 { 111 cmd_case_t *cc = &dimm->dimm_case; 112 int i; 113 cmd_mq_t *q; 114 tstamp_t *tsp, *next; 115 116 #ifdef sun4v 117 cmd_branch_t *branch; 118 #endif 119 if (cc->cc_cp != NULL) { 120 cmd_case_fini(hdl, cc->cc_cp, destroy); 121 if (cc->cc_serdnm != NULL) { 122 if (fmd_serd_exists(hdl, cc->cc_serdnm) && 123 destroy) 124 fmd_serd_destroy(hdl, cc->cc_serdnm); 125 fmd_hdl_strfree(hdl, cc->cc_serdnm); 126 } 127 } 128 129 for (i = 0; i < CMD_MAX_CKWDS; i++) { 130 while ((q = cmd_list_next(&dimm->mq_root[i])) != NULL) { 131 if (q->mq_serdnm != NULL) { 132 if (fmd_serd_exists(hdl, q->mq_serdnm)) { 133 fmd_serd_destroy(hdl, q->mq_serdnm); 134 } 135 fmd_hdl_strfree(hdl, q->mq_serdnm); 136 q->mq_serdnm = NULL; 137 } 138 139 for (tsp = cmd_list_next(&q->mq_dupce_tstamp); 140 tsp != NULL; tsp = next) { 141 next = cmd_list_next(tsp); 142 cmd_list_delete(&q->mq_dupce_tstamp, 143 &tsp->ts_l); 144 fmd_hdl_free(hdl, tsp, sizeof (tstamp_t)); 145 } 146 147 cmd_list_delete(&dimm->mq_root[i], q); 148 fmd_hdl_free(hdl, q, sizeof (cmd_mq_t)); 149 } 150 } 151 152 if (dimm->dimm_bank != NULL) 153 cmd_bank_remove_dimm(hdl, dimm->dimm_bank, dimm); 154 155 #ifdef sun4v 156 branch = cmd_branch_lookup_by_unum(hdl, dimm->dimm_unum); 157 if (branch != NULL) 158 cmd_branch_remove_dimm(hdl, branch, dimm); 159 #endif 160 161 cmd_fmri_fini(hdl, &dimm->dimm_asru, destroy); 162 163 if (destroy) 164 fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname); 165 166 cmd_list_delete(&cmd.cmd_dimms, dimm); 167 fmd_hdl_free(hdl, dimm, sizeof (cmd_dimm_t)); 168 } 169 170 void 171 cmd_dimm_destroy(fmd_hdl_t *hdl, cmd_dimm_t *dimm) 172 { 173 174 fmd_stat_destroy(hdl, 1, &(dimm->dimm_retstat)); 175 cmd_dimm_free(hdl, dimm, FMD_B_TRUE); 176 } 177 178 static cmd_dimm_t * 179 dimm_lookup_by_unum(const char *unum) 180 { 181 cmd_dimm_t *dimm; 182 183 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL; 184 dimm = cmd_list_next(dimm)) { 185 if (strcmp(dimm->dimm_unum, unum) == 0) 186 return (dimm); 187 } 188 189 return (NULL); 190 } 191 192 static void 193 dimm_attach_to_bank(fmd_hdl_t *hdl, cmd_dimm_t *dimm) 194 { 195 cmd_bank_t *bank; 196 197 for (bank = cmd_list_next(&cmd.cmd_banks); bank != NULL; 198 bank = cmd_list_next(bank)) { 199 if (fmd_nvl_fmri_contains(hdl, bank->bank_asru_nvl, 200 dimm->dimm_asru_nvl)) { 201 cmd_bank_add_dimm(hdl, bank, dimm); 202 return; 203 } 204 } 205 } 206 207 cmd_dimm_t * 208 cmd_dimm_create(fmd_hdl_t *hdl, nvlist_t *asru) 209 { 210 cmd_dimm_t *dimm; 211 const char *unum; 212 nvlist_t *fmri; 213 size_t nserids = 0; 214 char **serids = NULL; 215 216 if (!fmd_nvl_fmri_present(hdl, asru)) { 217 fmd_hdl_debug(hdl, "dimm_lookup: discarding old ereport\n"); 218 return (NULL); 219 } 220 221 if ((unum = cmd_fmri_get_unum(asru)) == NULL) { 222 CMD_STAT_BUMP(bad_mem_asru); 223 return (NULL); 224 } 225 226 #ifdef sun4v 227 if (nvlist_lookup_string_array(asru, FM_FMRI_HC_SERIAL_ID, &serids, 228 &nserids) != 0) { 229 fmd_hdl_debug(hdl, "sun4v mem: FMRI does not" 230 " have serial_ids\n"); 231 CMD_STAT_BUMP(bad_mem_asru); 232 return (NULL); 233 } 234 #endif 235 fmri = cmd_mem_fmri_create(unum, serids, nserids); 236 if (fmd_nvl_fmri_expand(hdl, fmri) < 0) { 237 CMD_STAT_BUMP(bad_mem_asru); 238 nvlist_free(fmri); 239 return (NULL); 240 } 241 242 fmd_hdl_debug(hdl, "dimm_create: creating new DIMM %s\n", unum); 243 CMD_STAT_BUMP(dimm_creat); 244 245 dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP); 246 dimm->dimm_nodetype = CMD_NT_DIMM; 247 dimm->dimm_version = CMD_DIMM_VERSION; 248 dimm->dimm_phys_addr_low = ULLONG_MAX; 249 dimm->dimm_phys_addr_hi = 0; 250 dimm->dimm_syl_error = USHRT_MAX; 251 252 cmd_bufname(dimm->dimm_bufname, sizeof (dimm->dimm_bufname), "dimm_%s", 253 unum); 254 cmd_fmri_init(hdl, &dimm->dimm_asru, fmri, "dimm_asru_%s", unum); 255 256 nvlist_free(fmri); 257 258 (void) nvlist_lookup_string(dimm->dimm_asru_nvl, FM_FMRI_MEM_UNUM, 259 (char **)&dimm->dimm_unum); 260 261 dimm_attach_to_bank(hdl, dimm); 262 263 cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat, dimm->dimm_unum, 0, 264 CMD_DIMM_STAT_PREFIX); 265 266 cmd_list_append(&cmd.cmd_dimms, dimm); 267 cmd_dimm_dirty(hdl, dimm); 268 269 return (dimm); 270 } 271 272 cmd_dimm_t * 273 cmd_dimm_lookup(fmd_hdl_t *hdl, nvlist_t *asru) 274 { 275 cmd_dimm_t *dimm; 276 const char *unum; 277 278 if ((unum = cmd_fmri_get_unum(asru)) == NULL) { 279 CMD_STAT_BUMP(bad_mem_asru); 280 return (NULL); 281 } 282 283 dimm = dimm_lookup_by_unum(unum); 284 285 if (dimm != NULL && !fmd_nvl_fmri_present(hdl, dimm->dimm_asru_nvl)) { 286 /* 287 * The DIMM doesn't exist anymore, so we need to delete the 288 * state structure, which is now out of date. The containing 289 * bank (if any) is also out of date, so blow it away too. 290 */ 291 fmd_hdl_debug(hdl, "dimm_lookup: discarding old dimm\n"); 292 293 if (dimm->dimm_bank != NULL) 294 cmd_bank_destroy(hdl, dimm->dimm_bank); 295 cmd_dimm_destroy(hdl, dimm); 296 297 return (NULL); 298 } 299 300 return (dimm); 301 } 302 303 static cmd_dimm_t * 304 dimm_v0tov2(fmd_hdl_t *hdl, cmd_dimm_0_t *old, size_t oldsz) 305 { 306 cmd_dimm_t *new; 307 308 if (oldsz != sizeof (cmd_dimm_0_t)) { 309 fmd_hdl_abort(hdl, "size of state doesn't match size of " 310 "version 0 state (%u bytes).\n", sizeof (cmd_dimm_0_t)); 311 } 312 313 new = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP); 314 new->dimm_header = old->dimm0_header; 315 new->dimm_version = CMD_DIMM_VERSION; 316 new->dimm_asru = old->dimm0_asru; 317 new->dimm_nretired = old->dimm0_nretired; 318 new->dimm_phys_addr_hi = 0; 319 new->dimm_phys_addr_low = ULLONG_MAX; 320 321 fmd_hdl_free(hdl, old, oldsz); 322 return (new); 323 } 324 325 static cmd_dimm_t * 326 dimm_v1tov2(fmd_hdl_t *hdl, cmd_dimm_1_t *old, size_t oldsz) 327 { 328 329 cmd_dimm_t *new; 330 331 if (oldsz != sizeof (cmd_dimm_1_t)) { 332 fmd_hdl_abort(hdl, "size of state doesn't match size of " 333 "version 1 state (%u bytes).\n", sizeof (cmd_dimm_1_t)); 334 } 335 336 new = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP); 337 338 new->dimm_header = old->dimm1_header; 339 new->dimm_version = CMD_DIMM_VERSION; 340 new->dimm_asru = old->dimm1_asru; 341 new->dimm_nretired = old->dimm1_nretired; 342 new->dimm_flags = old->dimm1_flags; 343 new->dimm_phys_addr_hi = 0; 344 new->dimm_phys_addr_low = ULLONG_MAX; 345 346 fmd_hdl_free(hdl, old, oldsz); 347 return (new); 348 } 349 350 static cmd_dimm_t * 351 dimm_wrapv2(fmd_hdl_t *hdl, cmd_dimm_pers_t *pers, size_t psz) 352 { 353 cmd_dimm_t *dimm; 354 355 if (psz != sizeof (cmd_dimm_pers_t)) { 356 fmd_hdl_abort(hdl, "size of state doesn't match size of " 357 "version 1 state (%u bytes).\n", sizeof (cmd_dimm_pers_t)); 358 } 359 360 dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP); 361 bcopy(pers, dimm, sizeof (cmd_dimm_pers_t)); 362 fmd_hdl_free(hdl, pers, psz); 363 return (dimm); 364 } 365 366 void * 367 cmd_dimm_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr) 368 { 369 cmd_dimm_t *dimm; 370 371 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL; 372 dimm = cmd_list_next(dimm)) { 373 if (strcmp(dimm->dimm_bufname, ptr->ptr_name) == 0) 374 break; 375 } 376 377 if (dimm == NULL) { 378 int migrated = 0; 379 size_t dimmsz; 380 381 fmd_hdl_debug(hdl, "restoring dimm from %s\n", ptr->ptr_name); 382 383 if ((dimmsz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) { 384 fmd_hdl_abort(hdl, "dimm referenced by case %s does " 385 "not exist in saved state\n", 386 fmd_case_uuid(hdl, cp)); 387 } else if (dimmsz > CMD_DIMM_MAXSIZE || 388 dimmsz < CMD_DIMM_MINSIZE) { 389 fmd_hdl_abort(hdl, 390 "dimm buffer referenced by case %s " 391 "is out of bounds (is %u bytes, max %u, min %u)\n", 392 fmd_case_uuid(hdl, cp), dimmsz, 393 CMD_DIMM_MAXSIZE, CMD_DIMM_MINSIZE); 394 } 395 396 if ((dimm = cmd_buf_read(hdl, NULL, ptr->ptr_name, 397 dimmsz)) == NULL) { 398 fmd_hdl_abort(hdl, "failed to read dimm buf %s", 399 ptr->ptr_name); 400 } 401 402 fmd_hdl_debug(hdl, "found %d in version field\n", 403 dimm->dimm_version); 404 405 if (CMD_DIMM_VERSIONED(dimm)) { 406 switch (dimm->dimm_version) { 407 case CMD_DIMM_VERSION_1: 408 dimm = dimm_v1tov2(hdl, (cmd_dimm_1_t *)dimm, 409 dimmsz); 410 break; 411 case CMD_DIMM_VERSION_2: 412 dimm = dimm_wrapv2(hdl, (cmd_dimm_pers_t *)dimm, 413 dimmsz); 414 break; 415 default: 416 fmd_hdl_abort(hdl, "unknown version (found %d) " 417 "for dimm state referenced by case %s.\n", 418 dimm->dimm_version, fmd_case_uuid(hdl, cp)); 419 break; 420 } 421 } else { 422 dimm = dimm_v0tov2(hdl, (cmd_dimm_0_t *)dimm, dimmsz); 423 migrated = 1; 424 } 425 426 if (migrated) { 427 CMD_STAT_BUMP(dimm_migrat); 428 cmd_dimm_dirty(hdl, dimm); 429 } 430 431 cmd_fmri_restore(hdl, &dimm->dimm_asru); 432 433 if ((errno = nvlist_lookup_string(dimm->dimm_asru_nvl, 434 FM_FMRI_MEM_UNUM, (char **)&dimm->dimm_unum)) != 0) 435 fmd_hdl_abort(hdl, "failed to retrieve unum from asru"); 436 437 dimm_attach_to_bank(hdl, dimm); 438 439 cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat, 440 dimm->dimm_unum, dimm->dimm_nretired, CMD_DIMM_STAT_PREFIX); 441 442 cmd_list_append(&cmd.cmd_dimms, dimm); 443 } 444 445 switch (ptr->ptr_subtype) { 446 case BUG_PTR_DIMM_CASE: 447 fmd_hdl_debug(hdl, "recovering from out of order dimm ptr\n"); 448 cmd_case_redirect(hdl, cp, CMD_PTR_DIMM_CASE); 449 /*FALLTHROUGH*/ 450 case CMD_PTR_DIMM_CASE: 451 cmd_mem_case_restore(hdl, &dimm->dimm_case, cp, "dimm", 452 dimm->dimm_unum); 453 break; 454 default: 455 fmd_hdl_abort(hdl, "invalid %s subtype %d\n", 456 ptr->ptr_name, ptr->ptr_subtype); 457 } 458 459 return (dimm); 460 } 461 462 void 463 cmd_dimm_validate(fmd_hdl_t *hdl) 464 { 465 cmd_dimm_t *dimm, *next; 466 467 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL; dimm = next) { 468 next = cmd_list_next(dimm); 469 470 if (!fmd_nvl_fmri_present(hdl, dimm->dimm_asru_nvl)) 471 cmd_dimm_destroy(hdl, dimm); 472 } 473 } 474 475 void 476 cmd_dimm_dirty(fmd_hdl_t *hdl, cmd_dimm_t *dimm) 477 { 478 if (fmd_buf_size(hdl, NULL, dimm->dimm_bufname) != 479 sizeof (cmd_dimm_pers_t)) 480 fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname); 481 482 /* No need to rewrite the FMRIs in the dimm - they don't change */ 483 fmd_buf_write(hdl, NULL, dimm->dimm_bufname, &dimm->dimm_pers, 484 sizeof (cmd_dimm_pers_t)); 485 } 486 487 void 488 cmd_dimm_gc(fmd_hdl_t *hdl) 489 { 490 cmd_dimm_validate(hdl); 491 } 492 493 void 494 cmd_dimm_fini(fmd_hdl_t *hdl) 495 { 496 cmd_dimm_t *dimm; 497 498 while ((dimm = cmd_list_next(&cmd.cmd_dimms)) != NULL) 499 cmd_dimm_free(hdl, dimm, FMD_B_FALSE); 500 } 501 502 503 void 504 cmd_dimm_save_symbol_error(cmd_dimm_t *dimm, uint16_t upos) 505 { 506 cmd_dimm_t *d = NULL, *next = NULL; 507 508 for (d = cmd_list_next(&cmd.cmd_dimms); d != NULL; d = next) { 509 next = cmd_list_next(d); 510 if (cmd_same_datapath_dimms(dimm, d)) 511 d->dimm_syl_error = upos; 512 } 513 } 514 515 int 516 cmd_dimm_check_symbol_error(cmd_dimm_t *dimm, uint16_t synd) 517 { 518 int upos; 519 cmd_dimm_t *d, *next; 520 521 if ((upos = cmd_synd2upos(synd)) < 0) 522 return (0); 523 524 for (d = cmd_list_next(&cmd.cmd_dimms); d != NULL; d = next) { 525 next = cmd_list_next(d); 526 if (cmd_same_datapath_dimms(dimm, d) && 527 (d->dimm_syl_error == upos)) 528 return (1); 529 } 530 531 return (0); 532 }