Print this page
patch tsoome-feedback


 182         if (hcl == NULL)
 183                 return;
 184 
 185         for (i = 0; i < n; i++) {
 186                 (void) nvlist_alloc(&hcl[i],
 187                     NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0);
 188         }
 189 
 190         for (i = 0, j = 0; i < n; i++) {
 191                 (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
 192                 (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_ID, &id);
 193                 (void) nvlist_add_string(hcl[j], FM_FMRI_HC_NAME, name);
 194                 (void) nvlist_add_string(hcl[j], FM_FMRI_HC_ID, id);
 195                 j++;
 196                 if (strcmp(name, "chip") == 0)
 197                         break;
 198         }
 199 
 200         if (nvlist_alloc(&rsrc,  NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0) != 0) {
 201                 for (i = 0; i < n; i++) {
 202                         if (hcl[i] != NULL)
 203                                 nvlist_free(hcl[i]);
 204                 }
 205                 fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
 206         }
 207 
 208         if (nvlist_add_uint8(rsrc, FM_VERSION, FM_HC_SCHEME_VERSION) != 0 ||
 209             nvlist_add_string(rsrc, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0 ||
 210             nvlist_add_string(rsrc, FM_FMRI_HC_ROOT, "") != 0 ||
 211             nvlist_add_uint32(rsrc, FM_FMRI_HC_LIST_SZ, n) != 0 ||
 212             nvlist_add_nvlist_array(rsrc, FM_FMRI_HC_LIST, hcl, n) != 0) {
 213                 for (i = 0; i < n; i++) {
 214                         if (hcl[i] != NULL)
 215                                 nvlist_free(hcl[i]);
 216                 }
 217                 fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
 218                 nvlist_free(rsrc);
 219         }
 220 
 221         fru = gmem_find_fault_fru(hdl, rsrc);
 222         if (fru != NULL) {
 223                 cp = fmd_case_open(hdl, NULL);
 224                 fltlist = fmd_nvl_create_fault(hdl, "fault.memory.datapath",
 225                     100, fru, fru, fru);
 226                 fmd_case_add_suspect(hdl, cp, fltlist);
 227                 fmd_case_solve(hdl, cp);
 228                 nvlist_free(fru);
 229         }
 230 
 231         for (i = 0; i < n; i++) {
 232                 if (hcl[i] != NULL)
 233                         nvlist_free(hcl[i]);
 234         }
 235 
 236         fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
 237         nvlist_free(rsrc);
 238 }
 239 
 240 /*
 241  * Formula to convert an unhashed address to a hashed address:
 242  * PA[17:11] = (PA[32:28] xor PA[17:13]) :: (PA[19:18] xor PA[12:11])
      *
      * The result is stored through addr; afar is the unhashed input address.
      * (afar & OFFBIT) is OR'd into the result unchanged.  The two right
      * shifts line the upper fields up with the lower fields they are XORed
      * with (28 - 15 = 13 and 18 - 7 = 11).  This assumes each BITx_y mask
      * selects bits y:x as its name suggests; the mask definitions are not
      * visible here -- TODO confirm.
      *
      * The XOR terms carry no parentheses of their own: C gives '^' higher
      * precedence than '|', so each XOR pair is evaluated before it is OR'd
      * into the result.
 243  */
 244 static void
 245 gmem_to_hashed_addr(uint64_t *addr, uint64_t afar)
 246 {
 247 
 248         *addr = (afar & OFFBIT) | ((afar & BIT28_32) >> 15) ^ (afar & BIT13_17)
 249             | ((afar & BIT18_19) >> 7) ^ (afar & BIT11_12);
 250 }
 251 
 252 /*


 382         if (nret < gmem.gm_low_ce_thresh)
 383                 return;
 384 
 385         if (dimm->dimm_phys_addr_hi >= dimm->dimm_phys_addr_low)
 386                 delta_addr =
 387                     (dimm->dimm_phys_addr_hi - dimm->dimm_phys_addr_low) /
 388                     (nret - 1);
 389 
 390         if (nret >= gmem.gm_max_retired_pages || delta_addr > GMEM_MQ_512KB) {
 391 
 392                 fmd_hdl_debug(hdl, "ce_thresh_check succeeded nret=%d", nret);
 393                 dimm->dimm_flags |= GMEM_F_FAULTING;
 394                 gmem_dimm_dirty(hdl, dimm);
 395 
 396                 cp = fmd_case_open(hdl, NULL);
 397                 rsrc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
 398                 flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_PAGES,
 399                     GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsrc);
 400                 fmd_case_add_suspect(hdl, cp, flt);
 401                 fmd_case_solve(hdl, cp);
 402                 if (rsrc != NULL)
 403                         nvlist_free(rsrc);
 404         }
 405 }
 406 
 407 /*
 408  * rule 5b checking. The check succeeds if more than 120
 409  * non-intermittent CEs are reported against one symbol
 410  * position of one afar in 72 hours
      *
      * For every index cw from 0 through GMEM_MAX_CKWDS - 1 this walks the
      * list rooted at dimm->mq_root[cw] and stops at the first entry whose
      * mq_dupce_count is >= gmem.gm_dupce.  For that entry it opens a new
      * case, builds a GMEM_FAULT_DIMM_PAGES fault (certainty GMEM_FLTMAXCONF)
      * from gmem_dimm_fru(dimm) and the resource returned by
      * gmem_find_dimm_rsc(), sets GMEM_F_FAULTING in dimm_flags, calls
      * gmem_dimm_dirty(), adds the fault as a suspect, solves the case, frees
      * the resource nvlist and returns.  If no entry qualifies, nothing is
      * done.
      *
      * NOTE(review): the 120-CE and 72-hour figures above are not visible in
      * this function; presumably they come from the value of gm_dupce and
      * from time-stamp pruning done elsewhere -- confirm.
      *
      * NOTE(review): the NULL test before nvlist_free(rsrc) looks redundant
      * if nvlist_free() accepts NULL -- confirm against libnvpair.
 411  */
 412 static void
 413 mq_5b_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
 414 {
 415         nvlist_t *flt, *rsrc;
 416         fmd_case_t *cp;
 417         gmem_mq_t *ip, *next;
 418         int cw;
 419 
 420         for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
 421                 for (ip = gmem_list_next(&dimm->mq_root[cw]);
 422                     ip != NULL; ip = next) {
 423                         next = gmem_list_next(ip);
 424                         if (ip->mq_dupce_count >= gmem.gm_dupce) {
 425                                 fmd_hdl_debug(hdl,
 426                                     "mq_5b_check succeeded: duplicate CE=%d",
 427                                     ip->mq_dupce_count);
 428                                 cp = fmd_case_open(hdl, NULL);
 429                                 rsrc = gmem_find_dimm_rsc(hdl,
 430                                     dimm->dimm_serial);
 431                                 flt = fmd_nvl_create_fault(hdl,
 432                                     GMEM_FAULT_DIMM_PAGES, GMEM_FLTMAXCONF,
 433                                     NULL, gmem_dimm_fru(dimm), rsrc);
 434                                 dimm->dimm_flags |= GMEM_F_FAULTING;
 435                                 gmem_dimm_dirty(hdl, dimm);
 436                                 fmd_case_add_suspect(hdl, cp, flt);
 437                                 fmd_case_solve(hdl, cp);
 438                                 if (rsrc != NULL)
 439                                         nvlist_free(rsrc);
 440                                 return;
 441                         }
 442                 }
 443         }
 444 }
 445 
 446 /*
 447  * delete the expired duplicate CE time stamps
 448  */
 449 static void
 450 mq_prune_dup(fmd_hdl_t *hdl, gmem_mq_t *ip, uint64_t now)
 451 {
 452         tstamp_t *tsp, *next;
 453 
 454         for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
 455             tsp = next) {
 456                 next = gmem_list_next(tsp);
 457                 if (tsp->tstamp < now - GMEM_MQ_TIMELIM) {
 458                         gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);


 688                                 upos_array[++i].mq1 = NULL;
 689                         }
 690                 }
 691                 if (i - upos_pairs >= 2) {
 692                         /* Rule 4A violation */
 693                         rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
 694                         flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_4A,
 695                             GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsc);
 696                         for (j = upos_pairs; j < i; j++) {
 697                                 fmd_case_add_ereport(hdl,
 698                                     dimm->dimm_case.cc_cp,
 699                                     upos_array[j].mq1->mq_ep);
 700                                 fmd_case_add_ereport(hdl,
 701                                     dimm->dimm_case.cc_cp,
 702                                     upos_array[j].mq2->mq_ep);
 703                         }
 704                         dimm->dimm_flags |= GMEM_F_FAULTING;
 705                         gmem_dimm_dirty(hdl, dimm);
 706                         fmd_case_add_suspect(hdl, dimm->dimm_case.cc_cp, flt);
 707                         fmd_case_solve(hdl, dimm->dimm_case.cc_cp);
 708                         if (rsc != NULL)
 709                                 nvlist_free(rsc);
 710                         return;
 711                 }
 712                 upos_pairs = i;
 713                 assert(upos_pairs < 16);
 714         }
 715 }
 716 
 717 /*ARGSUSED*/
 718 gmem_evdisp_t
 719 gmem_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
 720 {
 721         uint16_t symbol_pos, cw;
 722         uint64_t phyaddr, offset, addr;
 723         uint32_t filter_ratio = 0;
 724         gmem_dimm_t *dimm;
 725         gmem_page_t *page;
 726         nvlist_t *fru = NULL;
 727         nvlist_t *topo_rsc = NULL;
 728         nvlist_t *rsrc, *det;




 182         if (hcl == NULL)
 183                 return;
 184 
 185         for (i = 0; i < n; i++) {
 186                 (void) nvlist_alloc(&hcl[i],
 187                     NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0);
 188         }
 189 
 190         for (i = 0, j = 0; i < n; i++) {
 191                 (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
 192                 (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_ID, &id);
 193                 (void) nvlist_add_string(hcl[j], FM_FMRI_HC_NAME, name);
 194                 (void) nvlist_add_string(hcl[j], FM_FMRI_HC_ID, id);
 195                 j++;
 196                 if (strcmp(name, "chip") == 0)
 197                         break;
 198         }
 199 
 200         if (nvlist_alloc(&rsrc,  NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0) != 0) {
 201                 for (i = 0; i < n; i++) {

 202                         nvlist_free(hcl[i]);
 203                 }
 204                 fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
 205         }
 206 
 207         if (nvlist_add_uint8(rsrc, FM_VERSION, FM_HC_SCHEME_VERSION) != 0 ||
 208             nvlist_add_string(rsrc, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0 ||
 209             nvlist_add_string(rsrc, FM_FMRI_HC_ROOT, "") != 0 ||
 210             nvlist_add_uint32(rsrc, FM_FMRI_HC_LIST_SZ, n) != 0 ||
 211             nvlist_add_nvlist_array(rsrc, FM_FMRI_HC_LIST, hcl, n) != 0) {
 212                 for (i = 0; i < n; i++) {

 213                         nvlist_free(hcl[i]);
 214                 }
 215                 fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
 216                 nvlist_free(rsrc);
 217         }
 218 
 219         fru = gmem_find_fault_fru(hdl, rsrc);
 220         if (fru != NULL) {
 221                 cp = fmd_case_open(hdl, NULL);
 222                 fltlist = fmd_nvl_create_fault(hdl, "fault.memory.datapath",
 223                     100, fru, fru, fru);
 224                 fmd_case_add_suspect(hdl, cp, fltlist);
 225                 fmd_case_solve(hdl, cp);
 226                 nvlist_free(fru);
 227         }
 228 
 229         for (i = 0; i < n; i++) {

 230                 nvlist_free(hcl[i]);
 231         }
 232 
 233         fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
 234         nvlist_free(rsrc);
 235 }
 236 
 237 /*
 238  * Formula to convert an unhashed address to a hashed address:
 239  * PA[17:11] = (PA[32:28] xor PA[17:13]) :: (PA[19:18] xor PA[12:11])
      *
      * The result is stored through addr; afar is the unhashed input address.
      * (afar & OFFBIT) is OR'd into the result unchanged.  The two right
      * shifts line the upper fields up with the lower fields they are XORed
      * with (28 - 15 = 13 and 18 - 7 = 11).  This assumes each BITx_y mask
      * selects bits y:x as its name suggests; the mask definitions are not
      * visible here -- TODO confirm.
      *
      * The XOR terms carry no parentheses of their own: C gives '^' higher
      * precedence than '|', so each XOR pair is evaluated before it is OR'd
      * into the result.
 240  */
 241 static void
 242 gmem_to_hashed_addr(uint64_t *addr, uint64_t afar)
 243 {
 244 
 245         *addr = (afar & OFFBIT) | ((afar & BIT28_32) >> 15) ^ (afar & BIT13_17)
 246             | ((afar & BIT18_19) >> 7) ^ (afar & BIT11_12);
 247 }
 248 
 249 /*


 379         if (nret < gmem.gm_low_ce_thresh)
 380                 return;
 381 
 382         if (dimm->dimm_phys_addr_hi >= dimm->dimm_phys_addr_low)
 383                 delta_addr =
 384                     (dimm->dimm_phys_addr_hi - dimm->dimm_phys_addr_low) /
 385                     (nret - 1);
 386 
 387         if (nret >= gmem.gm_max_retired_pages || delta_addr > GMEM_MQ_512KB) {
 388 
 389                 fmd_hdl_debug(hdl, "ce_thresh_check succeeded nret=%d", nret);
 390                 dimm->dimm_flags |= GMEM_F_FAULTING;
 391                 gmem_dimm_dirty(hdl, dimm);
 392 
 393                 cp = fmd_case_open(hdl, NULL);
 394                 rsrc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
 395                 flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_PAGES,
 396                     GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsrc);
 397                 fmd_case_add_suspect(hdl, cp, flt);
 398                 fmd_case_solve(hdl, cp);

 399                 nvlist_free(rsrc);
 400         }
 401 }
 402 
 403 /*
 404  * rule 5b checking. The check succeeds if more than 120
 405  * non-intermittent CEs are reported against one symbol
 406  * position of one afar in 72 hours
      *
      * For every index cw from 0 through GMEM_MAX_CKWDS - 1 this walks the
      * list rooted at dimm->mq_root[cw] and stops at the first entry whose
      * mq_dupce_count is >= gmem.gm_dupce.  For that entry it opens a new
      * case, builds a GMEM_FAULT_DIMM_PAGES fault (certainty GMEM_FLTMAXCONF)
      * from gmem_dimm_fru(dimm) and the resource returned by
      * gmem_find_dimm_rsc(), sets GMEM_F_FAULTING in dimm_flags, calls
      * gmem_dimm_dirty(), adds the fault as a suspect, solves the case, frees
      * the resource nvlist and returns.  If no entry qualifies, nothing is
      * done.
      *
      * NOTE(review): the 120-CE and 72-hour figures above are not visible in
      * this function; presumably they come from the value of gm_dupce and
      * from time-stamp pruning done elsewhere -- confirm.
      *
      * NOTE(review): rsrc is handed to nvlist_free() without a NULL test, so
      * this relies on nvlist_free() accepting NULL should
      * gmem_find_dimm_rsc() return NULL -- confirm against libnvpair.
 407  */
 408 static void
 409 mq_5b_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
 410 {
 411         nvlist_t *flt, *rsrc;
 412         fmd_case_t *cp;
 413         gmem_mq_t *ip, *next;
 414         int cw;
 415 
 416         for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
 417                 for (ip = gmem_list_next(&dimm->mq_root[cw]);
 418                     ip != NULL; ip = next) {
 419                         next = gmem_list_next(ip);
 420                         if (ip->mq_dupce_count >= gmem.gm_dupce) {
 421                                 fmd_hdl_debug(hdl,
 422                                     "mq_5b_check succeeded: duplicate CE=%d",
 423                                     ip->mq_dupce_count);
 424                                 cp = fmd_case_open(hdl, NULL);
 425                                 rsrc = gmem_find_dimm_rsc(hdl,
 426                                     dimm->dimm_serial);
 427                                 flt = fmd_nvl_create_fault(hdl,
 428                                     GMEM_FAULT_DIMM_PAGES, GMEM_FLTMAXCONF,
 429                                     NULL, gmem_dimm_fru(dimm), rsrc);
 430                                 dimm->dimm_flags |= GMEM_F_FAULTING;
 431                                 gmem_dimm_dirty(hdl, dimm);
 432                                 fmd_case_add_suspect(hdl, cp, flt);
 433                                 fmd_case_solve(hdl, cp);

 434                                 nvlist_free(rsrc);
 435                                 return;
 436                         }
 437                 }
 438         }
 439 }
 440 
 441 /*
 442  * delete the expired duplicate CE time stamps
 443  */
 444 static void
 445 mq_prune_dup(fmd_hdl_t *hdl, gmem_mq_t *ip, uint64_t now)
 446 {
 447         tstamp_t *tsp, *next;
 448 
 449         for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
 450             tsp = next) {
 451                 next = gmem_list_next(tsp);
 452                 if (tsp->tstamp < now - GMEM_MQ_TIMELIM) {
 453                         gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);


 683                                 upos_array[++i].mq1 = NULL;
 684                         }
 685                 }
 686                 if (i - upos_pairs >= 2) {
 687                         /* Rule 4A violation */
 688                         rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
 689                         flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_4A,
 690                             GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsc);
 691                         for (j = upos_pairs; j < i; j++) {
 692                                 fmd_case_add_ereport(hdl,
 693                                     dimm->dimm_case.cc_cp,
 694                                     upos_array[j].mq1->mq_ep);
 695                                 fmd_case_add_ereport(hdl,
 696                                     dimm->dimm_case.cc_cp,
 697                                     upos_array[j].mq2->mq_ep);
 698                         }
 699                         dimm->dimm_flags |= GMEM_F_FAULTING;
 700                         gmem_dimm_dirty(hdl, dimm);
 701                         fmd_case_add_suspect(hdl, dimm->dimm_case.cc_cp, flt);
 702                         fmd_case_solve(hdl, dimm->dimm_case.cc_cp);

 703                         nvlist_free(rsc);
 704                         return;
 705                 }
 706                 upos_pairs = i;
 707                 assert(upos_pairs < 16);
 708         }
 709 }
 710 
 711 /*ARGSUSED*/
 712 gmem_evdisp_t
 713 gmem_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
 714 {
 715         uint16_t symbol_pos, cw;
 716         uint64_t phyaddr, offset, addr;
 717         uint32_t filter_ratio = 0;
 718         gmem_dimm_t *dimm;
 719         gmem_page_t *page;
 720         nvlist_t *fru = NULL;
 721         nvlist_t *topo_rsc = NULL;
 722         nvlist_t *rsrc, *det;