4220 * Use the default snapshot routine as it knows how to
4221 * deal with named kstats with long strings.
4222 */
4223 ksp->ks_update = ecc_kstat_update;
4224 kstat_install(ksp);
4225 } else {
4226 kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
4227 }
4228 }
4229 }
4230
4231 /*ARGSUSED*/
4232 static void
4233 leaky_bucket_timeout(void *arg)
4234 {
4235 int i;
4236 struct ce_info *psimm = mem_ce_simm;
4237
4238 for (i = 0; i < mem_ce_simm_size; i++) {
4239 if (psimm[i].leaky_bucket_cnt > 0)
4240 atomic_add_16(&psimm[i].leaky_bucket_cnt, -1);
4241 }
4242 add_leaky_bucket_timeout();
4243 }
4244
4245 static void
4246 add_leaky_bucket_timeout(void)
4247 {
4248 long timeout_in_microsecs;
4249
4250 /*
4251 * create timeout for next leak.
4252 *
4253 * The timeout interval is calculated as follows
4254 *
4255 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
4256 *
4257 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
4258 * in a minute), then multiply this by MICROSEC to get the interval
4259 * in microseconds. Divide this total by ecc_softerr_limit so that
4260 * the timeout interval is accurate to within a few microseconds.
4365 psimm[i].intermittent_total = 1;
4366 psimm[i].persistent_total = 0;
4367 psimm[i].sticky_total = 0;
4368 }
4369 ecc_error_info_data.count.value.ui32++;
4370 break;
4371 } else if (strncmp(unum, psimm[i].name, len) == 0) {
4372 /*
4373 * Found an existing entry for the current
4374 * memory module, adjust the counts.
4375 */
4376 if (status & ECC_STICKY) {
4377 psimm[i].sticky_total++;
4378 cmn_err(CE_NOTE,
4379 "[AFT0] Sticky Softerror encountered "
4380 "on Memory Module %s\n", unum);
4381 page_status = PR_MCE;
4382 } else if (status & ECC_PERSISTENT) {
4383 int new_value;
4384
4385 new_value = atomic_add_16_nv(
4386 &psimm[i].leaky_bucket_cnt, 1);
4387 psimm[i].persistent_total++;
4388 if (new_value > ecc_softerr_limit) {
4389 cmn_err(CE_NOTE, "[AFT0] Most recent %d"
4390 " soft errors from Memory Module"
4391 " %s exceed threshold (N=%d,"
4392 " T=%dh:%02dm) triggering page"
4393 " retire", new_value, unum,
4394 ecc_softerr_limit,
4395 ecc_softerr_interval / 60,
4396 ecc_softerr_interval % 60);
4397 atomic_add_16(
4398 &psimm[i].leaky_bucket_cnt, -1);
4399 page_status = PR_MCE;
4400 }
4401 } else { /* Intermittent */
4402 psimm[i].intermittent_total++;
4403 }
4404 break;
4405 }
4406 }
4407
4408 if (i >= mem_ce_simm_size)
4409 cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
4410 "space.\n");
4411
4412 return (page_status);
4413 }
4414
4415 /*
4416 * Function to support counting of IO detected CEs.
4417 */
4418 void
|
4220 * Use the default snapshot routine as it knows how to
4221 * deal with named kstats with long strings.
4222 */
4223 ksp->ks_update = ecc_kstat_update;
4224 kstat_install(ksp);
4225 } else {
4226 kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
4227 }
4228 }
4229 }
4230
4231 /*ARGSUSED*/
4232 static void
4233 leaky_bucket_timeout(void *arg)
4234 {
4235 int i;
4236 struct ce_info *psimm = mem_ce_simm;
4237
4238 for (i = 0; i < mem_ce_simm_size; i++) {
4239 if (psimm[i].leaky_bucket_cnt > 0)
4240 atomic_dec_16(&psimm[i].leaky_bucket_cnt);
4241 }
4242 add_leaky_bucket_timeout();
4243 }
4244
4245 static void
4246 add_leaky_bucket_timeout(void)
4247 {
4248 long timeout_in_microsecs;
4249
4250 /*
4251 * create timeout for next leak.
4252 *
4253 * The timeout interval is calculated as follows
4254 *
4255 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
4256 *
4257 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
4258 * in a minute), then multiply this by MICROSEC to get the interval
4259 * in microseconds. Divide this total by ecc_softerr_limit so that
4260 * the timeout interval is accurate to within a few microseconds.
4365 psimm[i].intermittent_total = 1;
4366 psimm[i].persistent_total = 0;
4367 psimm[i].sticky_total = 0;
4368 }
4369 ecc_error_info_data.count.value.ui32++;
4370 break;
4371 } else if (strncmp(unum, psimm[i].name, len) == 0) {
4372 /*
4373 * Found an existing entry for the current
4374 * memory module, adjust the counts.
4375 */
4376 if (status & ECC_STICKY) {
4377 psimm[i].sticky_total++;
4378 cmn_err(CE_NOTE,
4379 "[AFT0] Sticky Softerror encountered "
4380 "on Memory Module %s\n", unum);
4381 page_status = PR_MCE;
4382 } else if (status & ECC_PERSISTENT) {
4383 int new_value;
4384
4385 new_value = atomic_inc_16_nv(
4386 &psimm[i].leaky_bucket_cnt);
4387 psimm[i].persistent_total++;
4388 if (new_value > ecc_softerr_limit) {
4389 cmn_err(CE_NOTE, "[AFT0] Most recent %d"
4390 " soft errors from Memory Module"
4391 " %s exceed threshold (N=%d,"
4392 " T=%dh:%02dm) triggering page"
4393 " retire", new_value, unum,
4394 ecc_softerr_limit,
4395 ecc_softerr_interval / 60,
4396 ecc_softerr_interval % 60);
4397 atomic_dec_16(
4398 &psimm[i].leaky_bucket_cnt);
4399 page_status = PR_MCE;
4400 }
4401 } else { /* Intermittent */
4402 psimm[i].intermittent_total++;
4403 }
4404 break;
4405 }
4406 }
4407
4408 if (i >= mem_ce_simm_size)
4409 cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
4410 "space.\n");
4411
4412 return (page_status);
4413 }
4414
4415 /*
4416 * Function to support counting of IO detected CEs.
4417 */
4418 void
|