XXXX pass in cpu_pause_func via pause_cpus
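Note on the change under review (this note and the sketch below are annotations, not part of the webrev): pause_cpus() now takes a second argument so callers can hand a cpu_pause_func callback down to the pause machinery. Callers that need no callback, such as i_cpr_mp_setup() in this file, pass NULL for the new argument and keep the old behavior. A minimal sketch of the adjusted calling pattern follows; the callback prototype shown is an assumption for illustration, and quiesce_and_resume_cpus() is a hypothetical helper, not code from this file.

#include <sys/cpuvar.h>		/* pause_cpus(), start_cpus(), cpu_lock */

/*
 * Assumed prototype of the extended interface (illustration only):
 *	void pause_cpus(cpu_t *off_cp, void *(*cpu_pause_func)(void *));
 * Passing NULL for cpu_pause_func preserves the previous behavior.
 */
static void
quiesce_and_resume_cpus(void)
{
	mutex_enter(&cpu_lock);		/* pause_cpus() requires cpu_lock held */
	pause_cpus(NULL, NULL);		/* was: pause_cpus(NULL) */

	/* ... work that requires all other CPUs to be paused ... */

	start_cpus();			/* resume the paused CPUs */
	mutex_exit(&cpu_lock);
}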
--- old/usr/src/uts/sun4u/os/cpr_impl.c
+++ new/usr/src/uts/sun4u/os/cpr_impl.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Platform specific implementation code
28 28 */
29 29
30 30 #define SUNDDI_IMPL
31 31
32 32 #include <sys/types.h>
33 33 #include <sys/promif.h>
34 34 #include <sys/prom_isa.h>
35 35 #include <sys/prom_plat.h>
36 36 #include <sys/mmu.h>
37 37 #include <vm/hat_sfmmu.h>
38 38 #include <sys/iommu.h>
39 39 #include <sys/scb.h>
40 40 #include <sys/cpuvar.h>
41 41 #include <sys/intreg.h>
42 42 #include <sys/pte.h>
43 43 #include <vm/hat.h>
44 44 #include <vm/page.h>
45 45 #include <vm/as.h>
46 46 #include <sys/cpr.h>
47 47 #include <sys/kmem.h>
48 48 #include <sys/clock.h>
49 49 #include <sys/kmem.h>
50 50 #include <sys/panic.h>
51 51 #include <vm/seg_kmem.h>
52 52 #include <sys/cpu_module.h>
53 53 #include <sys/callb.h>
54 54 #include <sys/machsystm.h>
55 55 #include <sys/vmsystm.h>
56 56 #include <sys/systm.h>
57 57 #include <sys/archsystm.h>
58 58 #include <sys/stack.h>
59 59 #include <sys/fs/ufs_fs.h>
60 60 #include <sys/memlist.h>
61 61 #include <sys/bootconf.h>
62 62 #include <sys/thread.h>
63 63 #include <vm/vm_dep.h>
64 64
65 65 extern void cpr_clear_bitmaps(void);
66 66 extern int cpr_setbit(pfn_t ppn, int mapflag);
67 67 extern int cpr_clrbit(pfn_t ppn, int mapflag);
68 68 extern pgcnt_t cpr_scan_kvseg(int mapflag, bitfunc_t bitfunc, struct seg *seg);
69 69 extern pgcnt_t cpr_count_seg_pages(int mapflag, bitfunc_t bitfunc);
70 70 extern void dtlb_wr_entry(uint_t, tte_t *, uint64_t *);
71 71 extern void itlb_wr_entry(uint_t, tte_t *, uint64_t *);
72 72
73 73 static int i_cpr_storage_desc_alloc(csd_t **, pgcnt_t *, csd_t **, int);
74 74 static void i_cpr_storage_desc_init(csd_t *, pgcnt_t, csd_t *);
75 75 static caddr_t i_cpr_storage_data_alloc(pgcnt_t, pgcnt_t *, int);
76 76 static int cpr_dump_sensitive(vnode_t *, csd_t *);
77 77 static void i_cpr_clear_entries(uint64_t, uint64_t);
78 78 static void i_cpr_xcall(xcfunc_t);
79 79
80 80 void i_cpr_storage_free(void);
81 81
82 82 extern void *i_cpr_data_page;
83 83 extern int cpr_test_mode;
84 84 extern int cpr_nbitmaps;
85 85 extern char cpr_default_path[];
86 86 extern caddr_t textva, datava;
87 87
88 88 static struct cpr_map_info cpr_prom_retain[CPR_PROM_RETAIN_CNT];
89 89 caddr_t cpr_vaddr = NULL;
90 90
91 91 static uint_t sensitive_pages_saved;
92 92 static uint_t sensitive_size_saved;
93 93
94 94 caddr_t i_cpr_storage_data_base;
95 95 caddr_t i_cpr_storage_data_end;
96 96 csd_t *i_cpr_storage_desc_base;
97 97 csd_t *i_cpr_storage_desc_end; /* one byte beyond last used descp */
98 98 csd_t *i_cpr_storage_desc_last_used; /* last used descriptor */
99 99 caddr_t sensitive_write_ptr; /* position for next storage write */
100 100
101 101 size_t i_cpr_sensitive_bytes_dumped;
102 102 pgcnt_t i_cpr_sensitive_pgs_dumped;
103 103 pgcnt_t i_cpr_storage_data_sz; /* in pages */
104 104 pgcnt_t i_cpr_storage_desc_pgcnt; /* in pages */
105 105
106 106 ushort_t cpr_mach_type = CPR_MACHTYPE_4U;
107 107 static csu_md_t m_info;
108 108
109 109
110 110 #define MAX_STORAGE_RETRY 3
111 111 #define MAX_STORAGE_ALLOC_RETRY 3
112 112 #define INITIAL_ALLOC_PCNT 40 /* starting allocation percentage */
113 113 #define INTEGRAL 100 /* to get 1% precision */
114 114
115 115 #define EXTRA_RATE 2 /* add EXTRA_RATE% extra space */
116 116 #define EXTRA_DESCS 10
117 117
118 118 #define CPR_NO_STORAGE_DESC 1
119 119 #define CPR_NO_STORAGE_DATA 2
120 120
121 121 #define CIF_SPLICE 0
122 122 #define CIF_UNLINK 1
123 123
124 124
125 125 /*
126 126 * CPR miscellaneous support routines
127 127 */
128 128 #define cpr_open(path, mode, vpp) (vn_open(path, UIO_SYSSPACE, \
129 129 mode, 0600, vpp, CRCREAT, 0))
130 130 #define cpr_rdwr(rw, vp, basep, cnt) (vn_rdwr(rw, vp, (caddr_t)(basep), \
131 131 cnt, 0LL, UIO_SYSSPACE, 0, (rlim64_t)MAXOFF_T, CRED(), \
132 132 (ssize_t *)NULL))
133 133
134 134 /*
135 135 * definitions for saving/restoring prom pages
136 136 */
137 137 static void *ppage_buf;
138 138 static pgcnt_t ppage_count;
139 139 static pfn_t *pphys_list;
140 140 static size_t pphys_list_size;
141 141
142 142 typedef void (*tlb_rw_t)(uint_t, tte_t *, uint64_t *);
143 143 typedef void (*tlb_filter_t)(int, tte_t *, uint64_t, void *);
144 144
145 145 /*
146 146 * private struct for tlb handling
147 147 */
148 148 struct cpr_trans_info {
149 149 sutlb_t *dst;
150 150 sutlb_t *tail;
151 151 tlb_rw_t reader;
152 152 tlb_rw_t writer;
153 153 tlb_filter_t filter;
154 154 int index;
155 155 uint64_t skip; /* assumes TLB <= 64 locked entries */
156 156 };
157 157 typedef struct cpr_trans_info cti_t;
158 158
159 159
160 160 /*
161 161 * special handling for tlb info
162 162 */
163 163 #define WITHIN_OFW(va) \
164 164 (((va) > (uint64_t)OFW_START_ADDR) && ((va) < (uint64_t)OFW_END_ADDR))
165 165
166 166 #define WITHIN_NUCLEUS(va, base) \
167 167 (((va) >= (base)) && \
168 168 (((va) + MMU_PAGESIZE) <= ((base) + MMU_PAGESIZE4M)))
169 169
170 170 #define IS_BIGKTSB(va) \
171 171 (enable_bigktsb && \
172 172 ((va) >= (uint64_t)ktsb_base) && \
173 173 ((va) < (uint64_t)(ktsb_base + ktsb_sz)))
174 174
175 175
176 176 /*
177 177 * WARNING:
178 178 * the text from this file is linked to follow cpr_resume_setup.o;
179 179 * only add text between here and i_cpr_end_jumpback when it needs
180 180 * to be called during resume before we switch back to the kernel
181 181 * trap table. all the text in this range must fit within a page.
182 182 */
183 183
184 184
185 185 /*
186 186 * each time a machine is reset, the prom uses an inconsistent set of phys
187 187 * pages and the cif cookie may differ as well. so prior to restoring the
     188  188  * original prom, we have to use the new/tmp prom's translations
189 189 * when requesting prom services.
190 190 *
191 191 * cif_handler starts out as the original prom cookie, and that gets used
192 192 * by client_handler() to jump into the prom. here we splice-in a wrapper
193 193 * routine by writing cif_handler; client_handler() will now jump to the
194 194 * wrapper which switches the %tba to the new/tmp prom's trap table then
195 195 * jumps to the new cookie.
196 196 */
197 197 void
198 198 i_cpr_cif_setup(int action)
199 199 {
200 200 extern void *i_cpr_orig_cif, *cif_handler;
201 201 extern int i_cpr_cif_wrapper(void *);
202 202
203 203 /*
204 204 * save the original cookie and change the current cookie to the
205 205 * wrapper routine. later we just restore the original cookie.
206 206 */
207 207 if (action == CIF_SPLICE) {
208 208 i_cpr_orig_cif = cif_handler;
209 209 cif_handler = (void *)i_cpr_cif_wrapper;
210 210 } else if (action == CIF_UNLINK)
211 211 cif_handler = i_cpr_orig_cif;
212 212 }
213 213
214 214
215 215 /*
216 216 * launch slave cpus into kernel text, pause them,
217 217 * and restore the original prom pages
218 218 */
219 219 void
220 220 i_cpr_mp_setup(void)
221 221 {
222 222 extern void restart_other_cpu(int);
223 223 cpu_t *cp;
224 224
225 225 uint64_t kctx = kcontextreg;
226 226
227 227 /*
228 228 * Do not allow setting page size codes in MMU primary context
229 229 * register while using cif wrapper. This is needed to work
230 230 * around OBP incorrect handling of this MMU register.
231 231 */
232 232 kcontextreg = 0;
233 233
234 234 /*
235 235 * reset cpu_ready_set so x_calls work properly
236 236 */
237 237 CPUSET_ZERO(cpu_ready_set);
238 238 CPUSET_ADD(cpu_ready_set, getprocessorid());
239 239
240 240 /*
241 241 * setup cif to use the cookie from the new/tmp prom
242 242 * and setup tmp handling for calling prom services.
243 243 */
244 244 i_cpr_cif_setup(CIF_SPLICE);
245 245
246 246 /*
247 247 * at this point, only the nucleus and a few cpr pages are
248 248 * mapped in. once we switch to the kernel trap table,
249 249 * we can access the rest of kernel space.
250 250 */
251 251 prom_set_traptable(&trap_table);
252 252
253 253 if (ncpus > 1) {
254 254 sfmmu_init_tsbs();
255 255
256 256 mutex_enter(&cpu_lock);
257 257 /*
258 258 * All of the slave cpus are not ready at this time,
259 259 * yet the cpu structures have various cpu_flags set;
260 260 * clear cpu_flags and mutex_ready.
261 261 * Since we are coming up from a CPU suspend, the slave cpus
262 262 * are frozen.
263 263 */
264 264 for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next) {
265 265 cp->cpu_flags = CPU_FROZEN;
266 266 cp->cpu_m.mutex_ready = 0;
267 267 }
268 268
269 269 for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next)
270 270 restart_other_cpu(cp->cpu_id);
271 271
272 - pause_cpus(NULL);
272 + pause_cpus(NULL, NULL);
273 273 mutex_exit(&cpu_lock);
274 274
275 275 i_cpr_xcall(i_cpr_clear_entries);
276 276 } else
277 277 i_cpr_clear_entries(0, 0);
278 278
279 279 /*
280 280 * now unlink the cif wrapper; WARNING: do not call any
281 281 * prom_xxx() routines until after prom pages are restored.
282 282 */
283 283 i_cpr_cif_setup(CIF_UNLINK);
284 284
285 285 (void) i_cpr_prom_pages(CPR_PROM_RESTORE);
286 286
287 287 /* allow setting page size codes in MMU primary context register */
288 288 kcontextreg = kctx;
289 289 }
290 290
291 291
292 292 /*
293 293 * end marker for jumpback page;
294 294 * this symbol is used to check the size of i_cpr_resume_setup()
295 295 * and the above text. For simplicity, the Makefile needs to
296 296 * link i_cpr_resume_setup.o and cpr_impl.o consecutively.
297 297 */
298 298 void
299 299 i_cpr_end_jumpback(void)
300 300 {
301 301 }
302 302
303 303
304 304 /*
305 305 * scan tlb entries with reader; when valid entries are found,
306 306 * the filter routine will selectively save/clear them
307 307 */
308 308 static void
309 309 i_cpr_scan_tlb(cti_t *ctip)
310 310 {
311 311 uint64_t va_tag;
312 312 int tlb_index;
313 313 tte_t tte;
314 314
315 315 for (tlb_index = ctip->index; tlb_index >= 0; tlb_index--) {
316 316 (*ctip->reader)((uint_t)tlb_index, &tte, &va_tag);
317 317 if (va_tag && TTE_IS_VALID(&tte))
318 318 (*ctip->filter)(tlb_index, &tte, va_tag, ctip);
319 319 }
320 320 }
321 321
322 322
323 323 /*
324 324 * filter for locked tlb entries that reference the text/data nucleus
325 325 * and any bigktsb's; these will be reinstalled by cprboot on all cpus
326 326 */
327 327 /* ARGSUSED */
328 328 static void
329 329 i_cpr_lnb(int index, tte_t *ttep, uint64_t va_tag, void *ctrans)
330 330 {
331 331 cti_t *ctip;
332 332
333 333 /*
334 334 * record tlb data at ctip->dst; the target tlb index starts
335 335 * at the highest tlb offset and moves towards 0. the prom
336 336 * reserves both dtlb and itlb index 0. any selected entry
337 337 * also gets marked to prevent being flushed during resume
338 338 */
339 339 if (TTE_IS_LOCKED(ttep) && (va_tag == (uint64_t)textva ||
340 340 va_tag == (uint64_t)datava || IS_BIGKTSB(va_tag))) {
341 341 ctip = ctrans;
342 342 while ((1 << ctip->index) & ctip->skip)
343 343 ctip->index--;
344 344 ASSERT(ctip->index > 0);
345 345 ASSERT(ctip->dst < ctip->tail);
346 346 ctip->dst->tte.ll = ttep->ll;
347 347 ctip->dst->va_tag = va_tag;
348 348 ctip->dst->index = ctip->index--;
349 349 ctip->dst->tmp = 0;
350 350 ctip->dst++;
351 351 }
352 352 }
353 353
354 354
355 355 /*
356 356 * some tlb entries are stale, filter for unlocked entries
357 357 * within the prom virt range and clear them
358 358 */
359 359 static void
360 360 i_cpr_ufw(int index, tte_t *ttep, uint64_t va_tag, void *ctrans)
361 361 {
362 362 sutlb_t clr;
363 363 cti_t *ctip;
364 364
365 365 if (!TTE_IS_LOCKED(ttep) && WITHIN_OFW(va_tag)) {
366 366 ctip = ctrans;
367 367 bzero(&clr, sizeof (clr));
368 368 (*ctip->writer)((uint_t)index, &clr.tte, &clr.va_tag);
369 369 }
370 370 }
371 371
372 372
373 373 /*
374 374 * some of the entries installed by cprboot are needed only on a
375 375 * short-term basis and need to be flushed to avoid clogging the tlbs.
376 376 * scan the dtte/itte arrays for items marked as temporary and clear
377 377 * dtlb/itlb entries using wrfunc.
378 378 */
379 379 static void
380 380 i_cpr_clear_tmp(sutlb_t *listp, int max, tlb_rw_t wrfunc)
381 381 {
382 382 sutlb_t clr, *tail;
383 383
384 384 bzero(&clr, sizeof (clr));
385 385 for (tail = listp + max; listp < tail && listp->va_tag; listp++) {
386 386 if (listp->tmp)
387 387 (*wrfunc)((uint_t)listp->index, &clr.tte, &clr.va_tag);
388 388 }
389 389 }
390 390
391 391
392 392 /* ARGSUSED */
393 393 static void
394 394 i_cpr_clear_entries(uint64_t arg1, uint64_t arg2)
395 395 {
396 396 extern void demap_all(void);
397 397 cti_t cti;
398 398
399 399 i_cpr_clear_tmp(m_info.dtte, CPR_MAX_TLB, dtlb_wr_entry);
400 400 i_cpr_clear_tmp(m_info.itte, CPR_MAX_TLB, itlb_wr_entry);
401 401
402 402 /*
403 403 * for newer cpus that implement DEMAP_ALL_TYPE, demap_all is
404 404 * a second label for vtag_flushall. the call is made using
405 405 * vtag_flushall() instead of demap_all() due to runtime and
406 406 * krtld results with both older and newer cpu modules.
407 407 */
408 408 if (&demap_all != 0) {
409 409 vtag_flushall();
410 410 return;
411 411 }
412 412
413 413 /*
414 414 * for older V9 cpus, scan tlbs and clear stale entries
415 415 */
416 416 bzero(&cti, sizeof (cti));
417 417 cti.filter = i_cpr_ufw;
418 418
419 419 cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;
420 420 cti.reader = dtlb_rd_entry;
421 421 cti.writer = dtlb_wr_entry;
422 422 i_cpr_scan_tlb(&cti);
423 423
424 424 cti.index = cpunodes[CPU->cpu_id].itlb_size - 1;
425 425 cti.reader = itlb_rd_entry;
426 426 cti.writer = itlb_wr_entry;
427 427 i_cpr_scan_tlb(&cti);
428 428 }
429 429
430 430
431 431 /*
432 432 * craft tlb info for tmp use during resume; this data gets used by
433 433 * cprboot to install tlb entries. we also mark each struct as tmp
434 434 * so those tlb entries will get flushed after switching to the kernel
435 435 * trap table. no data needs to be recorded for vaddr when it falls
436 436 * within the nucleus since we've already recorded nucleus ttes and
     437  437  * an 8K tte would conflict with a 4MB tte. eg: the cpr module
438 438 * text/data may have been loaded into the text/data nucleus.
439 439 */
440 440 static void
441 441 i_cpr_make_tte(cti_t *ctip, void *vaddr, caddr_t nbase)
442 442 {
443 443 pfn_t ppn;
444 444 uint_t rw;
445 445
446 446 if (WITHIN_NUCLEUS((caddr_t)vaddr, nbase))
447 447 return;
448 448
449 449 while ((1 << ctip->index) & ctip->skip)
450 450 ctip->index--;
451 451 ASSERT(ctip->index > 0);
452 452 ASSERT(ctip->dst < ctip->tail);
453 453
454 454 /*
455 455 * without any global service available to lookup
456 456 * a tte by vaddr, we craft our own here:
457 457 */
458 458 ppn = va_to_pfn(vaddr);
459 459 rw = (nbase == datava) ? TTE_HWWR_INT : 0;
460 460 ctip->dst->tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
461 461 ctip->dst->tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
462 462 TTE_CP_INT | TTE_PRIV_INT | rw;
463 463 ctip->dst->va_tag = ((uintptr_t)vaddr & MMU_PAGEMASK);
464 464 ctip->dst->index = ctip->index--;
465 465 ctip->dst->tmp = 1;
466 466 ctip->dst++;
467 467 }
468 468
469 469
470 470 static void
471 471 i_cpr_xcall(xcfunc_t func)
472 472 {
473 473 uint_t pil, reset_pil;
474 474
475 475 pil = getpil();
476 476 if (pil < XCALL_PIL)
477 477 reset_pil = 0;
478 478 else {
479 479 reset_pil = 1;
480 480 setpil(XCALL_PIL - 1);
481 481 }
482 482 xc_some(cpu_ready_set, func, 0, 0);
483 483 if (reset_pil)
484 484 setpil(pil);
485 485 }
486 486
487 487
488 488 /*
489 489 * restart paused slave cpus
490 490 */
491 491 void
492 492 i_cpr_machdep_setup(void)
493 493 {
494 494 if (ncpus > 1) {
495 495 CPR_DEBUG(CPR_DEBUG1, "MP restarted...\n");
496 496 mutex_enter(&cpu_lock);
497 497 start_cpus();
498 498 mutex_exit(&cpu_lock);
499 499 }
500 500 }
501 501
502 502
503 503 /*
504 504 * Stop all interrupt activities in the system
505 505 */
506 506 void
507 507 i_cpr_stop_intr(void)
508 508 {
509 509 (void) spl7();
510 510 }
511 511
512 512 /*
513 513 * Set machine up to take interrupts
514 514 */
515 515 void
516 516 i_cpr_enable_intr(void)
517 517 {
518 518 (void) spl0();
519 519 }
520 520
521 521
522 522 /*
523 523 * record cpu nodes and ids
524 524 */
525 525 static void
526 526 i_cpr_save_cpu_info(void)
527 527 {
528 528 struct sun4u_cpu_info *scip;
529 529 cpu_t *cp;
530 530
531 531 scip = m_info.sci;
532 532 cp = CPU;
533 533 do {
534 534 ASSERT(scip < &m_info.sci[NCPU]);
535 535 scip->cpu_id = cp->cpu_id;
536 536 scip->node = cpunodes[cp->cpu_id].nodeid;
537 537 scip++;
538 538 } while ((cp = cp->cpu_next) != CPU);
539 539 }
540 540
541 541
542 542 /*
543 543 * Write necessary machine dependent information to cpr state file,
544 544 * eg. sun4u mmu ctx secondary for the current running process (cpr) ...
545 545 */
546 546 int
547 547 i_cpr_write_machdep(vnode_t *vp)
548 548 {
549 549 extern uint_t getpstate(), getwstate();
550 550 extern uint_t i_cpr_tstack_size;
551 551 const char ustr[] = ": unix-tte 2drop false ;";
552 552 uintptr_t tinfo;
553 553 label_t *ltp;
554 554 cmd_t cmach;
555 555 char *fmt;
556 556 int rc;
557 557
558 558 /*
559 559 * ustr[] is used as temporary forth words during
560 560 * slave startup sequence, see sfmmu_mp_startup()
561 561 */
562 562
563 563 cmach.md_magic = (uint_t)CPR_MACHDEP_MAGIC;
564 564 cmach.md_size = sizeof (m_info) + sizeof (ustr);
565 565
566 566 if (rc = cpr_write(vp, (caddr_t)&cmach, sizeof (cmach))) {
567 567 cpr_err(CE_WARN, "Failed to write descriptor.");
568 568 return (rc);
569 569 }
570 570
571 571 /*
572 572 * m_info is now cleared in i_cpr_dump_setup()
573 573 */
574 574 m_info.ksb = (uint32_t)STACK_BIAS;
575 575 m_info.kpstate = (uint16_t)getpstate();
576 576 m_info.kwstate = (uint16_t)getwstate();
577 577 CPR_DEBUG(CPR_DEBUG1, "stack bias 0x%x, pstate 0x%x, wstate 0x%x\n",
578 578 m_info.ksb, m_info.kpstate, m_info.kwstate);
579 579
580 580 ltp = &ttolwp(curthread)->lwp_qsav;
581 581 m_info.qsav_pc = (cpr_ext)ltp->val[0];
582 582 m_info.qsav_sp = (cpr_ext)ltp->val[1];
583 583
584 584 /*
585 585 * Set secondary context to INVALID_CONTEXT to force the HAT
586 586 * to re-setup the MMU registers and locked TTEs it needs for
587 587 * TLB miss handling.
588 588 */
589 589 m_info.mmu_ctx_sec = INVALID_CONTEXT;
590 590 m_info.mmu_ctx_pri = KCONTEXT;
591 591
592 592 tinfo = (uintptr_t)curthread;
593 593 m_info.thrp = (cpr_ptr)tinfo;
594 594
595 595 tinfo = (uintptr_t)i_cpr_resume_setup;
596 596 m_info.func = (cpr_ptr)tinfo;
597 597
598 598 /*
599 599 * i_cpr_data_page is comprised of a 4K stack area and a few
600 600 * trailing data symbols; the page is shared by the prom and
601 601 * kernel during resume. the stack size is recorded here
602 602 * and used by cprboot to set %sp
603 603 */
604 604 tinfo = (uintptr_t)&i_cpr_data_page;
605 605 m_info.tmp_stack = (cpr_ptr)tinfo;
606 606 m_info.tmp_stacksize = i_cpr_tstack_size;
607 607
608 608 m_info.test_mode = cpr_test_mode;
609 609
610 610 i_cpr_save_cpu_info();
611 611
612 612 if (rc = cpr_write(vp, (caddr_t)&m_info, sizeof (m_info))) {
613 613 cpr_err(CE_WARN, "Failed to write machdep info.");
614 614 return (rc);
615 615 }
616 616
617 617 fmt = "error writing %s forth info";
618 618 if (rc = cpr_write(vp, (caddr_t)ustr, sizeof (ustr)))
619 619 cpr_err(CE_WARN, fmt, "unix-tte");
620 620
621 621 return (rc);
622 622 }
623 623
624 624
625 625 /*
626 626 * Save miscellaneous information which needs to be written to the
627 627 * state file. This information is required to re-initialize
628 628 * kernel/prom handshaking.
629 629 */
630 630 void
631 631 i_cpr_save_machdep_info(void)
632 632 {
633 633 CPR_DEBUG(CPR_DEBUG5, "jumpback size = 0x%lx\n",
634 634 (uintptr_t)&i_cpr_end_jumpback -
635 635 (uintptr_t)i_cpr_resume_setup);
636 636
637 637 /*
638 638 * Verify the jumpback code all falls in one page.
639 639 */
640 640 if (((uintptr_t)&i_cpr_end_jumpback & MMU_PAGEMASK) !=
641 641 ((uintptr_t)i_cpr_resume_setup & MMU_PAGEMASK))
642 642 cpr_err(CE_PANIC, "jumpback code exceeds one page.");
643 643 }
644 644
645 645
646 646 /*
647 647 * cpu0 should contain bootcpu info
648 648 */
649 649 cpu_t *
650 650 i_cpr_bootcpu(void)
651 651 {
652 652 return (&cpu0);
653 653 }
654 654
655 655 processorid_t
656 656 i_cpr_bootcpuid(void)
657 657 {
658 658 return (0);
659 659 }
660 660
661 661 /*
662 662 * Return the virtual address of the mapping area
663 663 */
664 664 caddr_t
665 665 i_cpr_map_setup(void)
666 666 {
667 667 /*
668 668 * Allocate a virtual memory range spanned by an hmeblk.
669 669 * This would be 8 hments or 64k bytes. Starting VA
670 670 * must be 64k (8-page) aligned.
671 671 */
672 672 cpr_vaddr = vmem_xalloc(heap_arena,
673 673 mmu_ptob(NHMENTS), mmu_ptob(NHMENTS),
674 674 0, 0, NULL, NULL, VM_NOSLEEP);
675 675 return (cpr_vaddr);
676 676 }
677 677
678 678 /*
679 679 * create tmp locked tlb entries for a group of phys pages;
680 680 *
681 681 * i_cpr_mapin/i_cpr_mapout should always be called in pairs,
682 682 * otherwise would fill up a tlb with locked entries
683 683 */
684 684 void
685 685 i_cpr_mapin(caddr_t vaddr, uint_t pages, pfn_t ppn)
686 686 {
687 687 tte_t tte;
688 688 extern pfn_t curthreadpfn;
689 689 extern int curthreadremapped;
690 690
691 691 curthreadremapped = (ppn <= curthreadpfn && curthreadpfn < ppn + pages);
692 692
693 693 for (; pages--; ppn++, vaddr += MMU_PAGESIZE) {
694 694 tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
695 695 tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
696 696 TTE_CP_INT | TTE_PRIV_INT | TTE_HWWR_INT;
697 697 sfmmu_dtlb_ld_kva(vaddr, &tte);
698 698 }
699 699 }
700 700
701 701 void
702 702 i_cpr_mapout(caddr_t vaddr, uint_t pages)
703 703 {
704 704 extern int curthreadremapped;
705 705
706 706 if (curthreadremapped && vaddr <= (caddr_t)curthread &&
707 707 (caddr_t)curthread < vaddr + pages * MMU_PAGESIZE)
708 708 curthreadremapped = 0;
709 709
710 710 for (; pages--; vaddr += MMU_PAGESIZE)
711 711 vtag_flushpage(vaddr, (uint64_t)ksfmmup);
712 712 }
713 713
714 714 /*
715 715 * We're done using the mapping area; release virtual space
716 716 */
717 717 void
718 718 i_cpr_map_destroy(void)
719 719 {
720 720 vmem_free(heap_arena, cpr_vaddr, mmu_ptob(NHMENTS));
721 721 cpr_vaddr = NULL;
722 722 }
723 723
724 724 /* ARGSUSED */
725 725 void
726 726 i_cpr_handle_xc(int flag)
727 727 {
728 728 }
729 729
730 730
731 731 /*
732 732 * This function takes care of pages which are not in kas or need to be
733 733 * taken care of in a special way. For example, panicbuf pages are not
734 734 * in kas and their pages are allocated via prom_retain().
735 735 */
736 736 pgcnt_t
737 737 i_cpr_count_special_kpages(int mapflag, bitfunc_t bitfunc)
738 738 {
739 739 struct cpr_map_info *pri, *tail;
740 740 pgcnt_t pages, total = 0;
741 741 pfn_t pfn;
742 742
743 743 /*
744 744 * Save information about prom retained panicbuf pages
745 745 */
746 746 if (bitfunc == cpr_setbit) {
747 747 pri = &cpr_prom_retain[CPR_PANICBUF];
748 748 pri->virt = (cpr_ptr)panicbuf;
749 749 pri->phys = va_to_pa(panicbuf);
750 750 pri->size = sizeof (panicbuf);
751 751 }
752 752
753 753 /*
754 754 * Go through the prom_retain array to tag those pages.
755 755 */
756 756 tail = &cpr_prom_retain[CPR_PROM_RETAIN_CNT];
757 757 for (pri = cpr_prom_retain; pri < tail; pri++) {
758 758 pages = mmu_btopr(pri->size);
759 759 for (pfn = ADDR_TO_PN(pri->phys); pages--; pfn++) {
760 760 if (pf_is_memory(pfn)) {
761 761 if (bitfunc == cpr_setbit) {
762 762 if ((*bitfunc)(pfn, mapflag) == 0)
763 763 total++;
764 764 } else
765 765 total++;
766 766 }
767 767 }
768 768 }
769 769
770 770 return (total);
771 771 }
772 772
773 773
774 774 /*
775 775 * Free up memory-related resources here. We start by freeing buffers
776 776 * allocated during suspend initialization. Also, free up the mapping
777 777 * resources allocated in cpr_init().
778 778 */
779 779 void
780 780 i_cpr_free_memory_resources(void)
781 781 {
782 782 (void) i_cpr_prom_pages(CPR_PROM_FREE);
783 783 i_cpr_map_destroy();
784 784 i_cpr_storage_free();
785 785 }
786 786
787 787
788 788 /*
789 789 * Derived from cpr_write_statefile().
790 790 * Save the sensitive pages to the storage area and do bookkeeping
791 791 * using the sensitive descriptors. Each descriptor will contain no more
792 792 * than CPR_MAXCONTIG amount of contiguous pages to match the max amount
793 793 * of pages that statefile gets written to disk at each write.
794 794 * XXX The CPR_MAXCONTIG can be changed to the size of the compression
795 795 * scratch area.
796 796 */
797 797 static int
798 798 i_cpr_save_to_storage(void)
799 799 {
800 800 sensitive_size_saved = 0;
801 801 sensitive_pages_saved = 0;
802 802 sensitive_write_ptr = i_cpr_storage_data_base;
803 803 return (cpr_contig_pages(NULL, SAVE_TO_STORAGE));
804 804 }
805 805
806 806
807 807 /*
808 808 * This routine allocates space to save the sensitive kernel pages,
809 809 * i.e. kernel data nucleus, kvalloc and kvseg segments.
810 810 * It's assumed that those segments are the only areas that can be
811 811 * contaminated by memory allocations during statefile dumping.
812 812 * The space allocated here contains:
813 813 * A list of descriptors describing the saved sensitive pages.
814 814 * The storage area for saving the compressed sensitive kernel pages.
815 815 * Since storage pages are allocated from segkmem, they need to be
816 816 * excluded when saving.
817 817 */
818 818 int
819 819 i_cpr_save_sensitive_kpages(void)
820 820 {
821 821 static const char pages_fmt[] = "\n%s %s allocs\n"
822 822 " spages %ld, vpages %ld, diff %ld\n";
823 823 int retry_cnt;
824 824 int error = 0;
825 825 pgcnt_t pages, spages, vpages;
826 826 caddr_t addr;
827 827 char *str;
828 828
829 829 /*
830 830 * Tag sensitive kpages. Allocate space for storage descriptors
831 831 * and storage data area based on the resulting bitmaps.
832 832 * Note: The storage space will be part of the sensitive
833 833 * segment, so we need to tag kpages here before the storage
834 834 * is actually allocated just so their space won't be accounted
835 835 * for. They will not be part of the statefile although those
836 836 * pages will be claimed by cprboot.
837 837 */
838 838 cpr_clear_bitmaps();
839 839
840 840 spages = i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
841 841 vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);
842 842 pages = spages - vpages;
843 843
844 844 str = "i_cpr_save_sensitive_kpages:";
845 845 CPR_DEBUG(CPR_DEBUG7, pages_fmt, "before", str, spages, vpages, pages);
846 846
847 847 /*
848 848 * Allocate space to save the clean sensitive kpages
849 849 */
850 850 for (retry_cnt = 0; retry_cnt < MAX_STORAGE_ALLOC_RETRY; retry_cnt++) {
851 851 /*
852 852 * Alloc on first pass or realloc if we are retrying because
853 853 * of insufficient storage for sensitive pages
854 854 */
855 855 if (retry_cnt == 0 || error == ENOMEM) {
856 856 if (i_cpr_storage_data_base) {
857 857 kmem_free(i_cpr_storage_data_base,
858 858 mmu_ptob(i_cpr_storage_data_sz));
859 859 i_cpr_storage_data_base = NULL;
860 860 i_cpr_storage_data_sz = 0;
861 861 }
862 862 addr = i_cpr_storage_data_alloc(pages,
863 863 &i_cpr_storage_data_sz, retry_cnt);
864 864 if (addr == NULL) {
865 865 CPR_DEBUG(CPR_DEBUG7,
866 866 "\n%s can't allocate data storage space!\n",
867 867 str);
868 868 return (ENOMEM);
869 869 }
870 870 i_cpr_storage_data_base = addr;
871 871 i_cpr_storage_data_end =
872 872 addr + mmu_ptob(i_cpr_storage_data_sz);
873 873 }
874 874
875 875 /*
876 876 * Allocate on first pass, only realloc if retry is because of
877 877 * insufficient descriptors, but reset contents on each pass
878 878 * (desc_alloc resets contents as well)
879 879 */
880 880 if (retry_cnt == 0 || error == -1) {
881 881 error = i_cpr_storage_desc_alloc(
882 882 &i_cpr_storage_desc_base, &i_cpr_storage_desc_pgcnt,
883 883 &i_cpr_storage_desc_end, retry_cnt);
884 884 if (error != 0)
885 885 return (error);
886 886 } else {
887 887 i_cpr_storage_desc_init(i_cpr_storage_desc_base,
888 888 i_cpr_storage_desc_pgcnt, i_cpr_storage_desc_end);
889 889 }
890 890
891 891 /*
892 892 * We are ready to save the sensitive kpages to storage.
893 893 * We cannot trust what's tagged in the bitmaps anymore
894 894 * after storage allocations. Clear up the bitmaps and
895 895 * retag the sensitive kpages again. The storage pages
896 896 * should be untagged.
897 897 */
898 898 cpr_clear_bitmaps();
899 899
900 900 spages =
901 901 i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
902 902 vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);
903 903
904 904 CPR_DEBUG(CPR_DEBUG7, pages_fmt, "after ", str,
905 905 spages, vpages, spages - vpages);
906 906
907 907 /*
908 908 * Returns 0 on success, -1 if too few descriptors, and
909 909 * ENOMEM if not enough space to save sensitive pages
910 910 */
911 911 CPR_DEBUG(CPR_DEBUG1, "compressing pages to storage...\n");
912 912 error = i_cpr_save_to_storage();
913 913 if (error == 0) {
914 914 /* Saving to storage succeeded */
915 915 CPR_DEBUG(CPR_DEBUG1, "compressed %d pages\n",
916 916 sensitive_pages_saved);
917 917 break;
918 918 } else if (error == -1)
919 919 CPR_DEBUG(CPR_DEBUG1, "%s too few descriptors\n", str);
920 920 }
921 921 if (error == -1)
922 922 error = ENOMEM;
923 923 return (error);
924 924 }
925 925
926 926
927 927 /*
928 928 * Estimate how much memory we will need to save
929 929 * the sensitive pages with compression.
930 930 */
931 931 static caddr_t
932 932 i_cpr_storage_data_alloc(pgcnt_t pages, pgcnt_t *alloc_pages, int retry_cnt)
933 933 {
934 934 pgcnt_t alloc_pcnt, last_pcnt;
935 935 caddr_t addr;
936 936 char *str;
937 937
938 938 str = "i_cpr_storage_data_alloc:";
939 939 if (retry_cnt == 0) {
940 940 /*
941 941 * common compression ratio is about 3:1
942 942 * initial storage allocation is estimated at 40%
943 943 * to cover the majority of cases
944 944 */
945 945 alloc_pcnt = INITIAL_ALLOC_PCNT;
946 946 *alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
947 947 CPR_DEBUG(CPR_DEBUG7, "%s sensitive pages: %ld\n", str, pages);
948 948 CPR_DEBUG(CPR_DEBUG7,
949 949 "%s initial est pages: %ld, alloc %ld%%\n",
950 950 str, *alloc_pages, alloc_pcnt);
951 951 } else {
952 952 /*
953 953 * calculate the prior compression percentage (x100)
954 954 * from the last attempt to save sensitive pages
955 955 */
956 956 ASSERT(sensitive_pages_saved != 0);
957 957 last_pcnt = (mmu_btopr(sensitive_size_saved) * INTEGRAL) /
958 958 sensitive_pages_saved;
959 959 CPR_DEBUG(CPR_DEBUG7, "%s last ratio %ld%%\n", str, last_pcnt);
960 960
961 961 /*
962 962 * new estimated storage size is based on
963 963 * the larger ratio + 5% for each retry:
964 964 * pages * (last + [5%, 10%])
965 965 */
966 966 alloc_pcnt = MAX(last_pcnt, INITIAL_ALLOC_PCNT) +
967 967 (retry_cnt * 5);
968 968 *alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
969 969 CPR_DEBUG(CPR_DEBUG7, "%s Retry est pages: %ld, alloc %ld%%\n",
970 970 str, *alloc_pages, alloc_pcnt);
971 971 }
972 972
973 973 addr = kmem_alloc(mmu_ptob(*alloc_pages), KM_NOSLEEP);
974 974 CPR_DEBUG(CPR_DEBUG7, "%s alloc %ld pages\n", str, *alloc_pages);
975 975 return (addr);
976 976 }
977 977
978 978
979 979 void
980 980 i_cpr_storage_free(void)
981 981 {
982 982 /* Free descriptors */
983 983 if (i_cpr_storage_desc_base) {
984 984 kmem_free(i_cpr_storage_desc_base,
985 985 mmu_ptob(i_cpr_storage_desc_pgcnt));
986 986 i_cpr_storage_desc_base = NULL;
987 987 i_cpr_storage_desc_pgcnt = 0;
988 988 }
989 989
990 990
991 991 /* Data storage */
992 992 if (i_cpr_storage_data_base) {
993 993 kmem_free(i_cpr_storage_data_base,
994 994 mmu_ptob(i_cpr_storage_data_sz));
995 995 i_cpr_storage_data_base = NULL;
996 996 i_cpr_storage_data_sz = 0;
997 997 }
998 998 }
999 999
1000 1000
1001 1001 /*
1002 1002 * This routine is derived from cpr_compress_and_write().
1003 1003 * 1. Do bookkeeping in the descriptor for the contiguous sensitive chunk.
1004 1004 * 2. Compress and save the clean sensitive pages into the storage area.
1005 1005 */
1006 1006 int
1007 1007 i_cpr_compress_and_save(int chunks, pfn_t spfn, pgcnt_t pages)
1008 1008 {
1009 1009 extern char *cpr_compress_pages(cpd_t *, pgcnt_t, int);
1010 1010 extern caddr_t i_cpr_storage_data_end;
1011 1011 uint_t remaining, datalen;
1012 1012 uint32_t test_usum;
1013 1013 char *datap;
1014 1014 csd_t *descp;
1015 1015 cpd_t cpd;
1016 1016 int error;
1017 1017
1018 1018 /*
1019 1019 * Fill next empty storage descriptor
1020 1020 */
1021 1021 descp = i_cpr_storage_desc_base + chunks - 1;
1022 1022 if (descp >= i_cpr_storage_desc_end) {
1023 1023 CPR_DEBUG(CPR_DEBUG1, "ran out of descriptors, base 0x%p, "
1024 1024 "chunks %d, end 0x%p, descp 0x%p\n",
1025 1025 (void *)i_cpr_storage_desc_base, chunks,
1026 1026 (void *)i_cpr_storage_desc_end, (void *)descp);
1027 1027 return (-1);
1028 1028 }
1029 1029 ASSERT(descp->csd_dirty_spfn == (uint_t)-1);
1030 1030 i_cpr_storage_desc_last_used = descp;
1031 1031
1032 1032 descp->csd_dirty_spfn = spfn;
1033 1033 descp->csd_dirty_npages = pages;
1034 1034
1035 1035 i_cpr_mapin(CPR->c_mapping_area, pages, spfn);
1036 1036
1037 1037 /*
1038 1038 * try compressing pages and copy cpd fields
1039 1039 * pfn is copied for debug use
1040 1040 */
1041 1041 cpd.cpd_pfn = spfn;
1042 1042 datap = cpr_compress_pages(&cpd, pages, C_COMPRESSING);
1043 1043 datalen = cpd.cpd_length;
1044 1044 descp->csd_clean_compressed = (cpd.cpd_flag & CPD_COMPRESS);
1045 1045 #ifdef DEBUG
1046 1046 descp->csd_usum = cpd.cpd_usum;
1047 1047 descp->csd_csum = cpd.cpd_csum;
1048 1048 #endif
1049 1049
1050 1050 error = 0;
1051 1051
1052 1052 /*
1053 1053 * Save the raw or compressed data to the storage area pointed to by
1054 1054 * sensitive_write_ptr. Make sure the storage space is big enough to
1055 1055 * hold the result. Otherwise roll back to increase the storage space.
1056 1056 */
1057 1057 descp->csd_clean_sva = (cpr_ptr)sensitive_write_ptr;
1058 1058 descp->csd_clean_sz = datalen;
1059 1059 if ((sensitive_write_ptr + datalen) < i_cpr_storage_data_end) {
1060 1060 extern void cprbcopy(void *, void *, size_t);
1061 1061
1062 1062 cprbcopy(datap, sensitive_write_ptr, datalen);
1063 1063 sensitive_size_saved += datalen;
1064 1064 sensitive_pages_saved += descp->csd_dirty_npages;
1065 1065 sensitive_write_ptr += datalen;
1066 1066 } else {
1067 1067 remaining = (i_cpr_storage_data_end - sensitive_write_ptr);
1068 1068 CPR_DEBUG(CPR_DEBUG1, "i_cpr_compress_and_save: The storage "
1069 1069 "space is too small!\ngot %d, want %d\n\n",
1070 1070 remaining, (remaining + datalen));
1071 1071 #ifdef DEBUG
1072 1072 /*
1073 1073 * Check to see if the content of the sensitive pages that we
1074 1074 * just copied have changed during this small time window.
1075 1075 */
1076 1076 test_usum = checksum32(CPR->c_mapping_area, mmu_ptob(pages));
1077 1077 descp->csd_usum = cpd.cpd_usum;
1078 1078 if (test_usum != descp->csd_usum) {
1079 1079 CPR_DEBUG(CPR_DEBUG1, "\nWARNING: "
1080 1080 "i_cpr_compress_and_save: "
1081 1081 "Data in the range of pfn 0x%lx to pfn "
1082 1082 "0x%lx has changed after they are saved "
1083 1083 "into storage.", spfn, (spfn + pages - 1));
1084 1084 }
1085 1085 #endif
1086 1086 error = ENOMEM;
1087 1087 }
1088 1088
1089 1089 i_cpr_mapout(CPR->c_mapping_area, pages);
1090 1090 return (error);
1091 1091 }
1092 1092
1093 1093
1094 1094 /*
1095 1095 * This routine is derived from cpr_count_kpages().
1096 1096 * It goes through kernel data nucleus and segkmem segments to select
1097 1097 * pages in use and mark them in the corresponding bitmap.
1098 1098 */
1099 1099 pgcnt_t
1100 1100 i_cpr_count_sensitive_kpages(int mapflag, bitfunc_t bitfunc)
1101 1101 {
1102 1102 pgcnt_t kdata_cnt = 0, segkmem_cnt = 0;
1103 1103 extern caddr_t e_moddata;
1104 1104 extern struct seg kvalloc;
1105 1105 extern struct seg kmem64;
1106 1106 size_t size;
1107 1107
1108 1108 /*
1109 1109 * Kernel data nucleus pages
1110 1110 */
1111 1111 size = e_moddata - s_data;
1112 1112 kdata_cnt += cpr_count_pages(s_data, size,
1113 1113 mapflag, bitfunc, DBG_SHOWRANGE);
1114 1114
1115 1115 /*
1116 1116 * kvseg and kvalloc pages
1117 1117 */
1118 1118 segkmem_cnt += cpr_scan_kvseg(mapflag, bitfunc, &kvseg);
1119 1119 segkmem_cnt += cpr_count_pages(kvalloc.s_base, kvalloc.s_size,
1120 1120 mapflag, bitfunc, DBG_SHOWRANGE);
1121 1121
1122 1122 /* segment to support kernel memory usage above 32-bit space (4GB) */
1123 1123 if (kmem64.s_base)
1124 1124 segkmem_cnt += cpr_count_pages(kmem64.s_base, kmem64.s_size,
1125 1125 mapflag, bitfunc, DBG_SHOWRANGE);
1126 1126
1127 1127 CPR_DEBUG(CPR_DEBUG7, "\ni_cpr_count_sensitive_kpages:\n"
1128 1128 "\tkdata_cnt %ld + segkmem_cnt %ld = %ld pages\n",
1129 1129 kdata_cnt, segkmem_cnt, kdata_cnt + segkmem_cnt);
1130 1130
1131 1131 return (kdata_cnt + segkmem_cnt);
1132 1132 }
1133 1133
1134 1134
1135 1135 pgcnt_t
1136 1136 i_cpr_count_storage_pages(int mapflag, bitfunc_t bitfunc)
1137 1137 {
1138 1138 pgcnt_t count = 0;
1139 1139
1140 1140 if (i_cpr_storage_desc_base) {
1141 1141 count += cpr_count_pages((caddr_t)i_cpr_storage_desc_base,
1142 1142 (size_t)mmu_ptob(i_cpr_storage_desc_pgcnt),
1143 1143 mapflag, bitfunc, DBG_SHOWRANGE);
1144 1144 }
1145 1145 if (i_cpr_storage_data_base) {
1146 1146 count += cpr_count_pages(i_cpr_storage_data_base,
1147 1147 (size_t)mmu_ptob(i_cpr_storage_data_sz),
1148 1148 mapflag, bitfunc, DBG_SHOWRANGE);
1149 1149 }
1150 1150 return (count);
1151 1151 }
1152 1152
1153 1153
1154 1154 /*
1155 1155 * Derived from cpr_write_statefile().
1156 1156 * Allocate (or reallocate after exhausting the supply) descriptors for each
1157 1157 * chunk of contiguous sensitive kpages.
1158 1158 */
1159 1159 static int
1160 1160 i_cpr_storage_desc_alloc(csd_t **basepp, pgcnt_t *pgsp, csd_t **endpp,
1161 1161 int retry)
1162 1162 {
1163 1163 pgcnt_t npages;
1164 1164 int chunks;
1165 1165 csd_t *descp, *end;
1166 1166 size_t len;
1167 1167 char *str = "i_cpr_storage_desc_alloc:";
1168 1168
1169 1169 /*
1170 1170 * On initial allocation, add some extra to cover overhead caused
1171 1171 * by the allocation for the storage area later.
1172 1172 */
1173 1173 if (retry == 0) {
1174 1174 chunks = cpr_contig_pages(NULL, STORAGE_DESC_ALLOC) +
1175 1175 EXTRA_DESCS;
1176 1176 npages = mmu_btopr(sizeof (**basepp) * (pgcnt_t)chunks);
1177 1177 CPR_DEBUG(CPR_DEBUG7, "%s chunks %d, ", str, chunks);
1178 1178 } else {
1179 1179 CPR_DEBUG(CPR_DEBUG7, "%s retry %d: ", str, retry);
1180 1180 npages = *pgsp + 1;
1181 1181 }
1182 1182 /* Free old descriptors, if any */
1183 1183 if (*basepp)
1184 1184 kmem_free((caddr_t)*basepp, mmu_ptob(*pgsp));
1185 1185
1186 1186 descp = *basepp = kmem_alloc(mmu_ptob(npages), KM_NOSLEEP);
1187 1187 if (descp == NULL) {
1188 1188 CPR_DEBUG(CPR_DEBUG7, "%s no space for descriptors!\n", str);
1189 1189 return (ENOMEM);
1190 1190 }
1191 1191
1192 1192 *pgsp = npages;
1193 1193 len = mmu_ptob(npages);
1194 1194 end = *endpp = descp + (len / (sizeof (**basepp)));
1195 1195 CPR_DEBUG(CPR_DEBUG7, "npages 0x%lx, len 0x%lx, items 0x%lx\n\t*basepp "
1196 1196 "%p, *endpp %p\n", npages, len, (len / (sizeof (**basepp))),
1197 1197 (void *)*basepp, (void *)*endpp);
1198 1198 i_cpr_storage_desc_init(descp, npages, end);
1199 1199 return (0);
1200 1200 }
1201 1201
1202 1202 static void
1203 1203 i_cpr_storage_desc_init(csd_t *descp, pgcnt_t npages, csd_t *end)
1204 1204 {
1205 1205 size_t len = mmu_ptob(npages);
1206 1206
1207 1207 /* Initialize the descriptors to something impossible. */
1208 1208 bzero(descp, len);
1209 1209 #ifdef DEBUG
1210 1210 /*
1211 1211 * This condition is tested by an ASSERT
1212 1212 */
1213 1213 for (; descp < end; descp++)
1214 1214 descp->csd_dirty_spfn = (uint_t)-1;
1215 1215 #endif
1216 1216 }
1217 1217
1218 1218 int
1219 1219 i_cpr_dump_sensitive_kpages(vnode_t *vp)
1220 1220 {
1221 1221 int error = 0;
1222 1222 uint_t spin_cnt = 0;
1223 1223 csd_t *descp;
1224 1224
1225 1225 /*
1226 1226 * These following two variables need to be reinitialized
1227 1227 * for each cpr cycle.
1228 1228 */
1229 1229 i_cpr_sensitive_bytes_dumped = 0;
1230 1230 i_cpr_sensitive_pgs_dumped = 0;
1231 1231
1232 1232 if (i_cpr_storage_desc_base) {
1233 1233 for (descp = i_cpr_storage_desc_base;
1234 1234 descp <= i_cpr_storage_desc_last_used; descp++) {
1235 1235 if (error = cpr_dump_sensitive(vp, descp))
1236 1236 return (error);
1237 1237 spin_cnt++;
1238 1238 if ((spin_cnt & 0x5F) == 1)
1239 1239 cpr_spinning_bar();
1240 1240 }
1241 1241 prom_printf(" \b");
1242 1242 }
1243 1243
1244 1244 CPR_DEBUG(CPR_DEBUG7, "\ni_cpr_dump_sensitive_kpages: dumped %ld\n",
1245 1245 i_cpr_sensitive_pgs_dumped);
1246 1246 return (0);
1247 1247 }
1248 1248
1249 1249
1250 1250 /*
1251 1251 * 1. Fill the cpr page descriptor with the info of the dirty pages
1252 1252 * and
1253 1253 * write the descriptor out. It will be used at resume.
    1254 1254  * 2. Write out the clean data instead of the dirty data.
1255 1255 * Note: to save space, the clean data is already compressed.
1256 1256 */
1257 1257 static int
1258 1258 cpr_dump_sensitive(vnode_t *vp, csd_t *descp)
1259 1259 {
1260 1260 int error = 0;
1261 1261 caddr_t datap;
1262 1262 cpd_t cpd; /* cpr page descriptor */
1263 1263 pfn_t dirty_spfn;
1264 1264 pgcnt_t dirty_npages;
1265 1265 size_t clean_sz;
1266 1266 caddr_t clean_sva;
1267 1267 int clean_compressed;
1268 1268 extern uchar_t cpr_pagecopy[];
1269 1269
1270 1270 dirty_spfn = descp->csd_dirty_spfn;
1271 1271 dirty_npages = descp->csd_dirty_npages;
1272 1272 clean_sva = (caddr_t)descp->csd_clean_sva;
1273 1273 clean_sz = descp->csd_clean_sz;
1274 1274 clean_compressed = descp->csd_clean_compressed;
1275 1275
1276 1276 /* Fill cpr page descriptor. */
1277 1277 cpd.cpd_magic = (uint_t)CPR_PAGE_MAGIC;
1278 1278 cpd.cpd_pfn = dirty_spfn;
1279 1279 cpd.cpd_flag = 0; /* must init to zero */
1280 1280 cpd.cpd_pages = dirty_npages;
1281 1281
1282 1282 #ifdef DEBUG
1283 1283 if ((cpd.cpd_usum = descp->csd_usum) != 0)
1284 1284 cpd.cpd_flag |= CPD_USUM;
1285 1285 if ((cpd.cpd_csum = descp->csd_csum) != 0)
1286 1286 cpd.cpd_flag |= CPD_CSUM;
1287 1287 #endif
1288 1288
1289 1289 STAT->cs_dumped_statefsz += mmu_ptob(dirty_npages);
1290 1290
1291 1291 /*
1292 1292 * The sensitive kpages are usually saved with compression
1293 1293 * unless compression could not reduce the size of the data.
    1294 1294  * If the user chooses not to have the statefile compressed,
1295 1295 * we need to decompress the data back before dumping it to disk.
1296 1296 */
1297 1297 if (CPR->c_flags & C_COMPRESSING) {
1298 1298 cpd.cpd_length = clean_sz;
1299 1299 datap = clean_sva;
1300 1300 if (clean_compressed)
1301 1301 cpd.cpd_flag |= CPD_COMPRESS;
1302 1302 } else {
1303 1303 if (clean_compressed) {
1304 1304 cpd.cpd_length = decompress(clean_sva, cpr_pagecopy,
1305 1305 clean_sz, mmu_ptob(dirty_npages));
1306 1306 datap = (caddr_t)cpr_pagecopy;
1307 1307 ASSERT(cpd.cpd_length == mmu_ptob(dirty_npages));
1308 1308 } else {
1309 1309 cpd.cpd_length = clean_sz;
1310 1310 datap = clean_sva;
1311 1311 }
1312 1312 cpd.cpd_csum = 0;
1313 1313 }
1314 1314
1315 1315 /* Write cpr page descriptor */
1316 1316 error = cpr_write(vp, (caddr_t)&cpd, sizeof (cpd));
1317 1317 if (error) {
1318 1318 CPR_DEBUG(CPR_DEBUG7, "descp: %p\n", (void *)descp);
1319 1319 #ifdef DEBUG
1320 1320 debug_enter("cpr_dump_sensitive: cpr_write() page "
1321 1321 "descriptor failed!\n");
1322 1322 #endif
1323 1323 return (error);
1324 1324 }
1325 1325
1326 1326 i_cpr_sensitive_bytes_dumped += sizeof (cpd_t);
1327 1327
1328 1328 /* Write page data */
1329 1329 error = cpr_write(vp, (caddr_t)datap, cpd.cpd_length);
1330 1330 if (error) {
1331 1331 CPR_DEBUG(CPR_DEBUG7, "error: %x\n", error);
1332 1332 CPR_DEBUG(CPR_DEBUG7, "descp: %p\n", (void *)descp);
1333 1333 CPR_DEBUG(CPR_DEBUG7, "cpr_write(%p, %p , %lx)\n",
1334 1334 (void *)vp, (void *)datap, cpd.cpd_length);
1335 1335 #ifdef DEBUG
1336 1336 debug_enter("cpr_dump_sensitive: cpr_write() data failed!\n");
1337 1337 #endif
1338 1338 return (error);
1339 1339 }
1340 1340
1341 1341 i_cpr_sensitive_bytes_dumped += cpd.cpd_length;
1342 1342 i_cpr_sensitive_pgs_dumped += dirty_npages;
1343 1343
1344 1344 return (error);
1345 1345 }
1346 1346
1347 1347
1348 1348 /*
1349 1349 * Sanity check to make sure that we have dumped right amount
1350 1350 * of pages from different sources to statefile.
1351 1351 */
1352 1352 int
1353 1353 i_cpr_check_pgs_dumped(uint_t pgs_expected, uint_t regular_pgs_dumped)
1354 1354 {
1355 1355 uint_t total_pgs_dumped;
1356 1356
1357 1357 total_pgs_dumped = regular_pgs_dumped + i_cpr_sensitive_pgs_dumped;
1358 1358
1359 1359 CPR_DEBUG(CPR_DEBUG7, "\ncheck_pgs: reg %d + sens %ld = %d, "
1360 1360 "expect %d\n\n", regular_pgs_dumped, i_cpr_sensitive_pgs_dumped,
1361 1361 total_pgs_dumped, pgs_expected);
1362 1362
1363 1363 if (pgs_expected == total_pgs_dumped)
1364 1364 return (0);
1365 1365
1366 1366 return (EINVAL);
1367 1367 }
1368 1368
1369 1369
1370 1370 int
1371 1371 i_cpr_reusefini(void)
1372 1372 {
1373 1373 struct vnode *vp;
1374 1374 cdef_t *cdef;
1375 1375 size_t size;
1376 1376 char *bufp;
1377 1377 int rc;
1378 1378
1379 1379 if (cpr_reusable_mode)
1380 1380 cpr_reusable_mode = 0;
1381 1381
1382 1382 if (rc = cpr_open_deffile(FREAD|FWRITE, &vp)) {
1383 1383 if (rc == EROFS) {
1384 1384 cpr_err(CE_CONT, "uadmin A_FREEZE AD_REUSEFINI "
1385 1385 "(uadmin %d %d)\nmust be done with / mounted "
1386 1386 "writeable.\n", A_FREEZE, AD_REUSEFINI);
1387 1387 }
1388 1388 return (rc);
1389 1389 }
1390 1390
1391 1391 cdef = kmem_alloc(sizeof (*cdef), KM_SLEEP);
1392 1392 rc = cpr_rdwr(UIO_READ, vp, cdef, sizeof (*cdef));
1393 1393
1394 1394 if (rc) {
1395 1395 cpr_err(CE_WARN, "Failed reading %s, errno = %d",
1396 1396 cpr_default_path, rc);
1397 1397 } else if (cdef->mini.magic != CPR_DEFAULT_MAGIC) {
1398 1398 cpr_err(CE_WARN, "bad magic number in %s, cannot restore "
1399 1399 "prom values for %s", cpr_default_path,
1400 1400 cpr_enumerate_promprops(&bufp, &size));
1401 1401 kmem_free(bufp, size);
1402 1402 rc = EINVAL;
1403 1403 } else {
1404 1404 /*
1405 1405 * clean up prom properties
1406 1406 */
1407 1407 rc = cpr_update_nvram(cdef->props);
1408 1408 if (rc == 0) {
1409 1409 /*
1410 1410 * invalidate the disk copy and turn off reusable
1411 1411 */
1412 1412 cdef->mini.magic = 0;
1413 1413 cdef->mini.reusable = 0;
1414 1414 if (rc = cpr_rdwr(UIO_WRITE, vp,
1415 1415 &cdef->mini, sizeof (cdef->mini))) {
1416 1416 cpr_err(CE_WARN, "Failed writing %s, errno %d",
1417 1417 cpr_default_path, rc);
1418 1418 }
1419 1419 }
1420 1420 }
1421 1421
1422 1422 (void) VOP_CLOSE(vp, FREAD|FWRITE, 1, (offset_t)0, CRED(), NULL);
1423 1423 VN_RELE(vp);
1424 1424 kmem_free(cdef, sizeof (*cdef));
1425 1425
1426 1426 return (rc);
1427 1427 }
1428 1428
1429 1429
1430 1430 int
1431 1431 i_cpr_reuseinit(void)
1432 1432 {
1433 1433 int rc = 0;
1434 1434
1435 1435 if (rc = cpr_default_setup(1))
1436 1436 return (rc);
1437 1437
1438 1438 /*
1439 1439 * We need to validate default file
1440 1440 */
1441 1441 rc = cpr_validate_definfo(1);
1442 1442 if (rc == 0)
1443 1443 cpr_reusable_mode = 1;
1444 1444 else if (rc == EROFS) {
1445 1445 cpr_err(CE_NOTE, "reuseinit must be performed "
1446 1446 "while / is mounted writeable");
1447 1447 }
1448 1448
1449 1449 (void) cpr_default_setup(0);
1450 1450
1451 1451 return (rc);
1452 1452 }
1453 1453
1454 1454
1455 1455 int
1456 1456 i_cpr_check_cprinfo(void)
1457 1457 {
1458 1458 struct vnode *vp;
1459 1459 cmini_t mini;
1460 1460 int rc = 0;
1461 1461
1462 1462 if (rc = cpr_open_deffile(FREAD, &vp)) {
1463 1463 if (rc == ENOENT)
1464 1464 cpr_err(CE_NOTE, "cprinfo file does not "
1465 1465 "exist. You must run 'uadmin %d %d' "
1466 1466 "command while / is mounted writeable,\n"
1467 1467 "then reboot and run 'uadmin %d %d' "
1468 1468 "to create a reusable statefile",
1469 1469 A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
1470 1470 return (rc);
1471 1471 }
1472 1472
1473 1473 rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
1474 1474 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
1475 1475 VN_RELE(vp);
1476 1476
1477 1477 if (rc) {
1478 1478 cpr_err(CE_WARN, "Failed reading %s, errno = %d",
1479 1479 cpr_default_path, rc);
1480 1480 } else if (mini.magic != CPR_DEFAULT_MAGIC) {
1481 1481 cpr_err(CE_CONT, "bad magic number in cprinfo file.\n"
1482 1482 "You must run 'uadmin %d %d' while / is mounted "
1483 1483 "writeable, then reboot and run 'uadmin %d %d' "
1484 1484 "to create a reusable statefile\n",
1485 1485 A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
1486 1486 rc = EINVAL;
1487 1487 }
1488 1488
1489 1489 return (rc);
1490 1490 }
1491 1491
1492 1492
1493 1493 int
1494 1494 i_cpr_reusable_supported(void)
1495 1495 {
1496 1496 return (1);
1497 1497 }
1498 1498
1499 1499
1500 1500 /*
1501 1501 * find prom phys pages and alloc space for a tmp copy
1502 1502 */
1503 1503 static int
1504 1504 i_cpr_find_ppages(void)
1505 1505 {
1506 1506 struct page *pp;
1507 1507 struct memlist *pmem;
1508 1508 pgcnt_t npages, pcnt, scnt, vcnt;
1509 1509 pfn_t ppn, plast, *dst;
1510 1510 int mapflag;
1511 1511
1512 1512 cpr_clear_bitmaps();
1513 1513 mapflag = REGULAR_BITMAP;
1514 1514
1515 1515 /*
1516 1516 * there should be a page_t for each phys page used by the kernel;
1517 1517 * set a bit for each phys page not tracked by a page_t
1518 1518 */
1519 1519 pcnt = 0;
1520 1520 memlist_read_lock();
1521 1521 for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
1522 1522 npages = mmu_btop(pmem->ml_size);
1523 1523 ppn = mmu_btop(pmem->ml_address);
1524 1524 for (plast = ppn + npages; ppn < plast; ppn++) {
1525 1525 if (page_numtopp_nolock(ppn))
1526 1526 continue;
1527 1527 (void) cpr_setbit(ppn, mapflag);
1528 1528 pcnt++;
1529 1529 }
1530 1530 }
1531 1531 memlist_read_unlock();
1532 1532
1533 1533 /*
1534 1534 * clear bits for phys pages in each segment
1535 1535 */
1536 1536 scnt = cpr_count_seg_pages(mapflag, cpr_clrbit);
1537 1537
1538 1538 /*
1539 1539 * set bits for phys pages referenced by the promvp vnode;
1540 1540 * these pages are mostly comprised of forthdebug words
1541 1541 */
1542 1542 vcnt = 0;
1543 1543 for (pp = promvp.v_pages; pp; ) {
1544 1544 if (cpr_setbit(pp->p_offset, mapflag) == 0)
1545 1545 vcnt++;
1546 1546 pp = pp->p_vpnext;
1547 1547 if (pp == promvp.v_pages)
1548 1548 break;
1549 1549 }
1550 1550
1551 1551 /*
1552 1552 * total number of prom pages are:
1553 1553 * (non-page_t pages - seg pages + vnode pages)
1554 1554 */
1555 1555 ppage_count = pcnt - scnt + vcnt;
1556 1556 CPR_DEBUG(CPR_DEBUG1,
1557 1557 "find_ppages: pcnt %ld - scnt %ld + vcnt %ld = %ld\n",
1558 1558 pcnt, scnt, vcnt, ppage_count);
1559 1559
1560 1560 /*
1561 1561 * alloc array of pfn_t to store phys page list
1562 1562 */
1563 1563 pphys_list_size = ppage_count * sizeof (pfn_t);
1564 1564 pphys_list = kmem_alloc(pphys_list_size, KM_NOSLEEP);
1565 1565 if (pphys_list == NULL) {
1566 1566 cpr_err(CE_WARN, "cannot alloc pphys_list");
1567 1567 return (ENOMEM);
1568 1568 }
1569 1569
1570 1570 /*
1571 1571 * phys pages referenced in the bitmap should be
1572 1572 * those used by the prom; scan bitmap and save
1573 1573 * a list of prom phys page numbers
1574 1574 */
1575 1575 dst = pphys_list;
1576 1576 memlist_read_lock();
1577 1577 for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
1578 1578 npages = mmu_btop(pmem->ml_size);
1579 1579 ppn = mmu_btop(pmem->ml_address);
1580 1580 for (plast = ppn + npages; ppn < plast; ppn++) {
1581 1581 if (cpr_isset(ppn, mapflag)) {
1582 1582 ASSERT(dst < (pphys_list + ppage_count));
1583 1583 *dst++ = ppn;
1584 1584 }
1585 1585 }
1586 1586 }
1587 1587 memlist_read_unlock();
1588 1588
1589 1589 /*
1590 1590 * allocate space to store prom pages
1591 1591 */
1592 1592 ppage_buf = kmem_alloc(mmu_ptob(ppage_count), KM_NOSLEEP);
1593 1593 if (ppage_buf == NULL) {
1594 1594 kmem_free(pphys_list, pphys_list_size);
1595 1595 pphys_list = NULL;
1596 1596 cpr_err(CE_WARN, "cannot alloc ppage_buf");
1597 1597 return (ENOMEM);
1598 1598 }
1599 1599
1600 1600 return (0);
1601 1601 }
1602 1602
1603 1603
1604 1604 /*
1605 1605 * save prom pages to kmem pages
1606 1606 */
1607 1607 static void
1608 1608 i_cpr_save_ppages(void)
1609 1609 {
1610 1610 pfn_t *pphys, *plast;
1611 1611 caddr_t dst;
1612 1612
1613 1613 /*
1614 1614 * map in each prom page and copy to a kmem page
1615 1615 */
1616 1616 dst = ppage_buf;
1617 1617 plast = pphys_list + ppage_count;
1618 1618 for (pphys = pphys_list; pphys < plast; pphys++) {
1619 1619 i_cpr_mapin(cpr_vaddr, 1, *pphys);
1620 1620 bcopy(cpr_vaddr, dst, MMU_PAGESIZE);
1621 1621 i_cpr_mapout(cpr_vaddr, 1);
1622 1622 dst += MMU_PAGESIZE;
1623 1623 }
1624 1624
1625 1625 CPR_DEBUG(CPR_DEBUG1, "saved %ld prom pages\n", ppage_count);
1626 1626 }
1627 1627
1628 1628
1629 1629 /*
1630 1630 * restore prom pages from kmem pages
1631 1631 */
1632 1632 static void
1633 1633 i_cpr_restore_ppages(void)
1634 1634 {
1635 1635 pfn_t *pphys, *plast;
1636 1636 caddr_t src;
1637 1637
1638 1638 dcache_flushall();
1639 1639
1640 1640 /*
1641 1641 * map in each prom page and copy from a kmem page
1642 1642 */
1643 1643 src = ppage_buf;
1644 1644 plast = pphys_list + ppage_count;
1645 1645 for (pphys = pphys_list; pphys < plast; pphys++) {
1646 1646 i_cpr_mapin(cpr_vaddr, 1, *pphys);
1647 1647 bcopy(src, cpr_vaddr, MMU_PAGESIZE);
1648 1648 i_cpr_mapout(cpr_vaddr, 1);
1649 1649 src += MMU_PAGESIZE;
1650 1650 }
1651 1651
1652 1652 dcache_flushall();
1653 1653
1654 1654 CPR_DEBUG(CPR_DEBUG1, "restored %ld prom pages\n", ppage_count);
1655 1655 }
1656 1656
1657 1657
1658 1658 /*
1659 1659 * save/restore prom pages or free related allocs
1660 1660 */
1661 1661 int
1662 1662 i_cpr_prom_pages(int action)
1663 1663 {
1664 1664 int error;
1665 1665
1666 1666 if (action == CPR_PROM_SAVE) {
1667 1667 if (ppage_buf == NULL) {
1668 1668 ASSERT(pphys_list == NULL);
1669 1669 if (error = i_cpr_find_ppages())
1670 1670 return (error);
1671 1671 i_cpr_save_ppages();
1672 1672 }
1673 1673 } else if (action == CPR_PROM_RESTORE) {
1674 1674 i_cpr_restore_ppages();
1675 1675 } else if (action == CPR_PROM_FREE) {
1676 1676 if (pphys_list) {
1677 1677 ASSERT(pphys_list_size);
1678 1678 kmem_free(pphys_list, pphys_list_size);
1679 1679 pphys_list = NULL;
1680 1680 pphys_list_size = 0;
1681 1681 }
1682 1682 if (ppage_buf) {
1683 1683 ASSERT(ppage_count);
1684 1684 kmem_free(ppage_buf, mmu_ptob(ppage_count));
1685 1685 CPR_DEBUG(CPR_DEBUG1, "freed %ld prom pages\n",
1686 1686 ppage_count);
1687 1687 ppage_buf = NULL;
1688 1688 ppage_count = 0;
1689 1689 }
1690 1690 }
1691 1691 return (0);
1692 1692 }
1693 1693
1694 1694
1695 1695 /*
1696 1696 * record tlb data for the nucleus, bigktsb's, and the cpr module;
1697 1697 * this data is later used by cprboot to install dtlb/itlb entries.
1698 1698 * when we jump into the cpr module during the resume phase, those
1699 1699 * mappings are needed until switching to the kernel trap table.
1700 1700 * to make the dtte/itte info available during resume, we need
1701 1701 * the info recorded prior to saving sensitive pages, otherwise
1702 1702 * all the data would appear as NULLs.
1703 1703 */
1704 1704 static void
1705 1705 i_cpr_save_tlbinfo(void)
1706 1706 {
1707 1707 cti_t cti = {0};
1708 1708
1709 1709 /*
1710 1710 * during resume - shortly after jumping into the cpr module,
1711 1711 * sfmmu_load_mmustate() will overwrite any dtlb entry at any
1712 1712 * index used for TSBs; skip is set so that any saved tte will
1713 1713 * target other tlb offsets and prevent being lost during
1714 1714 * resume. now scan the dtlb and save locked entries,
1715 1715 * then add entries for the tmp stack / data page and the
1716 1716 * cpr thread structure.
1717 1717 */
1718 1718 cti.dst = m_info.dtte;
1719 1719 cti.tail = cti.dst + CPR_MAX_TLB;
1720 1720 cti.reader = dtlb_rd_entry;
1721 1721 cti.writer = NULL;
1722 1722 cti.filter = i_cpr_lnb;
1723 1723 cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;
1724 1724
1725 1725 if (utsb_dtlb_ttenum != -1)
1726 1726 cti.skip = (1 << utsb_dtlb_ttenum);
1727 1727
1728 1728 if (utsb4m_dtlb_ttenum != -1)
1729 1729 cti.skip |= (1 << utsb4m_dtlb_ttenum);
1730 1730
1731 1731 i_cpr_scan_tlb(&cti);
1732 1732 i_cpr_make_tte(&cti, &i_cpr_data_page, datava);
1733 1733 i_cpr_make_tte(&cti, curthread, datava);
1734 1734
1735 1735 /*
1736 1736 * scan itlb and save locked entries; add an entry for
1737 1737 * the first text page of the cpr module; cprboot will
1738 1738 * jump to that page after restoring kernel pages.
1739 1739 */
1740 1740 cti.dst = m_info.itte;
1741 1741 cti.tail = cti.dst + CPR_MAX_TLB;
1742 1742 cti.reader = itlb_rd_entry;
1743 1743 cti.index = cpunodes[CPU->cpu_id].itlb_size - 1;
1744 1744 cti.skip = 0;
1745 1745 i_cpr_scan_tlb(&cti);
1746 1746 i_cpr_make_tte(&cti, (void *)i_cpr_resume_setup, textva);
1747 1747 }
1748 1748
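/*
 * Conceptual note on the cti_t setup above (the real loop lives in
 * i_cpr_scan_tlb(), which is not shown here): the scan is assumed to
 * walk tlb slots from cti.index down toward 0, ignore any slot whose
 * bit is set in cti.skip, read each entry through cti.reader, and copy
 * entries accepted by cti.filter out through cti.dst, never writing
 * past cti.tail (CPR_MAX_TLB entries).
 */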
1749 1749
1750 1750 /* ARGSUSED */
1751 1751 int
1752 1752 i_cpr_dump_setup(vnode_t *vp)
1753 1753 {
1754 1754 /*
1755 1755 * zero out m_info and add info to dtte/itte arrays
1756 1756 */
1757 1757 bzero(&m_info, sizeof (m_info));
1758 1758 i_cpr_save_tlbinfo();
1759 1759 return (0);
1760 1760 }
1761 1761
1762 1762
1763 1763 int
1764 1764 i_cpr_is_supported(int sleeptype)
1765 1765 {
1766 1766 char es_prop[] = "energystar-v2";
1767 1767 pnode_t node;
1768 1768 int last;
1769 1769 extern int cpr_supported_override;
1770 1770 extern int cpr_platform_enable;
1771 1771
1772 1772 if (sleeptype != CPR_TODISK)
1773 1773 return (0);
1774 1774
1775 1775 /*
1776 1776 * The next statement tests if a specific platform has turned off
1777 1777 * cpr support.
1778 1778 */
1779 1779 if (cpr_supported_override)
1780 1780 return (0);
1781 1781
1782 1782 /*
1783 1783 * Do not inspect energystar-v* property if a platform has
1784 1784 * specifically turned on cpr support
1785 1785 */
1786 1786 if (cpr_platform_enable)
1787 1787 return (1);
1788 1788
1789 1789 node = prom_rootnode();
1790 1790 if (prom_getproplen(node, es_prop) != -1)
1791 1791 return (1);
1792 1792 last = strlen(es_prop) - 1;
1793 1793 es_prop[last] = '3';
1794 1794 return (prom_getproplen(node, es_prop) != -1);
1795 1795 }
1796 1796
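/*
 * Note on the property probe above: es_prop starts out as
 * "energystar-v2"; when that property is absent, the trailing '2' is
 * rewritten to '3' so the same buffer is reused to probe for
 * "energystar-v3".
 */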
1797 1797
1798 1798 /*
1799 1799 * the actual size of the statefile data isn't known until after all the
1800 1800 * compressed pages are written; even the inode size doesn't reflect the
1801 1801 * data size since there are usually many extra fs blocks. for recording
1802 1802 * the actual data size, the first sector of the statefile is copied to
1803 1803 * a tmp buf, and the copy is later updated and flushed to disk.
1804 1804 */
1805 1805 int
1806 1806 i_cpr_blockzero(char *base, char **bufpp, int *blkno, vnode_t *vp)
1807 1807 {
1808 1808 extern int cpr_flush_write(vnode_t *);
1809 1809 static char cpr_sector[DEV_BSIZE];
1810 1810 cpr_ext bytes, *dst;
1811 1811
1812 1812 /*
1813 1813 * this routine is called after cdd_t and csu_md_t are copied
1814 1814 * to cpr_buf; mini-hack alert: the save/update method creates
1815 1815 * a dependency on the combined struct size being >= one sector
1816 1816 * or DEV_BSIZE; since introduction in Sol2.7, csu_md_t size is
1817 1817 * over 1K bytes and will probably grow with any changes.
1818 1818 *
1819 1819 * copy when vp is NULL, flush when non-NULL
1820 1820 */
1821 1821 if (vp == NULL) {
1822 1822 ASSERT((*bufpp - base) >= DEV_BSIZE);
1823 1823 bcopy(base, cpr_sector, sizeof (cpr_sector));
1824 1824 return (0);
1825 1825 } else {
1826 1826 bytes = dbtob(*blkno);
1827 1827 dst = &((cdd_t *)cpr_sector)->cdd_filesize;
1828 1828 bcopy(&bytes, dst, sizeof (bytes));
1829 1829 bcopy(cpr_sector, base, sizeof (cpr_sector));
1830 1830 *bufpp = base + sizeof (cpr_sector);
1831 1831 *blkno = cpr_statefile_offset();
1832 1832 CPR_DEBUG(CPR_DEBUG1, "statefile data size: %ld\n\n", bytes);
1833 1833 return (cpr_flush_write(vp));
1834 1834 }
1835 1835 }
1836 1836
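/*
 * Illustrative call sequence for i_cpr_blockzero(); the caller shape
 * and variable names here are hypothetical, only the two-phase
 * NULL-vp / non-NULL-vp contract comes from the routine above.
 */
static int
cpr_example_blockzero(char *buf_base, char **wrtpp, int *blknop, vnode_t *vp)
{
	/* phase 1: once cdd_t/csu_md_t occupy the buffer, snapshot sector 0 */
	(void) i_cpr_blockzero(buf_base, wrtpp, blknop, NULL);

	/*
	 * ... the compressed pages are written here, leaving *blknop as
	 * the number of statefile blocks written so far ...
	 */

	/* phase 2: patch the final size into sector 0 and flush it to disk */
	return (i_cpr_blockzero(buf_base, wrtpp, blknop, vp));
}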
1837 1837
1838 1838 /*
1839 1839 * Allocate bitmaps according to the phys_install list.
1840 1840 */
1841 1841 static int
1842 1842 i_cpr_bitmap_setup(void)
1843 1843 {
1844 1844 struct memlist *pmem;
1845 1845 cbd_t *dp, *tail;
1846 1846 void *space;
1847 1847 size_t size;
1848 1848
1849 1849 /*
1850 1850 * The number of bitmap descriptors will be the count of
1851 1851 * phys_install ranges plus 1 for a trailing NULL struct.
1852 1852 */
1853 1853 cpr_nbitmaps = 1;
1854 1854 for (pmem = phys_install; pmem; pmem = pmem->ml_next)
1855 1855 cpr_nbitmaps++;
1856 1856
1857 1857 if (cpr_nbitmaps > (CPR_MAX_BMDESC - 1)) {
1858 1858 cpr_err(CE_WARN, "too many physical memory ranges %d, max %d",
1859 1859 cpr_nbitmaps, CPR_MAX_BMDESC - 1);
1860 1860 return (EFBIG);
1861 1861 }
1862 1862
1863 1863 /* Alloc an array of bitmap descriptors. */
1864 1864 dp = kmem_zalloc(cpr_nbitmaps * sizeof (*dp), KM_NOSLEEP);
1865 1865 if (dp == NULL) {
1866 1866 cpr_nbitmaps = 0;
1867 1867 return (ENOMEM);
1868 1868 }
1869 1869 tail = dp + cpr_nbitmaps;
1870 1870
1871 1871 CPR->c_bmda = dp;
1872 1872 for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
1873 1873 size = BITMAP_BYTES(pmem->ml_size);
1874 1874 space = kmem_zalloc(size * 2, KM_NOSLEEP);
1875 1875 if (space == NULL)
1876 1876 return (ENOMEM);
1877 1877 ASSERT(dp < tail);
1878 1878 dp->cbd_magic = CPR_BITMAP_MAGIC;
1879 1879 dp->cbd_spfn = mmu_btop(pmem->ml_address);
1880 1880 dp->cbd_epfn = mmu_btop(pmem->ml_address + pmem->ml_size) - 1;
1881 1881 dp->cbd_size = size;
1882 1882 dp->cbd_reg_bitmap = (cpr_ptr)space;
1883 1883 dp->cbd_vlt_bitmap = (cpr_ptr)((caddr_t)space + size);
1884 1884 dp++;
1885 1885 }
1886 1886
1887 1887 /* set magic for the last descriptor */
1888 1888 ASSERT(dp == (tail - 1));
1889 1889 dp->cbd_magic = CPR_BITMAP_MAGIC;
1890 1890
1891 1891 return (0);
1892 1892 }
1893 1893
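/*
 * Rough sizing example for the allocations above, assuming
 * BITMAP_BYTES() works out to one bit per MMU_PAGESIZE (8K) page:
 * a 1 GB phys_install range covers 131072 pages, so each bitmap is
 * 16 KB and the regular/volatile pair costs 32 KB of kmem per range,
 * plus one cbd_t descriptor.
 */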
1894 1894
1895 1895 void
1896 1896 i_cpr_bitmap_cleanup(void)
1897 1897 {
1898 1898 cbd_t *dp;
1899 1899
1900 1900 if (CPR->c_bmda == NULL)
1901 1901 return;
1902 1902 for (dp = CPR->c_bmda; dp->cbd_size; dp++)
1903 1903 kmem_free((void *)dp->cbd_reg_bitmap, dp->cbd_size * 2);
1904 1904 kmem_free(CPR->c_bmda, cpr_nbitmaps * sizeof (*CPR->c_bmda));
1905 1905 CPR->c_bmda = NULL;
1906 1906 cpr_nbitmaps = 0;
1907 1907 }
1908 1908
1909 1909
1910 1910 /*
1911 1911 * A "regular" and "volatile" bitmap are created for each range of
1912 1912 * physical memory. The volatile maps are used to count and track pages
1913 1913 * susceptible to heap corruption - caused by drivers that allocate mem
1914 1914 * during VOP_DUMP(); the regular maps are used for all the other non-
1915 1915 * susceptible pages. Before writing the bitmaps to the statefile,
1916 1916 * each bitmap pair gets merged to simplify handling within cprboot.
1917 1917 */
1918 1918 int
1919 1919 i_cpr_alloc_bitmaps(void)
1920 1920 {
1921 1921 int err;
1922 1922
1923 1923 memlist_read_lock();
1924 1924 err = i_cpr_bitmap_setup();
1925 1925 memlist_read_unlock();
1926 1926 if (err)
1927 1927 i_cpr_bitmap_cleanup();
1928 1928 return (err);
1929 1929 }
1930 1930
1931 1931
1932 1932
1933 1933 /*
1934 1934 * Power down the system.
1935 1935 */
1936 1936 int
1937 1937 i_cpr_power_down(int sleeptype)
1938 1938 {
1939 1939 int is_defined = 0;
1940 1940 char *wordexists = "p\" power-off\" find nip swap l! ";
1941 1941 char *req = "power-off";
1942 1942
1943 1943 ASSERT(sleeptype == CPR_TODISK);
1944 1944
1945 1945 /*
1946 1946 	 * is_defined is set to -1 (true) when the power-off word is defined
1947 1947 */
1948 1948 prom_interpret(wordexists, (uintptr_t)&is_defined, 0, 0, 0, 0);
1949 1949 if (is_defined) {
1950 1950 CPR_DEBUG(CPR_DEBUG1, "\ncpr: %s...\n", req);
1951 1951 prom_interpret(req, 0, 0, 0, 0, 0);
1952 1952 }
1953 1953 /*
1954 1954 * Only returns if failed
1955 1955 */
1956 1956 return (EIO);
1957 1957 }
1958 1958
1959 1959 void
1960 1960 i_cpr_stop_other_cpus(void)
1961 1961 {
1962 1962 stop_other_cpus();
1963 1963 }
1964 1964
1965 1965 /*
1966 1966 * Save context for the specified CPU
1967 1967 */
1968 1968 /* ARGSUSED */
1969 1969 void *
1970 1970 i_cpr_save_context(void *arg)
1971 1971 {
1972 1972 /*
1973 1973 * Not yet
1974 1974 */
1975 1975 ASSERT(0);
1976 1976 return (NULL);
1977 1977 }
1978 1978
1979 1979 void
1980 1980 i_cpr_pre_resume_cpus(void)
1981 1981 {
1982 1982 /*
1983 1983 * Not yet
1984 1984 */
1985 1985 ASSERT(0);
1986 1986 }
1987 1987
1988 1988 void
1989 1989 i_cpr_post_resume_cpus(void)
1990 1990 {
1991 1991 /*
1992 1992 * Not yet
1993 1993 */
1994 1994 ASSERT(0);
1995 1995 }
1996 1996
1997 1997 /*
1998 1998 * nothing to do
1999 1999 */
2000 2000 void
2001 2001 i_cpr_alloc_cpus(void)
2002 2002 {
2003 2003 }
2004 2004
2005 2005 /*
2006 2006 * nothing to do
2007 2007 */
2008 2008 void
2009 2009 i_cpr_free_cpus(void)
2010 2010 {
2011 2011 }
2012 2012
2013 2013 /* ARGSUSED */
2014 2014 void
2015 2015 i_cpr_save_configuration(dev_info_t *dip)
2016 2016 {
2017 2017 /*
2018 2018 * this is a no-op on sparc
2019 2019 */
2020 2020 }
2021 2021
2022 2022 /* ARGSUSED */
2023 2023 void
2024 2024 i_cpr_restore_configuration(dev_info_t *dip)
2025 2025 {
2026 2026 /*
2027 2027 * this is a no-op on sparc
2028 2028 */
2029 2029 }
1747 lines elided