/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Platform specific implementation code
 */

#define SUNDDI_IMPL

#include <sys/types.h>
#include <sys/promif.h>
#include <sys/prom_isa.h>
#include <sys/prom_plat.h>
#include <sys/mmu.h>
#include <vm/hat_sfmmu.h>
#include <sys/iommu.h>
#include <sys/scb.h>
#include <sys/cpuvar.h>
#include <sys/intreg.h>
#include <sys/pte.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/as.h>
#include <sys/cpr.h>
#include <sys/kmem.h>
#include <sys/clock.h>
#include <sys/panic.h>
#include <vm/seg_kmem.h>
#include <sys/cpu_module.h>
#include <sys/callb.h>
#include <sys/machsystm.h>
#include <sys/vmsystm.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/stack.h>
#include <sys/fs/ufs_fs.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
#include <sys/thread.h>
#include <vm/vm_dep.h>

extern  void cpr_clear_bitmaps(void);
extern  int cpr_setbit(pfn_t ppn, int mapflag);
extern  int cpr_clrbit(pfn_t ppn, int mapflag);
extern  pgcnt_t cpr_scan_kvseg(int mapflag, bitfunc_t bitfunc, struct seg *seg);
extern  pgcnt_t cpr_count_seg_pages(int mapflag, bitfunc_t bitfunc);
extern  void dtlb_wr_entry(uint_t, tte_t *, uint64_t *);
extern  void itlb_wr_entry(uint_t, tte_t *, uint64_t *);

static  int i_cpr_storage_desc_alloc(csd_t **, pgcnt_t *, csd_t **, int);
static  void i_cpr_storage_desc_init(csd_t *, pgcnt_t, csd_t *);
static  caddr_t i_cpr_storage_data_alloc(pgcnt_t, pgcnt_t *, int);
static  int cpr_dump_sensitive(vnode_t *, csd_t *);
static  void i_cpr_clear_entries(uint64_t, uint64_t);
static  void i_cpr_xcall(xcfunc_t);

void    i_cpr_storage_free(void);

extern void *i_cpr_data_page;
extern int cpr_test_mode;
extern int cpr_nbitmaps;
extern char cpr_default_path[];
extern caddr_t textva, datava;

static struct cpr_map_info cpr_prom_retain[CPR_PROM_RETAIN_CNT];
caddr_t cpr_vaddr = NULL;

static  uint_t sensitive_pages_saved;
static  uint_t sensitive_size_saved;

caddr_t i_cpr_storage_data_base;
caddr_t i_cpr_storage_data_end;
csd_t *i_cpr_storage_desc_base;
csd_t *i_cpr_storage_desc_end;          /* one byte beyond last used descp */
csd_t *i_cpr_storage_desc_last_used;    /* last used descriptor */
caddr_t sensitive_write_ptr;            /* position for next storage write */

size_t  i_cpr_sensitive_bytes_dumped;
pgcnt_t i_cpr_sensitive_pgs_dumped;
pgcnt_t i_cpr_storage_data_sz;          /* in pages */
pgcnt_t i_cpr_storage_desc_pgcnt;       /* in pages */

ushort_t cpr_mach_type = CPR_MACHTYPE_4U;
static  csu_md_t m_info;


#define MAX_STORAGE_RETRY       3
#define MAX_STORAGE_ALLOC_RETRY 3
#define INITIAL_ALLOC_PCNT      40      /* starting allocation percentage */
#define INTEGRAL                100     /* to get 1% precision */

#define EXTRA_RATE              2       /* add EXTRA_RATE% extra space */
#define EXTRA_DESCS             10

#define CPR_NO_STORAGE_DESC     1
#define CPR_NO_STORAGE_DATA     2

#define CIF_SPLICE              0
#define CIF_UNLINK              1


/*
 * CPR miscellaneous support routines
 */
#define cpr_open(path, mode,  vpp)      (vn_open(path, UIO_SYSSPACE, \
                mode, 0600, vpp, CRCREAT, 0))
#define cpr_rdwr(rw, vp, basep, cnt)    (vn_rdwr(rw, vp,  (caddr_t)(basep), \
                cnt, 0LL, UIO_SYSSPACE, 0, (rlim64_t)MAXOFF_T, CRED(), \
                (ssize_t *)NULL))
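
/*
 * Illustrative sketch (not part of the original source): the wrappers
 * above are used in open/read/close sequences against the cpr default
 * file, e.g. as i_cpr_check_cprinfo() does below:
 *
 *	vnode_t *vp;
 *	cmini_t mini;
 *
 *	if (cpr_open_deffile(FREAD, &vp) == 0) {
 *		(void) cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
 *		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
 *		VN_RELE(vp);
 *	}
 */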

/*
 * definitions for saving/restoring prom pages
 */
static void     *ppage_buf;
static pgcnt_t  ppage_count;
static pfn_t    *pphys_list;
static size_t   pphys_list_size;

typedef void (*tlb_rw_t)(uint_t, tte_t *, uint64_t *);
typedef void (*tlb_filter_t)(int, tte_t *, uint64_t, void *);

/*
 * private struct for tlb handling
 */
struct cpr_trans_info {
        sutlb_t         *dst;
        sutlb_t         *tail;
        tlb_rw_t        reader;
        tlb_rw_t        writer;
        tlb_filter_t    filter;
        int             index;
        uint64_t        skip;           /* assumes TLB <= 64 locked entries */
};
typedef struct cpr_trans_info cti_t;


/*
 * special handling for tlb info
 */
#define WITHIN_OFW(va) \
        (((va) > (uint64_t)OFW_START_ADDR) && ((va) < (uint64_t)OFW_END_ADDR))

#define WITHIN_NUCLEUS(va, base) \
        (((va) >= (base)) && \
        (((va) + MMU_PAGESIZE) <= ((base) + MMU_PAGESIZE4M)))

#define IS_BIGKTSB(va) \
        (enable_bigktsb && \
        ((va) >= (uint64_t)ktsb_base) && \
        ((va) < (uint64_t)(ktsb_base + ktsb_sz)))


/*
 * WARNING:
 * the text from this file is linked to follow cpr_resume_setup.o;
 * only add text between here and i_cpr_end_jumpback when it needs
 * to be called during resume before we switch back to the kernel
 * trap table.  all the text in this range must fit within a page.
 */


/*
 * each time a machine is reset, the prom uses an inconsistent set of phys
 * pages and the cif cookie may differ as well.  so prior to restoring the
 * original prom, we have to use the new/tmp prom's translations
 * when requesting prom services.
 *
 * cif_handler starts out as the original prom cookie, and that gets used
 * by client_handler() to jump into the prom.  here we splice-in a wrapper
 * routine by writing cif_handler; client_handler() will now jump to the
 * wrapper which switches the %tba to the new/tmp prom's trap table then
 * jumps to the new cookie.
 */
void
i_cpr_cif_setup(int action)
{
        extern void *i_cpr_orig_cif, *cif_handler;
        extern int i_cpr_cif_wrapper(void *);

        /*
         * save the original cookie and change the current cookie to the
         * wrapper routine.  later we just restore the original cookie.
         */
        if (action == CIF_SPLICE) {
                i_cpr_orig_cif = cif_handler;
                cif_handler = (void *)i_cpr_cif_wrapper;
        } else if (action == CIF_UNLINK)
                cif_handler = i_cpr_orig_cif;
}
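
/*
 * Illustrative sketch (not part of the original source): the splice and
 * unlink actions are always paired around the window where the new/tmp
 * prom must service calls, as i_cpr_mp_setup() does below:
 *
 *	i_cpr_cif_setup(CIF_SPLICE);
 *	... prom services are called through the wrapper here ...
 *	i_cpr_cif_setup(CIF_UNLINK);
 *	(void) i_cpr_prom_pages(CPR_PROM_RESTORE);
 */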


/*
 * launch slave cpus into kernel text, pause them,
 * and restore the original prom pages
 */
void
i_cpr_mp_setup(void)
{
        extern void restart_other_cpu(int);
        cpu_t *cp;

        uint64_t kctx = kcontextreg;

        /*
         * Do not allow setting page size codes in the MMU primary context
         * register while using the cif wrapper. This is needed to work
         * around OBP's incorrect handling of this MMU register.
         */
        kcontextreg = 0;

        /*
         * reset cpu_ready_set so x_calls work properly
         */
        CPUSET_ZERO(cpu_ready_set);
        CPUSET_ADD(cpu_ready_set, getprocessorid());

        /*
         * setup cif to use the cookie from the new/tmp prom
         * and setup tmp handling for calling prom services.
         */
        i_cpr_cif_setup(CIF_SPLICE);

        /*
         * at this point, only the nucleus and a few cpr pages are
         * mapped in.  once we switch to the kernel trap table,
         * we can access the rest of kernel space.
         */
        prom_set_traptable(&trap_table);

        if (ncpus > 1) {
                sfmmu_init_tsbs();

                mutex_enter(&cpu_lock);
                /*
                 * None of the slave cpus are ready at this time,
                 * yet the cpu structures have various cpu_flags set;
                 * clear cpu_flags and mutex_ready.
                 * Since we are coming up from a CPU suspend, the slave cpus
                 * are frozen.
                 */
                for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next) {
                        cp->cpu_flags = CPU_FROZEN;
                        cp->cpu_m.mutex_ready = 0;
                }

                for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next)
                        restart_other_cpu(cp->cpu_id);

                pause_cpus(NULL);
                mutex_exit(&cpu_lock);

                i_cpr_xcall(i_cpr_clear_entries);
        } else
                i_cpr_clear_entries(0, 0);

        /*
         * now unlink the cif wrapper;  WARNING: do not call any
         * prom_xxx() routines until after prom pages are restored.
         */
        i_cpr_cif_setup(CIF_UNLINK);

        (void) i_cpr_prom_pages(CPR_PROM_RESTORE);

        /* allow setting page size codes in MMU primary context register */
        kcontextreg = kctx;
}


/*
 * end marker for jumpback page;
 * this symbol is used to check the size of i_cpr_resume_setup()
 * and the above text.  For simplicity, the Makefile needs to
 * link i_cpr_resume_setup.o and cpr_impl.o consecutively.
 */
void
i_cpr_end_jumpback(void)
{
}


/*
 * scan tlb entries with reader; when valid entries are found,
 * the filter routine will selectively save/clear them
 */
static void
i_cpr_scan_tlb(cti_t *ctip)
{
        uint64_t va_tag;
        int tlb_index;
        tte_t tte;

        for (tlb_index = ctip->index; tlb_index >= 0; tlb_index--) {
                (*ctip->reader)((uint_t)tlb_index, &tte, &va_tag);
                if (va_tag && TTE_IS_VALID(&tte))
                        (*ctip->filter)(tlb_index, &tte, va_tag, ctip);
        }
}
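
/*
 * Illustrative sketch (not part of the original source): a caller fills
 * in a cti_t with the matching tlb accessors and a filter routine, then
 * scans, as i_cpr_clear_entries() does below for the dtlb:
 *
 *	cti_t cti;
 *
 *	bzero(&cti, sizeof (cti));
 *	cti.filter = i_cpr_ufw;
 *	cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;
 *	cti.reader = dtlb_rd_entry;
 *	cti.writer = dtlb_wr_entry;
 *	i_cpr_scan_tlb(&cti);
 */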


/*
 * filter for locked tlb entries that reference the text/data nucleus
 * and any bigktsb's; these will be reinstalled by cprboot on all cpus
 */
/* ARGSUSED */
static void
i_cpr_lnb(int index, tte_t *ttep, uint64_t va_tag, void *ctrans)
{
        cti_t *ctip;

        /*
         * record tlb data at ctip->dst; the target tlb index starts
         * at the highest tlb offset and moves towards 0.  the prom
         * reserves both dtlb and itlb index 0.  any selected entry
         * also gets marked to prevent being flushed during resume
         */
        if (TTE_IS_LOCKED(ttep) && (va_tag == (uint64_t)textva ||
            va_tag == (uint64_t)datava || IS_BIGKTSB(va_tag))) {
                ctip = ctrans;
                while ((1 << ctip->index) & ctip->skip)
                        ctip->index--;
                ASSERT(ctip->index > 0);
                ASSERT(ctip->dst < ctip->tail);
                ctip->dst->tte.ll = ttep->ll;
                ctip->dst->va_tag = va_tag;
                ctip->dst->index = ctip->index--;
                ctip->dst->tmp = 0;
                ctip->dst++;
        }
}


/*
 * some tlb entries are stale, filter for unlocked entries
 * within the prom virt range and clear them
 */
static void
i_cpr_ufw(int index, tte_t *ttep, uint64_t va_tag, void *ctrans)
{
        sutlb_t clr;
        cti_t *ctip;

        if (!TTE_IS_LOCKED(ttep) && WITHIN_OFW(va_tag)) {
                ctip = ctrans;
                bzero(&clr, sizeof (clr));
                (*ctip->writer)((uint_t)index, &clr.tte, &clr.va_tag);
        }
}


/*
 * some of the entries installed by cprboot are needed only on a
 * short-term basis and need to be flushed to avoid clogging the tlbs.
 * scan the dtte/itte arrays for items marked as temporary and clear
 * dtlb/itlb entries using wrfunc.
 */
static void
i_cpr_clear_tmp(sutlb_t *listp, int max, tlb_rw_t wrfunc)
{
        sutlb_t clr, *tail;

        bzero(&clr, sizeof (clr));
        for (tail = listp + max; listp < tail && listp->va_tag; listp++) {
                if (listp->tmp)
                        (*wrfunc)((uint_t)listp->index, &clr.tte, &clr.va_tag);
        }
}


/* ARGSUSED */
static void
i_cpr_clear_entries(uint64_t arg1, uint64_t arg2)
{
        extern void demap_all(void);
        cti_t cti;

        i_cpr_clear_tmp(m_info.dtte, CPR_MAX_TLB, dtlb_wr_entry);
        i_cpr_clear_tmp(m_info.itte, CPR_MAX_TLB, itlb_wr_entry);

        /*
         * for newer cpus that implement DEMAP_ALL_TYPE, demap_all is
         * a second label for vtag_flushall.  the call is made using
         * vtag_flushall() instead of demap_all() due to runtime and
         * krtld results with both older and newer cpu modules.
         */
        if (&demap_all != 0) {
                vtag_flushall();
                return;
        }

        /*
         * for older V9 cpus, scan tlbs and clear stale entries
         */
        bzero(&cti, sizeof (cti));
        cti.filter = i_cpr_ufw;

        cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;
        cti.reader = dtlb_rd_entry;
        cti.writer = dtlb_wr_entry;
        i_cpr_scan_tlb(&cti);

        cti.index = cpunodes[CPU->cpu_id].itlb_size - 1;
        cti.reader = itlb_rd_entry;
        cti.writer = itlb_wr_entry;
        i_cpr_scan_tlb(&cti);
}


/*
 * craft tlb info for tmp use during resume; this data gets used by
 * cprboot to install tlb entries.  we also mark each struct as tmp
 * so those tlb entries will get flushed after switching to the kernel
 * trap table.  no data needs to be recorded for vaddr when it falls
 * within the nucleus since we've already recorded nucleus ttes and
 * an 8K tte would conflict with a 4MB tte.  e.g. the cpr module
 * text/data may have been loaded into the text/data nucleus.
 */
static void
i_cpr_make_tte(cti_t *ctip, void *vaddr, caddr_t nbase)
{
        pfn_t ppn;
        uint_t rw;

        if (WITHIN_NUCLEUS((caddr_t)vaddr, nbase))
                return;

        while ((1 << ctip->index) & ctip->skip)
                ctip->index--;
        ASSERT(ctip->index > 0);
        ASSERT(ctip->dst < ctip->tail);

        /*
         * without any global service available to lookup
         * a tte by vaddr, we craft our own here:
         */
        ppn = va_to_pfn(vaddr);
        rw = (nbase == datava) ? TTE_HWWR_INT : 0;
        ctip->dst->tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
        ctip->dst->tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
            TTE_CP_INT | TTE_PRIV_INT | rw;
        ctip->dst->va_tag = ((uintptr_t)vaddr & MMU_PAGEMASK);
        ctip->dst->index = ctip->index--;
        ctip->dst->tmp = 1;
        ctip->dst++;
}


static void
i_cpr_xcall(xcfunc_t func)
{
        uint_t pil, reset_pil;

        pil = getpil();
        if (pil < XCALL_PIL)
                reset_pil = 0;
        else {
                reset_pil = 1;
                setpil(XCALL_PIL - 1);
        }
        xc_some(cpu_ready_set, func, 0, 0);
        if (reset_pil)
                setpil(pil);
}


/*
 * restart paused slave cpus
 */
void
i_cpr_machdep_setup(void)
{
        if (ncpus > 1) {
                CPR_DEBUG(CPR_DEBUG1, "MP restarted...\n");
                mutex_enter(&cpu_lock);
                start_cpus();
                mutex_exit(&cpu_lock);
        }
}


/*
 * Stop all interrupt activities in the system
 */
void
i_cpr_stop_intr(void)
{
        (void) spl7();
}

/*
 * Set machine up to take interrupts
 */
void
i_cpr_enable_intr(void)
{
        (void) spl0();
}


/*
 * record cpu nodes and ids
 */
static void
i_cpr_save_cpu_info(void)
{
        struct sun4u_cpu_info *scip;
        cpu_t *cp;

        scip = m_info.sci;
        cp = CPU;
        do {
                ASSERT(scip < &m_info.sci[NCPU]);
                scip->cpu_id = cp->cpu_id;
                scip->node = cpunodes[cp->cpu_id].nodeid;
                scip++;
        } while ((cp = cp->cpu_next) != CPU);
}


/*
 * Write necessary machine dependent information to the cpr state file,
 * e.g. the sun4u mmu secondary ctx for the currently running process (cpr) ...
 */
int
i_cpr_write_machdep(vnode_t *vp)
{
        extern uint_t getpstate(), getwstate();
        extern uint_t i_cpr_tstack_size;
        const char ustr[] = ": unix-tte 2drop false ;";
        uintptr_t tinfo;
        label_t *ltp;
        cmd_t cmach;
        char *fmt;
        int rc;

        /*
         * ustr[] is used as temporary forth words during
         * slave startup sequence, see sfmmu_mp_startup()
         */

        cmach.md_magic = (uint_t)CPR_MACHDEP_MAGIC;
        cmach.md_size = sizeof (m_info) + sizeof (ustr);

        if (rc = cpr_write(vp, (caddr_t)&cmach, sizeof (cmach))) {
                cpr_err(CE_WARN, "Failed to write descriptor.");
                return (rc);
        }

        /*
         * m_info is now cleared in i_cpr_dump_setup()
         */
        m_info.ksb = (uint32_t)STACK_BIAS;
        m_info.kpstate = (uint16_t)getpstate();
        m_info.kwstate = (uint16_t)getwstate();
        CPR_DEBUG(CPR_DEBUG1, "stack bias 0x%x, pstate 0x%x, wstate 0x%x\n",
            m_info.ksb, m_info.kpstate, m_info.kwstate);

        ltp = &ttolwp(curthread)->lwp_qsav;
        m_info.qsav_pc = (cpr_ext)ltp->val[0];
        m_info.qsav_sp = (cpr_ext)ltp->val[1];

        /*
         * Set secondary context to INVALID_CONTEXT to force the HAT
         * to re-setup the MMU registers and locked TTEs it needs for
         * TLB miss handling.
         */
        m_info.mmu_ctx_sec = INVALID_CONTEXT;
        m_info.mmu_ctx_pri = KCONTEXT;

        tinfo = (uintptr_t)curthread;
        m_info.thrp = (cpr_ptr)tinfo;

        tinfo = (uintptr_t)i_cpr_resume_setup;
        m_info.func = (cpr_ptr)tinfo;

        /*
         * i_cpr_data_page is comprised of a 4K stack area and a few
         * trailing data symbols; the page is shared by the prom and
         * kernel during resume.  the stack size is recorded here
         * and used by cprboot to set %sp
         */
        tinfo = (uintptr_t)&i_cpr_data_page;
        m_info.tmp_stack = (cpr_ptr)tinfo;
        m_info.tmp_stacksize = i_cpr_tstack_size;

        m_info.test_mode = cpr_test_mode;

        i_cpr_save_cpu_info();

        if (rc = cpr_write(vp, (caddr_t)&m_info, sizeof (m_info))) {
                cpr_err(CE_WARN, "Failed to write machdep info.");
                return (rc);
        }

        fmt = "error writing %s forth info";
        if (rc = cpr_write(vp, (caddr_t)ustr, sizeof (ustr)))
                cpr_err(CE_WARN, fmt, "unix-tte");

        return (rc);
}


/*
 * Save miscellaneous information which needs to be written to the
 * state file.  This information is required to re-initialize
 * kernel/prom handshaking.
 */
void
i_cpr_save_machdep_info(void)
{
        CPR_DEBUG(CPR_DEBUG5, "jumpback size = 0x%lx\n",
            (uintptr_t)&i_cpr_end_jumpback -
            (uintptr_t)i_cpr_resume_setup);

        /*
         * Verify the jumpback code all falls in one page.
         */
        if (((uintptr_t)&i_cpr_end_jumpback & MMU_PAGEMASK) !=
            ((uintptr_t)i_cpr_resume_setup & MMU_PAGEMASK))
                cpr_err(CE_PANIC, "jumpback code exceeds one page.");
}


/*
 * cpu0 should contain bootcpu info
 */
cpu_t *
i_cpr_bootcpu(void)
{
        return (&cpu0);
}

processorid_t
i_cpr_bootcpuid(void)
{
        return (0);
}

/*
 * Return the virtual address of the mapping area
 */
caddr_t
i_cpr_map_setup(void)
{
        /*
         * Allocate a virtual memory range spanned by an hmeblk.
         * This would be 8 hments or 64k bytes.  Starting VA
         * must be 64k (8-page) aligned.
         */
        cpr_vaddr = vmem_xalloc(heap_arena,
            mmu_ptob(NHMENTS), mmu_ptob(NHMENTS),
            0, 0, NULL, NULL, VM_NOSLEEP);
        return (cpr_vaddr);
}
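
/*
 * Worked example (illustrative, not part of the original source):
 * with NHMENTS == 8 and MMU_PAGESIZE == 8K, mmu_ptob(NHMENTS) is
 * 8 * 8K == 64K, so the call above requests a 64K range aligned on
 * a 64K boundary, i.e. exactly one hmeblk span.
 */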

/*
 * create tmp locked tlb entries for a group of phys pages;
 *
 * i_cpr_mapin/i_cpr_mapout should always be called in pairs,
 * otherwise we would fill up the tlb with locked entries
 */
void
i_cpr_mapin(caddr_t vaddr, uint_t pages, pfn_t ppn)
{
        tte_t tte;
        extern pfn_t curthreadpfn;
        extern int curthreadremapped;

        curthreadremapped = (ppn <= curthreadpfn && curthreadpfn < ppn + pages);

        for (; pages--; ppn++, vaddr += MMU_PAGESIZE) {
                tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
                tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
                    TTE_CP_INT | TTE_PRIV_INT | TTE_HWWR_INT;
                sfmmu_dtlb_ld_kva(vaddr, &tte);
        }
}

void
i_cpr_mapout(caddr_t vaddr, uint_t pages)
{
        extern int curthreadremapped;

        if (curthreadremapped && vaddr <= (caddr_t)curthread &&
            (caddr_t)curthread < vaddr + pages * MMU_PAGESIZE)
                curthreadremapped = 0;

        for (; pages--; vaddr += MMU_PAGESIZE)
                vtag_flushpage(vaddr, (uint64_t)ksfmmup);
}
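
/*
 * Illustrative sketch (not part of the original source): a typical
 * paired use of the mapping area, copying one phys page at a time,
 * as i_cpr_save_ppages() does below:
 *
 *	i_cpr_mapin(cpr_vaddr, 1, *pphys);
 *	bcopy(cpr_vaddr, dst, MMU_PAGESIZE);
 *	i_cpr_mapout(cpr_vaddr, 1);
 */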

/*
 * We're done using the mapping area; release virtual space
 */
void
i_cpr_map_destroy(void)
{
        vmem_free(heap_arena, cpr_vaddr, mmu_ptob(NHMENTS));
        cpr_vaddr = NULL;
}

/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
}


/*
 * This function takes care of pages which are not in kas or need to be
 * taken care of in a special way.  For example, panicbuf pages are not
 * in kas and their pages are allocated via prom_retain().
 */
pgcnt_t
i_cpr_count_special_kpages(int mapflag, bitfunc_t bitfunc)
{
        struct cpr_map_info *pri, *tail;
        pgcnt_t pages, total = 0;
        pfn_t pfn;

        /*
         * Save information about prom retained panicbuf pages
         */
        if (bitfunc == cpr_setbit) {
                pri = &cpr_prom_retain[CPR_PANICBUF];
                pri->virt = (cpr_ptr)panicbuf;
                pri->phys = va_to_pa(panicbuf);
                pri->size = sizeof (panicbuf);
        }

        /*
         * Go through the prom_retain array to tag those pages.
         */
        tail = &cpr_prom_retain[CPR_PROM_RETAIN_CNT];
        for (pri = cpr_prom_retain; pri < tail; pri++) {
                pages = mmu_btopr(pri->size);
                for (pfn = ADDR_TO_PN(pri->phys); pages--; pfn++) {
                        if (pf_is_memory(pfn)) {
                                if (bitfunc == cpr_setbit) {
                                        if ((*bitfunc)(pfn, mapflag) == 0)
                                                total++;
                                } else
                                        total++;
                        }
                }
        }

        return (total);
}


/*
 * Free up memory-related resources here.  We start by freeing buffers
 * allocated during suspend initialization.  Also, free up the mapping
 * resources allocated in cpr_init().
 */
void
i_cpr_free_memory_resources(void)
{
        (void) i_cpr_prom_pages(CPR_PROM_FREE);
        i_cpr_map_destroy();
        i_cpr_storage_free();
}


/*
 * Derived from cpr_write_statefile().
 * Save the sensitive pages to the storage area and do bookkeeping
 * using the sensitive descriptors. Each descriptor will contain no more
 * than CPR_MAXCONTIG amount of contiguous pages to match the max amount
 * of pages that statefile gets written to disk at each write.
 * XXX The CPR_MAXCONTIG can be changed to the size of the compression
 * scratch area.
 */
static int
i_cpr_save_to_storage(void)
{
        sensitive_size_saved = 0;
        sensitive_pages_saved = 0;
        sensitive_write_ptr = i_cpr_storage_data_base;
        return (cpr_contig_pages(NULL, SAVE_TO_STORAGE));
}


/*
 * This routine allocates space to save the sensitive kernel pages,
 * i.e. kernel data nucleus, kvalloc and kvseg segments.
 * It's assumed that those segments are the only areas that can be
 * contaminated by memory allocations during statefile dumping.
 * The space allocated here contains:
 *      A list of descriptors describing the saved sensitive pages.
 *      The storage area for saving the compressed sensitive kernel pages.
 * Since storage pages are allocated from segkmem, they need to be
 * excluded when saving.
 */
int
i_cpr_save_sensitive_kpages(void)
{
        static const char pages_fmt[] = "\n%s %s allocs\n"
            "   spages %ld, vpages %ld, diff %ld\n";
        int retry_cnt;
        int error = 0;
        pgcnt_t pages, spages, vpages;
        caddr_t addr;
        char *str;

        /*
         * Tag sensitive kpages. Allocate space for storage descriptors
         * and storage data area based on the resulting bitmaps.
         * Note: The storage space will be part of the sensitive
         * segment, so we need to tag kpages here before the storage
         * is actually allocated just so their space won't be accounted
         * for. They will not be part of the statefile although those
         * pages will be claimed by cprboot.
         */
        cpr_clear_bitmaps();

        spages = i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
        vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);
        pages = spages - vpages;

        str = "i_cpr_save_sensitive_kpages:";
        CPR_DEBUG(CPR_DEBUG7, pages_fmt, "before", str, spages, vpages, pages);

        /*
         * Allocate space to save the clean sensitive kpages
         */
        for (retry_cnt = 0; retry_cnt < MAX_STORAGE_ALLOC_RETRY; retry_cnt++) {
                /*
                 * Alloc on first pass or realloc if we are retrying because
                 * of insufficient storage for sensitive pages
                 */
                if (retry_cnt == 0 || error == ENOMEM) {
                        if (i_cpr_storage_data_base) {
                                kmem_free(i_cpr_storage_data_base,
                                    mmu_ptob(i_cpr_storage_data_sz));
                                i_cpr_storage_data_base = NULL;
                                i_cpr_storage_data_sz = 0;
                        }
                        addr = i_cpr_storage_data_alloc(pages,
                            &i_cpr_storage_data_sz, retry_cnt);
                        if (addr == NULL) {
                                CPR_DEBUG(CPR_DEBUG7,
                                    "\n%s can't allocate data storage space!\n",
                                    str);
                                return (ENOMEM);
                        }
                        i_cpr_storage_data_base = addr;
                        i_cpr_storage_data_end =
                            addr + mmu_ptob(i_cpr_storage_data_sz);
                }

                /*
                 * Allocate on first pass, only realloc if retry is because of
                 * insufficient descriptors, but reset contents on each pass
                 * (desc_alloc resets contents as well)
                 */
                if (retry_cnt == 0 || error == -1) {
                        error = i_cpr_storage_desc_alloc(
                            &i_cpr_storage_desc_base, &i_cpr_storage_desc_pgcnt,
                            &i_cpr_storage_desc_end, retry_cnt);
                        if (error != 0)
                                return (error);
                } else {
                        i_cpr_storage_desc_init(i_cpr_storage_desc_base,
                            i_cpr_storage_desc_pgcnt, i_cpr_storage_desc_end);
                }

                /*
                 * We are ready to save the sensitive kpages to storage.
                 * We cannot trust what's tagged in the bitmaps anymore
                 * after storage allocations.  Clear up the bitmaps and
                 * retag the sensitive kpages again.  The storage pages
                 * should be untagged.
                 */
                cpr_clear_bitmaps();

                spages =
                    i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
                vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);

                CPR_DEBUG(CPR_DEBUG7, pages_fmt, "after ", str,
                    spages, vpages, spages - vpages);

                /*
                 * Returns 0 on success, -1 if too few descriptors, and
                 * ENOMEM if not enough space to save sensitive pages
                 */
                CPR_DEBUG(CPR_DEBUG1, "compressing pages to storage...\n");
                error = i_cpr_save_to_storage();
                if (error == 0) {
                        /* Saving to storage succeeded */
                        CPR_DEBUG(CPR_DEBUG1, "compressed %d pages\n",
                            sensitive_pages_saved);
                        break;
                } else if (error == -1)
                        CPR_DEBUG(CPR_DEBUG1, "%s too few descriptors\n", str);
        }
        if (error == -1)
                error = ENOMEM;
        return (error);
}


/*
 * Estimate how much memory we will need to save
 * the sensitive pages with compression.
 */
static caddr_t
i_cpr_storage_data_alloc(pgcnt_t pages, pgcnt_t *alloc_pages, int retry_cnt)
{
        pgcnt_t alloc_pcnt, last_pcnt;
        caddr_t addr;
        char *str;

        str = "i_cpr_storage_data_alloc:";
        if (retry_cnt == 0) {
                /*
                 * common compression ratio is about 3:1
                 * initial storage allocation is estimated at 40%
                 * to cover the majority of cases
                 */
                alloc_pcnt = INITIAL_ALLOC_PCNT;
                *alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
                CPR_DEBUG(CPR_DEBUG7, "%s sensitive pages: %ld\n", str, pages);
                CPR_DEBUG(CPR_DEBUG7,
                    "%s initial est pages: %ld, alloc %ld%%\n",
                    str, *alloc_pages, alloc_pcnt);
        } else {
                /*
                 * calculate the prior compression percentage (x100)
                 * from the last attempt to save sensitive pages
                 */
                ASSERT(sensitive_pages_saved != 0);
                last_pcnt = (mmu_btopr(sensitive_size_saved) * INTEGRAL) /
                    sensitive_pages_saved;
                CPR_DEBUG(CPR_DEBUG7, "%s last ratio %ld%%\n", str, last_pcnt);

                /*
                 * new estimated storage size is based on
                 * the larger ratio + 5% for each retry:
                 * pages * (last + [5%, 10%])
                 */
                alloc_pcnt = MAX(last_pcnt, INITIAL_ALLOC_PCNT) +
                    (retry_cnt * 5);
                *alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
                CPR_DEBUG(CPR_DEBUG7, "%s Retry est pages: %ld, alloc %ld%%\n",
                    str, *alloc_pages, alloc_pcnt);
        }

        addr = kmem_alloc(mmu_ptob(*alloc_pages), KM_NOSLEEP);
        CPR_DEBUG(CPR_DEBUG7, "%s alloc %ld pages\n", str, *alloc_pages);
        return (addr);
}
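
/*
 * Worked example (illustrative, not part of the original source):
 * for pages == 1000 on the first pass, *alloc_pages is
 * (1000 * 40) / 100 == 400 pages.  If that attempt saved 1000 pages
 * into 450 pages' worth of data, retry 1 computes last_pcnt == 45 and
 * allocates (1000 * (45 + 5)) / 100 == 500 pages.
 */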


void
i_cpr_storage_free(void)
{
        /* Free descriptors */
        if (i_cpr_storage_desc_base) {
                kmem_free(i_cpr_storage_desc_base,
                    mmu_ptob(i_cpr_storage_desc_pgcnt));
                i_cpr_storage_desc_base = NULL;
                i_cpr_storage_desc_pgcnt = 0;
        }


        /* Data storage */
        if (i_cpr_storage_data_base) {
                kmem_free(i_cpr_storage_data_base,
                    mmu_ptob(i_cpr_storage_data_sz));
                i_cpr_storage_data_base = NULL;
                i_cpr_storage_data_sz = 0;
        }
}


/*
 * This routine is derived from cpr_compress_and_write().
 * 1. Do bookkeeping in the descriptor for the contiguous sensitive chunk.
 * 2. Compress and save the clean sensitive pages into the storage area.
 */
int
i_cpr_compress_and_save(int chunks, pfn_t spfn, pgcnt_t pages)
{
        extern char *cpr_compress_pages(cpd_t *, pgcnt_t, int);
        extern caddr_t i_cpr_storage_data_end;
        uint_t remaining, datalen;
        uint32_t test_usum;
        char *datap;
        csd_t *descp;
        cpd_t cpd;
        int error;

        /*
         * Fill next empty storage descriptor
         */
        descp = i_cpr_storage_desc_base + chunks - 1;
        if (descp >= i_cpr_storage_desc_end) {
                CPR_DEBUG(CPR_DEBUG1, "ran out of descriptors, base 0x%p, "
                    "chunks %d, end 0x%p, descp 0x%p\n",
                    (void *)i_cpr_storage_desc_base, chunks,
                    (void *)i_cpr_storage_desc_end, (void *)descp);
                return (-1);
        }
        ASSERT(descp->csd_dirty_spfn == (uint_t)-1);
        i_cpr_storage_desc_last_used = descp;

        descp->csd_dirty_spfn = spfn;
        descp->csd_dirty_npages = pages;

        i_cpr_mapin(CPR->c_mapping_area, pages, spfn);

        /*
         * try compressing pages and copy cpd fields
         * pfn is copied for debug use
         */
        cpd.cpd_pfn = spfn;
        datap = cpr_compress_pages(&cpd, pages, C_COMPRESSING);
        datalen = cpd.cpd_length;
        descp->csd_clean_compressed = (cpd.cpd_flag & CPD_COMPRESS);
#ifdef DEBUG
        descp->csd_usum = cpd.cpd_usum;
        descp->csd_csum = cpd.cpd_csum;
#endif

        error = 0;

        /*
         * Save the raw or compressed data to the storage area pointed to by
         * sensitive_write_ptr. Make sure the storage space is big enough to
         * hold the result. Otherwise roll back to increase the storage space.
         */
        descp->csd_clean_sva = (cpr_ptr)sensitive_write_ptr;
        descp->csd_clean_sz = datalen;
        if ((sensitive_write_ptr + datalen) < i_cpr_storage_data_end) {
                extern  void cprbcopy(void *, void *, size_t);

                cprbcopy(datap, sensitive_write_ptr, datalen);
                sensitive_size_saved += datalen;
                sensitive_pages_saved += descp->csd_dirty_npages;
                sensitive_write_ptr += datalen;
        } else {
                remaining = (i_cpr_storage_data_end - sensitive_write_ptr);
                CPR_DEBUG(CPR_DEBUG1, "i_cpr_compress_and_save: The storage "
                    "space is too small!\ngot %d, want %d\n\n",
                    remaining, (remaining + datalen));
#ifdef  DEBUG
                /*
                 * Check to see if the content of the sensitive pages that we
                 * just copied has changed during this small time window.
                 */
                test_usum = checksum32(CPR->c_mapping_area, mmu_ptob(pages));
                descp->csd_usum = cpd.cpd_usum;
                if (test_usum != descp->csd_usum) {
                        CPR_DEBUG(CPR_DEBUG1, "\nWARNING: "
                            "i_cpr_compress_and_save: "
                            "Data in the range of pfn 0x%lx to pfn "
                            "0x%lx has changed after being saved "
                            "into storage.", spfn, (spfn + pages - 1));
                }
#endif
                error = ENOMEM;
        }

        i_cpr_mapout(CPR->c_mapping_area, pages);
        return (error);
}


/*
 * This routine is derived from cpr_count_kpages().
 * It goes through kernel data nucleus and segkmem segments to select
 * pages in use and mark them in the corresponding bitmap.
 */
pgcnt_t
i_cpr_count_sensitive_kpages(int mapflag, bitfunc_t bitfunc)
{
        pgcnt_t kdata_cnt = 0, segkmem_cnt = 0;
        extern caddr_t e_moddata;
        extern struct seg kvalloc;
        extern struct seg kmem64;
        size_t size;

        /*
         * Kernel data nucleus pages
         */
        size = e_moddata - s_data;
        kdata_cnt += cpr_count_pages(s_data, size,
            mapflag, bitfunc, DBG_SHOWRANGE);

        /*
         * kvseg and kvalloc pages
         */
        segkmem_cnt += cpr_scan_kvseg(mapflag, bitfunc, &kvseg);
        segkmem_cnt += cpr_count_pages(kvalloc.s_base, kvalloc.s_size,
            mapflag, bitfunc, DBG_SHOWRANGE);

        /* segment to support kernel memory usage above 32-bit space (4GB) */
        if (kmem64.s_base)
                segkmem_cnt += cpr_count_pages(kmem64.s_base, kmem64.s_size,
                    mapflag, bitfunc, DBG_SHOWRANGE);

        CPR_DEBUG(CPR_DEBUG7, "\ni_cpr_count_sensitive_kpages:\n"
            "\tkdata_cnt %ld + segkmem_cnt %ld = %ld pages\n",
            kdata_cnt, segkmem_cnt, kdata_cnt + segkmem_cnt);

        return (kdata_cnt + segkmem_cnt);
}


pgcnt_t
i_cpr_count_storage_pages(int mapflag, bitfunc_t bitfunc)
{
        pgcnt_t count = 0;

        if (i_cpr_storage_desc_base) {
                count += cpr_count_pages((caddr_t)i_cpr_storage_desc_base,
                    (size_t)mmu_ptob(i_cpr_storage_desc_pgcnt),
                    mapflag, bitfunc, DBG_SHOWRANGE);
        }
        if (i_cpr_storage_data_base) {
                count += cpr_count_pages(i_cpr_storage_data_base,
                    (size_t)mmu_ptob(i_cpr_storage_data_sz),
                    mapflag, bitfunc, DBG_SHOWRANGE);
        }
        return (count);
}


/*
 * Derived from cpr_write_statefile().
 * Allocate (or reallocate after exhausting the supply) descriptors for each
 * chunk of contiguous sensitive kpages.
 */
static int
i_cpr_storage_desc_alloc(csd_t **basepp, pgcnt_t *pgsp, csd_t **endpp,
    int retry)
{
        pgcnt_t npages;
        int chunks;
        csd_t   *descp, *end;
        size_t  len;
        char *str = "i_cpr_storage_desc_alloc:";

        /*
         * On initial allocation, add some extra to cover overhead caused
         * by the allocation for the storage area later.
         */
        if (retry == 0) {
                chunks = cpr_contig_pages(NULL, STORAGE_DESC_ALLOC) +
                    EXTRA_DESCS;
                npages = mmu_btopr(sizeof (**basepp) * (pgcnt_t)chunks);
                CPR_DEBUG(CPR_DEBUG7, "%s chunks %d, ", str, chunks);
        } else {
                CPR_DEBUG(CPR_DEBUG7, "%s retry %d: ", str, retry);
                npages = *pgsp + 1;
        }
        /* Free old descriptors, if any */
        if (*basepp)
                kmem_free((caddr_t)*basepp, mmu_ptob(*pgsp));

        descp = *basepp = kmem_alloc(mmu_ptob(npages), KM_NOSLEEP);
        if (descp == NULL) {
                CPR_DEBUG(CPR_DEBUG7, "%s no space for descriptors!\n", str);
                return (ENOMEM);
        }

        *pgsp = npages;
        len = mmu_ptob(npages);
        end = *endpp = descp + (len / (sizeof (**basepp)));
        CPR_DEBUG(CPR_DEBUG7, "npages 0x%lx, len 0x%lx, items 0x%lx\n\t*basepp "
            "%p, *endpp %p\n", npages, len, (len / (sizeof (**basepp))),
            (void *)*basepp, (void *)*endpp);
        i_cpr_storage_desc_init(descp, npages, end);
        return (0);
}

static void
i_cpr_storage_desc_init(csd_t *descp, pgcnt_t npages, csd_t *end)
{
        size_t  len = mmu_ptob(npages);

        /* Initialize the descriptors to something impossible. */
        bzero(descp, len);
#ifdef  DEBUG
        /*
         * This condition is tested by an ASSERT
         */
        for (; descp < end; descp++)
                descp->csd_dirty_spfn = (uint_t)-1;
#endif
}

int
i_cpr_dump_sensitive_kpages(vnode_t *vp)
{
        int     error = 0;
        uint_t  spin_cnt = 0;
        csd_t   *descp;

        /*
         * The following two variables need to be reinitialized
         * for each cpr cycle.
         */
        i_cpr_sensitive_bytes_dumped = 0;
        i_cpr_sensitive_pgs_dumped = 0;

        if (i_cpr_storage_desc_base) {
                for (descp = i_cpr_storage_desc_base;
                    descp <= i_cpr_storage_desc_last_used; descp++) {
                        if (error = cpr_dump_sensitive(vp, descp))
                                return (error);
                        spin_cnt++;
                        if ((spin_cnt & 0x5F) == 1)
                                cpr_spinning_bar();
                }
                prom_printf(" \b");
        }

        CPR_DEBUG(CPR_DEBUG7, "\ni_cpr_dump_sensitive_kpages: dumped %ld\n",
            i_cpr_sensitive_pgs_dumped);
        return (0);
}


/*
 * 1. Fill the cpr page descriptor with the info of the dirty pages
 *    and write the descriptor out.  It will be used at resume.
 * 2. Write out the clean data instead of the dirty data.
 *    Note: to save space, the clean data is already compressed.
 */
static int
cpr_dump_sensitive(vnode_t *vp, csd_t *descp)
{
        int error = 0;
        caddr_t datap;
        cpd_t cpd;      /* cpr page descriptor */
        pfn_t   dirty_spfn;
        pgcnt_t dirty_npages;
        size_t clean_sz;
        caddr_t clean_sva;
        int     clean_compressed;
        extern uchar_t cpr_pagecopy[];

        dirty_spfn = descp->csd_dirty_spfn;
        dirty_npages = descp->csd_dirty_npages;
        clean_sva = (caddr_t)descp->csd_clean_sva;
        clean_sz = descp->csd_clean_sz;
        clean_compressed = descp->csd_clean_compressed;

        /* Fill cpr page descriptor. */
        cpd.cpd_magic = (uint_t)CPR_PAGE_MAGIC;
        cpd.cpd_pfn = dirty_spfn;
        cpd.cpd_flag = 0;  /* must init to zero */
        cpd.cpd_pages = dirty_npages;

#ifdef  DEBUG
        if ((cpd.cpd_usum = descp->csd_usum) != 0)
                cpd.cpd_flag |= CPD_USUM;
        if ((cpd.cpd_csum = descp->csd_csum) != 0)
                cpd.cpd_flag |= CPD_CSUM;
#endif

        STAT->cs_dumped_statefsz += mmu_ptob(dirty_npages);

        /*
         * The sensitive kpages are usually saved with compression
         * unless compression could not reduce the size of the data.
         * If the user chose not to have the statefile compressed,
         * we need to decompress the data back before dumping it to disk.
         */
        if (CPR->c_flags & C_COMPRESSING) {
                cpd.cpd_length = clean_sz;
                datap = clean_sva;
                if (clean_compressed)
                        cpd.cpd_flag |= CPD_COMPRESS;
        } else {
                if (clean_compressed) {
                        cpd.cpd_length = decompress(clean_sva, cpr_pagecopy,
                            clean_sz, mmu_ptob(dirty_npages));
                        datap = (caddr_t)cpr_pagecopy;
                        ASSERT(cpd.cpd_length == mmu_ptob(dirty_npages));
                } else {
                        cpd.cpd_length = clean_sz;
                        datap = clean_sva;
                }
                cpd.cpd_csum = 0;
        }

        /* Write cpr page descriptor */
        error = cpr_write(vp, (caddr_t)&cpd, sizeof (cpd));
        if (error) {
                CPR_DEBUG(CPR_DEBUG7, "descp: %p\n", (void *)descp);
#ifdef DEBUG
                debug_enter("cpr_dump_sensitive: cpr_write() page "
                    "descriptor failed!\n");
#endif
                return (error);
        }

        i_cpr_sensitive_bytes_dumped += sizeof (cpd_t);

        /* Write page data */
        error = cpr_write(vp, (caddr_t)datap, cpd.cpd_length);
        if (error) {
                CPR_DEBUG(CPR_DEBUG7, "error: %x\n", error);
                CPR_DEBUG(CPR_DEBUG7, "descp: %p\n", (void *)descp);
                CPR_DEBUG(CPR_DEBUG7, "cpr_write(%p, %p , %lx)\n",
                    (void *)vp, (void *)datap, cpd.cpd_length);
#ifdef DEBUG
                debug_enter("cpr_dump_sensitive: cpr_write() data failed!\n");
#endif
                return (error);
        }

        i_cpr_sensitive_bytes_dumped += cpd.cpd_length;
        i_cpr_sensitive_pgs_dumped += dirty_npages;

        return (error);
}


/*
 * Sanity check to make sure that we have dumped the right number
 * of pages from the different sources to the statefile.
 */
int
i_cpr_check_pgs_dumped(uint_t pgs_expected, uint_t regular_pgs_dumped)
{
        uint_t total_pgs_dumped;

        total_pgs_dumped = regular_pgs_dumped + i_cpr_sensitive_pgs_dumped;

        CPR_DEBUG(CPR_DEBUG7, "\ncheck_pgs: reg %d + sens %ld = %d, "
            "expect %d\n\n", regular_pgs_dumped, i_cpr_sensitive_pgs_dumped,
            total_pgs_dumped, pgs_expected);

        if (pgs_expected == total_pgs_dumped)
                return (0);

        return (EINVAL);
}


int
i_cpr_reusefini(void)
{
        struct vnode *vp;
        cdef_t *cdef;
        size_t size;
        char *bufp;
        int rc;

        if (cpr_reusable_mode)
                cpr_reusable_mode = 0;

        if (rc = cpr_open_deffile(FREAD|FWRITE, &vp)) {
                if (rc == EROFS) {
                        cpr_err(CE_CONT, "uadmin A_FREEZE AD_REUSEFINI "
                            "(uadmin %d %d)\nmust be done with / mounted "
                            "writeable.\n", A_FREEZE, AD_REUSEFINI);
                }
                return (rc);
        }

        cdef = kmem_alloc(sizeof (*cdef), KM_SLEEP);
        rc = cpr_rdwr(UIO_READ, vp, cdef, sizeof (*cdef));

        if (rc) {
                cpr_err(CE_WARN, "Failed reading %s, errno = %d",
                    cpr_default_path, rc);
        } else if (cdef->mini.magic != CPR_DEFAULT_MAGIC) {
                cpr_err(CE_WARN, "bad magic number in %s, cannot restore "
                    "prom values for %s", cpr_default_path,
                    cpr_enumerate_promprops(&bufp, &size));
                kmem_free(bufp, size);
                rc = EINVAL;
        } else {
                /*
                 * clean up prom properties
                 */
                rc = cpr_update_nvram(cdef->props);
                if (rc == 0) {
                        /*
                         * invalidate the disk copy and turn off reusable
                         */
                        cdef->mini.magic = 0;
                        cdef->mini.reusable = 0;
                        if (rc = cpr_rdwr(UIO_WRITE, vp,
                            &cdef->mini, sizeof (cdef->mini))) {
                                cpr_err(CE_WARN, "Failed writing %s, errno %d",
                                    cpr_default_path, rc);
                        }
                }
        }

        (void) VOP_CLOSE(vp, FREAD|FWRITE, 1, (offset_t)0, CRED(), NULL);
        VN_RELE(vp);
        kmem_free(cdef, sizeof (*cdef));

        return (rc);
}


int
i_cpr_reuseinit(void)
{
        int rc = 0;

        if (rc = cpr_default_setup(1))
                return (rc);

        /*
         * We need to validate the default file
         */
        rc = cpr_validate_definfo(1);
        if (rc == 0)
                cpr_reusable_mode = 1;
        else if (rc == EROFS) {
                cpr_err(CE_NOTE, "reuseinit must be performed "
                    "while / is mounted writeable");
        }

        (void) cpr_default_setup(0);

        return (rc);
}


int
i_cpr_check_cprinfo(void)
{
        struct vnode *vp;
        cmini_t mini;
        int rc = 0;

        if (rc = cpr_open_deffile(FREAD, &vp)) {
                if (rc == ENOENT)
                        cpr_err(CE_NOTE, "cprinfo file does not "
                            "exist.  You must run 'uadmin %d %d' "
                            "command while / is mounted writeable,\n"
                            "then reboot and run 'uadmin %d %d' "
                            "to create a reusable statefile",
                            A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
                return (rc);
        }

        rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
        (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
        VN_RELE(vp);

        if (rc) {
                cpr_err(CE_WARN, "Failed reading %s, errno = %d",
                    cpr_default_path, rc);
        } else if (mini.magic != CPR_DEFAULT_MAGIC) {
                cpr_err(CE_CONT, "bad magic number in cprinfo file.\n"
                    "You must run 'uadmin %d %d' while / is mounted "
                    "writeable, then reboot and run 'uadmin %d %d' "
                    "to create a reusable statefile\n",
                    A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
                rc = EINVAL;
        }

        return (rc);
}


int
i_cpr_reusable_supported(void)
{
        return (1);
}


/*
 * find prom phys pages and alloc space for a tmp copy
 */
static int
i_cpr_find_ppages(void)
{
        struct page *pp;
        struct memlist *pmem;
        pgcnt_t npages, pcnt, scnt, vcnt;
        pfn_t ppn, plast, *dst;
        int mapflag;

        cpr_clear_bitmaps();
        mapflag = REGULAR_BITMAP;

        /*
         * there should be a page_t for each phys page used by the kernel;
         * set a bit for each phys page not tracked by a page_t
         */
        pcnt = 0;
        memlist_read_lock();
        for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
                npages = mmu_btop(pmem->ml_size);
                ppn = mmu_btop(pmem->ml_address);
                for (plast = ppn + npages; ppn < plast; ppn++) {
                        if (page_numtopp_nolock(ppn))
                                continue;
                        (void) cpr_setbit(ppn, mapflag);
                        pcnt++;
                }
        }
        memlist_read_unlock();

        /*
         * clear bits for phys pages in each segment
         */
        scnt = cpr_count_seg_pages(mapflag, cpr_clrbit);

        /*
         * set bits for phys pages referenced by the promvp vnode;
         * these pages are mostly comprised of forthdebug words
         */
        vcnt = 0;
        for (pp = promvp.v_pages; pp; ) {
                if (cpr_setbit(pp->p_offset, mapflag) == 0)
                        vcnt++;
                pp = pp->p_vpnext;
                if (pp == promvp.v_pages)
                        break;
        }

	/*
	 * the total number of prom pages is:
	 * (non-page_t pages - seg pages + vnode pages)
	 */
	ppage_count = pcnt - scnt + vcnt;
	CPR_DEBUG(CPR_DEBUG1,
	    "find_ppages: pcnt %ld - scnt %ld + vcnt %ld = %ld\n",
	    pcnt, scnt, vcnt, ppage_count);

	/*
	 * alloc array of pfn_t to store phys page list
	 */
	pphys_list_size = ppage_count * sizeof (pfn_t);
	pphys_list = kmem_alloc(pphys_list_size, KM_NOSLEEP);
	if (pphys_list == NULL) {
		cpr_err(CE_WARN, "cannot alloc pphys_list");
		return (ENOMEM);
	}

	/*
	 * the phys pages still set in the bitmap should be
	 * those used by the prom; scan the bitmap and save
	 * a list of prom phys page numbers
	 */
	dst = pphys_list;
	memlist_read_lock();
	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
		npages = mmu_btop(pmem->ml_size);
		ppn = mmu_btop(pmem->ml_address);
		for (plast = ppn + npages; ppn < plast; ppn++) {
			if (cpr_isset(ppn, mapflag)) {
				ASSERT(dst < (pphys_list + ppage_count));
				*dst++ = ppn;
			}
		}
	}
	memlist_read_unlock();

	/*
	 * allocate space to store prom pages
	 */
	ppage_buf = kmem_alloc(mmu_ptob(ppage_count), KM_NOSLEEP);
	if (ppage_buf == NULL) {
		kmem_free(pphys_list, pphys_list_size);
		pphys_list = NULL;
		cpr_err(CE_WARN, "cannot alloc ppage_buf");
		return (ENOMEM);
	}

	return (0);
}


/*
 * save prom pages to kmem pages
 */
static void
i_cpr_save_ppages(void)
{
	pfn_t *pphys, *plast;
	caddr_t dst;

	/*
	 * map in each prom page and copy to a kmem page
	 */
	dst = ppage_buf;
	plast = pphys_list + ppage_count;
	for (pphys = pphys_list; pphys < plast; pphys++) {
		i_cpr_mapin(cpr_vaddr, 1, *pphys);
		bcopy(cpr_vaddr, dst, MMU_PAGESIZE);
		i_cpr_mapout(cpr_vaddr, 1);
		dst += MMU_PAGESIZE;
	}

	CPR_DEBUG(CPR_DEBUG1, "saved %ld prom pages\n", ppage_count);
}


/*
 * restore prom pages from kmem pages
 */
static void
i_cpr_restore_ppages(void)
{
	pfn_t *pphys, *plast;
	caddr_t src;

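	/* flush the d$ before and after the copy to keep caches consistent */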
	dcache_flushall();

	/*
	 * map in each prom page and copy from a kmem page
	 */
	src = ppage_buf;
	plast = pphys_list + ppage_count;
	for (pphys = pphys_list; pphys < plast; pphys++) {
		i_cpr_mapin(cpr_vaddr, 1, *pphys);
		bcopy(src, cpr_vaddr, MMU_PAGESIZE);
		i_cpr_mapout(cpr_vaddr, 1);
		src += MMU_PAGESIZE;
	}

	dcache_flushall();

	CPR_DEBUG(CPR_DEBUG1, "restored %ld prom pages\n", ppage_count);
}


/*
 * save/restore prom pages or free related allocs
 */
int
i_cpr_prom_pages(int action)
{
	int error;

	if (action == CPR_PROM_SAVE) {
		if (ppage_buf == NULL) {
			ASSERT(pphys_list == NULL);
			if (error = i_cpr_find_ppages())
				return (error);
			i_cpr_save_ppages();
		}
	} else if (action == CPR_PROM_RESTORE) {
		i_cpr_restore_ppages();
	} else if (action == CPR_PROM_FREE) {
		if (pphys_list) {
			ASSERT(pphys_list_size);
			kmem_free(pphys_list, pphys_list_size);
			pphys_list = NULL;
			pphys_list_size = 0;
		}
		if (ppage_buf) {
			ASSERT(ppage_count);
			kmem_free(ppage_buf, mmu_ptob(ppage_count));
			CPR_DEBUG(CPR_DEBUG1, "freed %ld prom pages\n",
			    ppage_count);
			ppage_buf = NULL;
			ppage_count = 0;
		}
	}
	return (0);
}


/*
 * record tlb data for the nucleus, bigktsbs, and the cpr module;
 * cprboot later uses this data to install dtlb/itlb entries.
 * when we jump into the cpr module during the resume phase, those
 * mappings are needed until we switch to the kernel trap table.
 * to make the dtte/itte info available during resume, it must be
 * recorded before sensitive pages are saved; otherwise all the
 * data would appear as NULLs.
 */
static void
i_cpr_save_tlbinfo(void)
{
	cti_t cti = {0};

	/*
	 * during resume, shortly after jumping into the cpr module,
	 * sfmmu_load_mmustate() will overwrite any dtlb entry at any
	 * index used for TSBs; skip is set so that saved ttes target
	 * other tlb offsets and are not lost during resume.  now scan
	 * the dtlb and save locked entries, then add entries for the
	 * tmp stack / data page and the cpr thread structure.
	 */
	cti.dst = m_info.dtte;
	cti.tail = cti.dst + CPR_MAX_TLB;
	cti.reader = dtlb_rd_entry;
	cti.writer = NULL;
	cti.filter = i_cpr_lnb;
	cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;

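	/* skip the dtlb indexes reserved for the utsb/utsb4m TSB pointers */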
	if (utsb_dtlb_ttenum != -1)
		cti.skip = (1 << utsb_dtlb_ttenum);

	if (utsb4m_dtlb_ttenum != -1)
		cti.skip |= (1 << utsb4m_dtlb_ttenum);

	i_cpr_scan_tlb(&cti);
	i_cpr_make_tte(&cti, &i_cpr_data_page, datava);
	i_cpr_make_tte(&cti, curthread, datava);

	/*
	 * scan itlb and save locked entries; add an entry for
	 * the first text page of the cpr module; cprboot will
	 * jump to that page after restoring kernel pages.
	 */
	cti.dst = m_info.itte;
	cti.tail = cti.dst + CPR_MAX_TLB;
	cti.reader = itlb_rd_entry;
	cti.index = cpunodes[CPU->cpu_id].itlb_size - 1;
	cti.skip = 0;
	i_cpr_scan_tlb(&cti);
	i_cpr_make_tte(&cti, (void *)i_cpr_resume_setup, textva);
}


/* ARGSUSED */
int
i_cpr_dump_setup(vnode_t *vp)
{
	/*
	 * zero out m_info and add info to dtte/itte arrays
	 */
	bzero(&m_info, sizeof (m_info));
	i_cpr_save_tlbinfo();
	return (0);
}


int
i_cpr_is_supported(int sleeptype)
{
	char es_prop[] = "energystar-v2";
	pnode_t node;
	int last;
	extern int cpr_supported_override;
	extern int cpr_platform_enable;

	if (sleeptype != CPR_TODISK)
		return (0);

	/*
	 * The next statement tests whether the platform has
	 * turned off cpr support.
	 */
	if (cpr_supported_override)
		return (0);

	/*
	 * Do not inspect the energystar-v* property if the platform
	 * has explicitly turned on cpr support
	 */
	if (cpr_platform_enable)
		return (1);

	node = prom_rootnode();
	if (prom_getproplen(node, es_prop) != -1)
		return (1);
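	/* "energystar-v2" was not found; rewrite the suffix and try "-v3" */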
	last = strlen(es_prop) - 1;
	es_prop[last] = '3';
	return (prom_getproplen(node, es_prop) != -1);
}


/*
 * the actual size of the statefile data isn't known until after all
 * the compressed pages are written; even the inode size doesn't
 * reflect the data size, since there are usually many extra fs
 * blocks.  to record the actual data size, the first sector of the
 * statefile is copied to a tmp buf, and the copy is later updated
 * and flushed to disk.
 */
int
i_cpr_blockzero(char *base, char **bufpp, int *blkno, vnode_t *vp)
{
	extern int cpr_flush_write(vnode_t *);
	static char cpr_sector[DEV_BSIZE];
	cpr_ext bytes, *dst;

	/*
	 * this routine is called after cdd_t and csu_md_t are copied
	 * to cpr_buf; mini-hack alert: the save/update method creates
	 * a dependency on the combined struct size being >= one sector
	 * or DEV_BSIZE; since its introduction in Sol2.7, csu_md_t has
	 * been over 1K bytes and will probably grow with any changes.
	 *
	 * copy when vp is NULL, flush when non-NULL
	 */
	if (vp == NULL) {
		ASSERT((*bufpp - base) >= DEV_BSIZE);
		bcopy(base, cpr_sector, sizeof (cpr_sector));
		return (0);
	} else {
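		/*
		 * convert the final block count to bytes and patch
		 * the size into the saved copy of the first sector
		 */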
		bytes = dbtob(*blkno);
		dst = &((cdd_t *)cpr_sector)->cdd_filesize;
		bcopy(&bytes, dst, sizeof (bytes));
		bcopy(cpr_sector, base, sizeof (cpr_sector));
		*bufpp = base + sizeof (cpr_sector);
		*blkno = cpr_statefile_offset();
		CPR_DEBUG(CPR_DEBUG1, "statefile data size: %ld\n\n", bytes);
		return (cpr_flush_write(vp));
	}
}

/*
 * Allocate bitmaps according to the phys_install list.
 */
static int
i_cpr_bitmap_setup(void)
{
	struct memlist *pmem;
	cbd_t *dp, *tail;
	void *space;
	size_t size;

	/*
	 * The number of bitmap descriptors will be the count of
	 * phys_install ranges plus 1 for a trailing NULL struct.
	 */
	cpr_nbitmaps = 1;
	for (pmem = phys_install; pmem; pmem = pmem->ml_next)
		cpr_nbitmaps++;

	if (cpr_nbitmaps > (CPR_MAX_BMDESC - 1)) {
		cpr_err(CE_WARN, "too many physical memory ranges %d, max %d",
		    cpr_nbitmaps, CPR_MAX_BMDESC - 1);
		return (EFBIG);
	}

	/* Alloc an array of bitmap descriptors. */
	dp = kmem_zalloc(cpr_nbitmaps * sizeof (*dp), KM_NOSLEEP);
	if (dp == NULL) {
		cpr_nbitmaps = 0;
		return (ENOMEM);
	}
	tail = dp + cpr_nbitmaps;

	CPR->c_bmda = dp;
	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
		size = BITMAP_BYTES(pmem->ml_size);
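		/* one allocation holds both the regular and volatile maps */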
		space = kmem_zalloc(size * 2, KM_NOSLEEP);
		if (space == NULL)
			return (ENOMEM);
		ASSERT(dp < tail);
		dp->cbd_magic = CPR_BITMAP_MAGIC;
		dp->cbd_spfn = mmu_btop(pmem->ml_address);
		dp->cbd_epfn = mmu_btop(pmem->ml_address + pmem->ml_size) - 1;
		dp->cbd_size = size;
		dp->cbd_reg_bitmap = (cpr_ptr)space;
		dp->cbd_vlt_bitmap = (cpr_ptr)((caddr_t)space + size);
		dp++;
	}

	/* set magic for the last descriptor */
	ASSERT(dp == (tail - 1));
	dp->cbd_magic = CPR_BITMAP_MAGIC;

	return (0);
}


void
i_cpr_bitmap_cleanup(void)
{
	cbd_t *dp;

	if (CPR->c_bmda == NULL)
		return;
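	/* the zeroed trailing descriptor (cbd_size == 0) ends the array */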
	for (dp = CPR->c_bmda; dp->cbd_size; dp++)
		kmem_free((void *)dp->cbd_reg_bitmap, dp->cbd_size * 2);
	kmem_free(CPR->c_bmda, cpr_nbitmaps * sizeof (*CPR->c_bmda));
	CPR->c_bmda = NULL;
	cpr_nbitmaps = 0;
}

/*
 * A "regular" and a "volatile" bitmap are created for each range of
 * physical memory.  The volatile maps count and track pages that are
 * susceptible to heap corruption, caused by drivers that allocate
 * memory during VOP_DUMP(); the regular maps cover all the other,
 * non-susceptible pages.  Before the bitmaps are written to the
 * statefile, each pair is merged to simplify handling within cprboot.
 */
int
i_cpr_alloc_bitmaps(void)
{
	int err;

	memlist_read_lock();
	err = i_cpr_bitmap_setup();
	memlist_read_unlock();
	if (err)
		i_cpr_bitmap_cleanup();
	return (err);
}


/*
 * Power down the system.
 */
int
i_cpr_power_down(int sleeptype)
{
	int is_defined = 0;
	char *wordexists = "p\" power-off\" find nip swap l! ";
	char *req = "power-off";

	ASSERT(sleeptype == CPR_TODISK);

	/*
	 * the forth fragment above looks up "power-off" in the prom
	 * dictionary and stores -1 into is_defined when the word is
	 * defined, 0 otherwise
	 */
	prom_interpret(wordexists, (uintptr_t)&is_defined, 0, 0, 0, 0);
	if (is_defined) {
		CPR_DEBUG(CPR_DEBUG1, "\ncpr: %s...\n", req);
		prom_interpret(req, 0, 0, 0, 0, 0);
	}
	/*
	 * this only returns if the power-off request failed
	 */
	return (EIO);
}

void
i_cpr_stop_other_cpus(void)
{
	stop_other_cpus();
}

/*
 * Save context for the specified CPU
 */
/* ARGSUSED */
void *
i_cpr_save_context(void *arg)
{
	/*
	 * Not yet
	 */
	ASSERT(0);
	return (NULL);
}

void
i_cpr_pre_resume_cpus(void)
{
	/*
	 * Not yet
	 */
	ASSERT(0);
}

void
i_cpr_post_resume_cpus(void)
{
	/*
	 * Not yet
	 */
	ASSERT(0);
}

/*
 * nothing to do
 */
void
i_cpr_alloc_cpus(void)
{
}

/*
 * nothing to do
 */
void
i_cpr_free_cpus(void)
{
}

/* ARGSUSED */
void
i_cpr_save_configuration(dev_info_t *dip)
{
	/*
	 * this is a no-op on sparc
	 */
}

/* ARGSUSED */
void
i_cpr_restore_configuration(dev_info_t *dip)
{
	/*
	 * this is a no-op on sparc
	 */
}