XXXX pass in cpu_pause_func via pause_cpus
--- old/usr/src/uts/sun4v/os/suspend.c
+++ new/usr/src/uts/sun4v/os/suspend.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 #include <sys/mutex.h>
26 26 #include <sys/cpuvar.h>
27 27 #include <sys/cyclic.h>
28 28 #include <sys/disp.h>
29 29 #include <sys/ddi.h>
30 30 #include <sys/wdt.h>
31 31 #include <sys/callb.h>
32 32 #include <sys/cmn_err.h>
33 33 #include <sys/hypervisor_api.h>
34 34 #include <sys/membar.h>
35 35 #include <sys/x_call.h>
36 36 #include <sys/promif.h>
37 37 #include <sys/systm.h>
38 38 #include <sys/mach_descrip.h>
39 39 #include <sys/cpu_module.h>
40 40 #include <sys/pg.h>
41 41 #include <sys/lgrp.h>
42 42 #include <sys/sysmacros.h>
43 43 #include <sys/sunddi.h>
44 44 #include <sys/cpupart.h>
45 45 #include <sys/hsvc.h>
46 46 #include <sys/mpo.h>
47 47 #include <vm/hat_sfmmu.h>
48 48 #include <sys/time.h>
49 49 #include <sys/clock.h>
50 50
51 51 /*
52 52 * Sun4v OS Suspend
53 53 *
54 54 * Provides a means to suspend a sun4v guest domain by pausing CPUs and then
55 55 * calling into the HV to initiate a suspension. Suspension is sequenced
56 56 * externally by calling suspend_pre, suspend_start, and suspend_post.
57 57 * suspend_pre and suspend_post are meant to perform any special operations
58 58 * that should be done before or after a suspend/resume operation. e.g.,
59 59 * callbacks to cluster software to disable heartbeat monitoring before the
60 60 * system is suspended. suspend_start prepares kernel services to be suspended
61 61 * and then suspends the domain by calling hv_guest_suspend.
62 62 *
63 63 * Special Handling for %tick and %stick Registers
64 64 *
65 65 * After a suspend/resume operation, the %tick and %stick registers may have
66 66 * jumped forwards or backwards. The delta is assumed to be consistent across
67 67 * all CPUs, within the negligible level of %tick and %stick variation
68 68 * acceptable on a cold boot. In order to maintain increasing %tick and %stick
69 69 * counter values without exposing large positive or negative jumps to kernel
70 70 * or user code, a %tick and %stick offset is used. Kernel reads of these
71 71 * counters return the sum of the hardware register counter and offset
72 72 * variable. After a suspend/resume operation, user reads of %tick or %stick
73 73 * are emulated. Suspend code enables emulation by setting the
74 74 * %{tick,stick}.NPT fields which trigger a privileged instruction access
75 75 * trap whenever the registers are read from user mode. If emulation has been
76 76 * enabled, the trap handler emulates the instruction. Emulation is only
77 77 * enabled during a successful suspend/resume operation. When emulation is
78 78 * enabled, CPUs that are DR'd into the system will have their
79 79 * %{tick,stick}.NPT bits set to 1 as well.
80 80 */
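
Reviewer's note: a minimal sketch of the offset scheme the comment above describes, assuming a hypothetical raw-register accessor; in the actual code the sun4v routines declared below fold the offset in at the assembly level.

    /*
     * Sketch only: read_stick_raw() is a hypothetical accessor for the
     * physical %stick register. Kernel reads return the physical value
     * plus the global offset, so the virtual counter never jumps
     * backwards across a suspend/resume.
     */
    static uint64_t
    read_stick_virtual(void)
    {
            return (read_stick_raw() + native_stick_offset);
    }
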
81 81
82 82 extern u_longlong_t gettick(void); /* returns %stick */
83 83 extern uint64_t gettick_counter(void); /* returns %tick */
84 84 extern uint64_t gettick_npt(void);
85 85 extern uint64_t getstick_npt(void);
86 86 extern int mach_descrip_update(void);
87 87 extern cpuset_t cpu_ready_set;
88 88 extern uint64_t native_tick_offset;
89 89 extern uint64_t native_stick_offset;
90 90 extern uint64_t sys_tick_freq;
91 91
92 92 /*
93 93 * Global Sun Cluster pre/post callbacks.
94 94 */
95 95 const char *(*cl_suspend_error_decode)(int);
96 96 int (*cl_suspend_pre_callback)(void);
97 97 int (*cl_suspend_post_callback)(void);
98 98 #define SC_PRE_FAIL_STR_FMT "Sun Cluster pre-suspend failure: %d"
99 99 #define SC_POST_FAIL_STR_FMT "Sun Cluster post-suspend failure: %d"
100 100 #define SC_FAIL_STR_MAX 256
101 101
102 102 /*
103 103 * The minimum major and minor version of the HSVC_GROUP_CORE API group
104 104 * required in order to use OS suspend.
105 105 */
106 106 #define SUSPEND_CORE_MAJOR 1
107 107 #define SUSPEND_CORE_MINOR 2
108 108
109 109 /*
110 110 * By default, sun4v OS suspend is supported if the required HV version
111 111 * is present. suspend_disabled should be set on platforms that do not
112 112 * allow OS suspend regardless of whether or not the HV supports it.
113 113 * It can also be set in /etc/system.
114 114 */
115 115 static int suspend_disabled = 0;
116 116
117 117 /*
118 118 * Controls whether or not user-land tick and stick register emulation
119 119 * will be enabled following a successful suspend operation.
120 120 */
121 121 static int enable_user_tick_stick_emulation = 1;
122 122
123 123 /*
124 124 * Indicates whether or not tick and stick emulation is currently active.
125 125 * After a successful suspend operation, if emulation is enabled, this
126 126 * variable is set to B_TRUE. Global scope to allow emulation code to
127 127 * check if emulation is active.
128 128 */
129 129 boolean_t tick_stick_emulation_active = B_FALSE;
130 130
131 131 /*
132 132 * When non-zero, after a successful suspend and resume, cpunodes, CPU HW
133 133 * sharing data structures, and processor groups will be updated using
134 134 * information from the updated MD.
135 135 */
136 136 static int suspend_update_cpu_mappings = 1;
137 137
138 138 /*
139 139 * The maximum number of microseconds by which the %tick or %stick register
140 140 * can vary between any two CPUs in the system. To calculate the
141 141 * native_stick_offset and native_tick_offset, we measure the change in these
142 142 * registers on one CPU over a suspend/resume. Other CPUs may experience
143 143 * slightly larger or smaller changes. %tick and %stick should be synchronized
144 144 * between CPUs, but there may be some variation. So we add an additional value
145 145 * derived from this variable to ensure that these registers always increase
146 146 * over a suspend/resume operation, assuming all %tick and %stick registers
147 147 * are synchronized (within a certain limit) across CPUs in the system. The
148 148 * delta between %sticks on different CPUs should be a small number of cycles,
149 149 * not perceptible to readers of %stick that migrate between CPUs. We set this
150 150 * value to 1 millisecond, which means that over a suspend/resume operation,
151 151 * all CPUs' %tick and %stick will advance forwards as long as, across all
152 152 * CPUs, the %tick and %stick are synchronized to within 1 ms. This applies to
153 153 * CPUs before the suspend and CPUs after the resume. 1 ms is conservative,
154 154 * but small enough to not trigger TOD faults.
155 155 */
156 156 static uint64_t suspend_tick_stick_max_delta = 1000; /* microseconds */
157 157
158 158 /*
159 159 * The number of times the system has been suspended and resumed.
160 160 */
161 161 static uint64_t suspend_count = 0;
162 162
163 163 /*
164 164 * DBG and DBG_PROM() macro.
165 165 */
166 166 #ifdef DEBUG
167 167
168 168 static int suspend_debug_flag = 0;
169 169
170 170 #define DBG_PROM \
171 171 if (suspend_debug_flag) \
172 172 prom_printf
173 173
174 174 #define DBG \
175 175 if (suspend_debug_flag) \
176 176 suspend_debug
177 177
178 178 static void
179 179 suspend_debug(const char *fmt, ...)
180 180 {
181 181 char buf[512];
182 182 va_list ap;
183 183
184 184 va_start(ap, fmt);
185 185 (void) vsprintf(buf, fmt, ap);
186 186 va_end(ap);
187 187
188 188 cmn_err(CE_NOTE, "%s", buf);
189 189 }
190 190
191 191 #else /* DEBUG */
192 192
193 193 #define DBG_PROM
194 194 #define DBG
195 195
196 196 #endif /* DEBUG */
197 197
198 198 /*
199 199 * Return true if the HV supports OS suspend and if suspend has not been
200 200 * disabled on this platform.
201 201 */
202 202 boolean_t
203 203 suspend_supported(void)
204 204 {
205 205 uint64_t major, minor;
206 206
207 207 if (suspend_disabled)
208 208 return (B_FALSE);
209 209
210 210 if (hsvc_version(HSVC_GROUP_CORE, &major, &minor) != 0)
211 211 return (B_FALSE);
212 212
213 213 return ((major == SUSPEND_CORE_MAJOR && minor >= SUSPEND_CORE_MINOR) ||
214 214 (major > SUSPEND_CORE_MAJOR));
215 215 }
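
Reviewer's note, as a worked instance of the check above: an HV reporting HSVC_GROUP_CORE version 1.2 or 1.7 is accepted, as is any 2.x version, while 1.1 is rejected because its minor version predates the suspend API.
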
216 216
217 217 /*
218 218 * Memory DR is not permitted if the system has been suspended and resumed.
219 219 * It is the responsibility of the caller of suspend_start and the DR
220 220 * subsystem to serialize DR operations and suspend_memdr_allowed() checks.
221 221 */
222 222 boolean_t
223 223 suspend_memdr_allowed(void)
224 224 {
225 225 return (suspend_count == 0);
226 226 }
227 227
228 228 /*
229 229 * Given a source tick, stick, and tod value, set the tick and stick offsets
230 230 * such that the (current physical register value) + offset == (source value)
231 231 * and in addition account for some variation between the %tick/%stick on
232 232 * different CPUs. We account for this variation by adding in double the value
233 233 * of suspend_tick_stick_max_delta. The following is an explanation of why
234 234 * suspend_tick_stick_max_delta must be multiplied by two and added to
235 235 * native_stick_offset.
236 236 *
237 237 * Consider a guest instance that is yet to be suspended with CPUs p0 and p1
238 238 * with physical "source" %stick values s0 and s1 respectively. When the guest
239 239 * is first resumed, the physical "target" %stick values are t0 and t1
240 240 * respectively. The virtual %stick values after the resume are v0 and v1
241 241 * respectively. Let x be the maximum difference between any two CPU's %stick
242 242 * register at a given point in time and let the %stick values be assigned
243 243 * such that
244 244 *
245 245 * s1 = s0 + x and
246 246 * t1 = t0 - x
247 247 *
248 248 * Let us assume that p0 is driving the suspend and resume. Then, we will
249 249 * calculate the stick offset f and the virtual %stick on p0 after the
250 250 * resume as follows.
251 251 *
252 252 * f = s0 - t0 and
253 253 * v0 = t0 + f
254 254 *
255 255 * We calculate the virtual %stick v1 on p1 after the resume as
256 256 *
257 257 * v1 = t1 + f
258 258 *
259 259 * Substitution yields
260 260 *
261 261 * v1 = t1 + (s0 - t0)
262 262 * v1 = (t0 - x) + (s0 - t0)
263 263 * v1 = -x + s0
264 264 * v1 = s0 - x
265 265 * v1 = (s1 - x) - x
266 266 * v1 = s1 - 2x
267 267 *
268 268 * Therefore, in this scenario, without accounting for %stick variation in
269 269 * the calculation of the native_stick_offset f, the virtual %stick on p1
270 270 * is less than the value of the %stick on p1 before the suspend which is
271 271 * unacceptable. By adding 2x to v1, we guarantee it will be equal to s1
272 272 * which means the %stick on p1 after the resume will always be greater
273 273 * than or equal to the %stick on p1 before the suspend. Since v1 = t1 + f
274 274 * at any point in time, we can accomplish this by adding 2x to f. This
275 275 * guarantees any processes bound to CPU p0 or p1 will not see a %stick
276 276 * decrease across a suspend/resume. Hence, in the code below, we multiply
277 277 * suspend_tick_stick_max_delta by two in the calculation for
278 278 * native_stick_offset, native_tick_offset, and target_hrtime.
279 279 */
280 280 static void
281 281 set_tick_offsets(uint64_t source_tick, uint64_t source_stick, timestruc_t *tsp)
282 282 {
283 283 uint64_t target_tick;
284 284 uint64_t target_stick;
285 285 hrtime_t source_hrtime;
286 286 hrtime_t target_hrtime;
287 287
288 288 /*
289 289 * Temporarily set the offsets to zero so that the following reads
290 290 * of the registers will yield physical unadjusted counter values.
291 291 */
292 292 native_tick_offset = 0;
293 293 native_stick_offset = 0;
294 294
295 295 target_tick = gettick_counter(); /* returns %tick */
296 296 target_stick = gettick(); /* returns %stick */
297 297
298 298 /*
299 299 * Calculate the new offsets. In addition to the delta observed on
300 300 * this CPU, add an additional value. Multiply the %tick/%stick
301 301 * frequency by suspend_tick_stick_max_delta (us). Then, multiply by 2
302 302 * to account for a delta between CPUs before the suspend and a
303 303 * delta between CPUs after the resume.
304 304 */
305 305 native_tick_offset = (source_tick - target_tick) +
306 306 (CPU->cpu_curr_clock * suspend_tick_stick_max_delta * 2 / MICROSEC);
307 307 native_stick_offset = (source_stick - target_stick) +
308 308 (sys_tick_freq * suspend_tick_stick_max_delta * 2 / MICROSEC);
309 309
310 310 /*
311 311 * We've effectively increased %stick and %tick by twice the value
312 312 * of suspend_tick_stick_max_delta to account for variation across
313 313 * CPUs. Now adjust the preserved TOD by the same amount.
314 314 */
315 315 source_hrtime = ts2hrt(tsp);
316 316 target_hrtime = source_hrtime +
317 317 (suspend_tick_stick_max_delta * 2 * (NANOSEC/MICROSEC));
318 318 hrt2ts(target_hrtime, tsp);
319 319 }
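
Reviewer's note, a worked instance of the slack term with illustrative numbers: for a sys_tick_freq of 1 GHz and the default suspend_tick_stick_max_delta of 1000 us, the addend is

    1000000000 * 1000 * 2 / 1000000 = 2000000 cycles (2 ms),

so native_stick_offset ends up as (source_stick - target_stick) + 2000000, and the preserved TOD is likewise advanced by 2 ms (1000 * 2 * 1000 ns).
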
320 320
321 321 /*
322 322 * Set the {tick,stick}.NPT field to 1 on this CPU.
323 323 */
324 324 static void
325 325 enable_tick_stick_npt(void)
326 326 {
327 327 (void) hv_stick_set_npt(1);
328 328 (void) hv_tick_set_npt(1);
329 329 }
330 330
331 331 /*
332 332 * Synchronize a CPU's {tick,stick}.NPT fields with the current state
333 333 * of the system. This is used when a CPU is DR'd into the system.
334 334 */
335 335 void
336 336 suspend_sync_tick_stick_npt(void)
337 337 {
338 338 if (tick_stick_emulation_active) {
339 339 DBG("enabling {%%tick/%%stick}.NPT on CPU 0x%x", CPU->cpu_id);
340 340 (void) hv_stick_set_npt(1);
341 341 (void) hv_tick_set_npt(1);
342 342 } else {
343 343 ASSERT(gettick_npt() == 0);
344 344 ASSERT(getstick_npt() == 0);
345 345 }
346 346 }
347 347
348 348 /*
349 349 * Obtain an updated MD from the hypervisor and update cpunodes, CPU HW
350 350 * sharing data structures, and processor groups.
351 351 */
352 352 static void
353 353 update_cpu_mappings(void)
354 354 {
355 355 md_t *mdp;
356 356 processorid_t id;
357 357 cpu_t *cp;
358 358 cpu_pg_t *pgps[NCPU];
359 359
360 360 if ((mdp = md_get_handle()) == NULL) {
361 361 DBG("suspend: md_get_handle failed");
362 362 return;
363 363 }
364 364
365 365 DBG("suspend: updating CPU mappings");
366 366
367 367 mutex_enter(&cpu_lock);
368 368
369 369 setup_chip_mappings(mdp);
370 370 setup_exec_unit_mappings(mdp);
371 371 for (id = 0; id < NCPU; id++) {
372 372 if ((cp = cpu_get(id)) == NULL)
373 373 continue;
374 374 cpu_map_exec_units(cp);
375 375 }
376 376
377 377 /*
378 378 * Re-calculate processor groups.
379 379 *
380 380 * First tear down all PG information before adding any new PG
381 381 * information derived from the MD we just downloaded. We must
382 382 * call pg_cpu_inactive and pg_cpu_active with CPUs paused and
383 383 * we want to minimize the number of times pause_cpus is called.
384 384 * Inactivating all CPUs would leave PGs without any active CPUs,
385 385 * so while CPUs are paused, call pg_cpu_inactive and swap in the
386 386 * bootstrap PG structure saving the original PG structure to be
387 387 * fini'd afterwards. This prevents the dispatcher from encountering
388 388 * PGs in which all CPUs are inactive. Offline CPUs are already
389 389 * inactive in their PGs and shouldn't be reactivated, so we must
390 390 * not call pg_cpu_inactive or pg_cpu_active for those CPUs.
391 391 */
392 - pause_cpus(NULL);
392 + pause_cpus(NULL, NULL);
393 393 for (id = 0; id < NCPU; id++) {
394 394 if ((cp = cpu_get(id)) == NULL)
395 395 continue;
396 396 if ((cp->cpu_flags & CPU_OFFLINE) == 0)
397 397 pg_cpu_inactive(cp);
398 398 pgps[id] = cp->cpu_pg;
399 399 pg_cpu_bootstrap(cp);
400 400 }
401 401 start_cpus();
402 402
403 403 /*
404 404 * pg_cpu_fini* and pg_cpu_init* must be called while CPUs are
405 405 * not paused. Use two separate loops here so that we do not
406 406 * initialize PG data for CPUs until all the old PG data structures
407 407 * are torn down.
408 408 */
409 409 for (id = 0; id < NCPU; id++) {
410 410 if ((cp = cpu_get(id)) == NULL)
411 411 continue;
412 412 pg_cpu_fini(cp, pgps[id]);
413 413 mpo_cpu_remove(id);
414 414 }
415 415
416 416 /*
417 417 * Initialize PG data for each CPU, but leave the bootstrapped
418 418 * PG structure in place to avoid running with any PGs containing
419 419 * nothing but inactive CPUs.
420 420 */
421 421 for (id = 0; id < NCPU; id++) {
422 422 if ((cp = cpu_get(id)) == NULL)
423 423 continue;
424 424 mpo_cpu_add(mdp, id);
425 425 pgps[id] = pg_cpu_init(cp, B_TRUE);
426 426 }
427 427
428 428 /*
429 429 * Now that PG data has been initialized for all CPUs in the
430 430 * system, replace the bootstrapped PG structure with the
431 431 * initialized PG structure and call pg_cpu_active for each CPU.
432 432 */
433 - pause_cpus(NULL);
433 + pause_cpus(NULL, NULL);
434 434 for (id = 0; id < NCPU; id++) {
435 435 if ((cp = cpu_get(id)) == NULL)
436 436 continue;
437 437 cp->cpu_pg = pgps[id];
438 438 if ((cp->cpu_flags & CPU_OFFLINE) == 0)
439 439 pg_cpu_active(cp);
440 440 }
441 441 start_cpus();
442 442
443 443 mutex_exit(&cpu_lock);
444 444
445 445 (void) md_fini_handle(mdp);
446 446 }
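
Reviewer's note on the interface change this webrev is built around, as assumed here (the prototype lives in the common CPU code, not in this file): pause_cpus() gains a second parameter naming a function for the paused CPUs to run in place of the default pause loop, and passing NULL preserves the historical behavior.

    /*
     * Assumed shape of the revised interface; NULL for the second
     * argument keeps the default pause behavior, which is all this
     * file needs.
     */
    void pause_cpus(cpu_t *off_cp, void *(*cpu_pause_func)(void *));

    /* All three call sites updated in this file: */
    pause_cpus(NULL, NULL);
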
447 447
448 448 /*
449 449 * Wrapper for the Sun Cluster error decoding function.
450 450 */
451 451 static int
452 452 cluster_error_decode(int error, char *error_reason, size_t max_reason_len)
453 453 {
454 454 const char *decoded;
455 455 size_t decoded_len;
456 456
457 457 ASSERT(error_reason != NULL);
458 458 ASSERT(max_reason_len > 0);
459 459
460 460 max_reason_len = MIN(max_reason_len, SC_FAIL_STR_MAX);
461 461
462 462 if (cl_suspend_error_decode == NULL)
463 463 return (-1);
464 464
465 465 if ((decoded = (*cl_suspend_error_decode)(error)) == NULL)
466 466 return (-1);
467 467
468 468 /* Get number of non-NULL bytes */
469 469 if ((decoded_len = strnlen(decoded, max_reason_len - 1)) == 0)
470 470 return (-1);
471 471
472 472 bcopy(decoded, error_reason, decoded_len);
473 473
474 474 /*
475 475 * The error string returned from cl_suspend_error_decode
476 476 * should be NULL-terminated, but set the terminator here
477 477 * because we only copied non-NULL bytes. If the decoded
478 478 * string was not NULL-terminated, this guarantees that
479 479 * error_reason will be.
480 480 */
481 481 error_reason[decoded_len] = '\0';
482 482
483 483 return (0);
484 484 }
485 485
486 486 /*
487 487 * Wrapper for the Sun Cluster pre-suspend callback.
488 488 */
489 489 static int
490 490 cluster_pre_wrapper(char *error_reason, size_t max_reason_len)
491 491 {
492 492 int rv = 0;
493 493
494 494 if (cl_suspend_pre_callback != NULL) {
495 495 rv = (*cl_suspend_pre_callback)();
496 496 DBG("suspend: cl_suspend_pre_callback returned %d", rv);
497 497 if (rv != 0 && error_reason != NULL && max_reason_len > 0) {
498 498 if (cluster_error_decode(rv, error_reason,
499 499 max_reason_len)) {
500 500 (void) snprintf(error_reason, max_reason_len,
501 501 SC_PRE_FAIL_STR_FMT, rv);
502 502 }
503 503 }
504 504 }
505 505
506 506 return (rv);
507 507 }
508 508
509 509 /*
510 510 * Wrapper for the Sun Cluster post-suspend callback.
511 511 */
512 512 static int
513 513 cluster_post_wrapper(char *error_reason, size_t max_reason_len)
514 514 {
515 515 int rv = 0;
516 516
517 517 if (cl_suspend_post_callback != NULL) {
518 518 rv = (*cl_suspend_post_callback)();
519 519 DBG("suspend: cl_suspend_post_callback returned %d", rv);
520 520 if (rv != 0 && error_reason != NULL && max_reason_len > 0) {
521 521 if (cluster_error_decode(rv, error_reason,
522 522 max_reason_len)) {
523 523 (void) snprintf(error_reason,
524 524 max_reason_len, SC_POST_FAIL_STR_FMT, rv);
525 525 }
526 526 }
527 527 }
528 528
529 529 return (rv);
530 530 }
531 531
532 532 /*
533 533 * Execute pre-suspend callbacks preparing the system for a suspend operation.
534 534 * Returns zero on success, non-zero on failure. Sets the recovered argument
535 535 * to indicate whether or not callbacks could be undone in the event of a
536 536 * failure--if callbacks were successfully undone, *recovered is set to B_TRUE,
537 537 * otherwise *recovered is set to B_FALSE. Must be called successfully before
538 538 * suspend_start can be called. Callers should first call suspend_supported to
539 539 * determine if OS suspend is supported.
540 540 */
541 541 int
542 542 suspend_pre(char *error_reason, size_t max_reason_len, boolean_t *recovered)
543 543 {
544 544 int rv;
545 545
546 546 ASSERT(recovered != NULL);
547 547
548 548 /*
549 549 * Return an error if suspend_pre is erroneously called
550 550 * when OS suspend is not supported.
551 551 */
552 552 ASSERT(suspend_supported());
553 553 if (!suspend_supported()) {
554 554 DBG("suspend: suspend_pre called without suspend support");
555 555 *recovered = B_TRUE;
556 556 return (ENOTSUP);
557 557 }
558 558 DBG("suspend: %s", __func__);
559 559
560 560 rv = cluster_pre_wrapper(error_reason, max_reason_len);
561 561
562 562 /*
563 563 * At present, only one pre-suspend operation exists.
564 564 * If it fails, no recovery needs to be done.
565 565 */
566 566 if (rv != 0 && recovered != NULL)
567 567 *recovered = B_TRUE;
568 568
569 569 return (rv);
570 570 }
571 571
572 572 /*
573 573 * Execute post-suspend callbacks. Returns zero on success, non-zero on
574 574 * failure. Must be called after suspend_start is called, regardless of
575 575 * whether or not suspend_start is successful.
576 576 */
577 577 int
578 578 suspend_post(char *error_reason, size_t max_reason_len)
579 579 {
580 580 ASSERT(suspend_supported());
581 581 DBG("suspend: %s", __func__);
582 582 return (cluster_post_wrapper(error_reason, max_reason_len));
583 583 }
584 584
585 585 /*
586 586 * Suspends the OS by pausing CPUs and calling into the HV to initiate
587 587 * the suspend. When the HV routine hv_guest_suspend returns, the system
588 588 * will be resumed. Must be called after a successful call to suspend_pre.
589 589 * suspend_post must be called after suspend_start, whether or not
590 590 * suspend_start returns an error.
591 591 */
592 592 /*ARGSUSED*/
593 593 int
594 594 suspend_start(char *error_reason, size_t max_reason_len)
595 595 {
596 596 uint64_t source_tick;
597 597 uint64_t source_stick;
598 598 uint64_t rv;
599 599 timestruc_t source_tod;
600 600 int spl;
601 601
602 602 ASSERT(suspend_supported());
603 603 DBG("suspend: %s", __func__);
604 604
605 605 sfmmu_ctxdoms_lock();
606 606
607 607 mutex_enter(&cpu_lock);
608 608
609 609 /* Suspend the watchdog */
610 610 watchdog_suspend();
611 611
612 612 /* Record the TOD */
613 613 mutex_enter(&tod_lock);
614 614 source_tod = tod_get();
615 615 mutex_exit(&tod_lock);
616 616
617 617 /* Pause all other CPUs */
618 - pause_cpus(NULL);
618 + pause_cpus(NULL, NULL);
619 619 DBG_PROM("suspend: CPUs paused\n");
620 620
621 621 /* Suspend cyclics */
622 622 cyclic_suspend();
623 623 DBG_PROM("suspend: cyclics suspended\n");
624 624
625 625 /* Disable interrupts */
626 626 spl = spl8();
627 627 DBG_PROM("suspend: spl8()\n");
628 628
629 629 source_tick = gettick_counter();
630 630 source_stick = gettick();
631 631 DBG_PROM("suspend: source_tick: 0x%lx\n", source_tick);
632 632 DBG_PROM("suspend: source_stick: 0x%lx\n", source_stick);
633 633
634 634 /*
635 635 * Call into the HV to initiate the suspend. hv_guest_suspend()
636 636 * returns after the guest has been resumed or if the suspend
637 637 * operation failed or was cancelled. After a successful suspend,
638 638 * the %tick and %stick registers may have changed by an amount
639 639 * that is not proportional to the amount of time that has passed.
640 640 * They may have jumped forwards or backwards. Some variation is
641 641 * allowed and accounted for using suspend_tick_stick_max_delta,
642 642 * but otherwise this jump must be uniform across all CPUs and we
643 643 * operate under the assumption that it is (maintaining two global
644 644 * offset variables--one for %tick and one for %stick.)
645 645 */
646 646 DBG_PROM("suspend: suspending... \n");
647 647 rv = hv_guest_suspend();
648 648 if (rv != 0) {
649 649 splx(spl);
650 650 cyclic_resume();
651 651 start_cpus();
652 652 watchdog_resume();
653 653 mutex_exit(&cpu_lock);
654 654 sfmmu_ctxdoms_unlock();
655 655 DBG("suspend: failed, rv: %ld\n", rv);
656 656 return (rv);
657 657 }
658 658
659 659 suspend_count++;
660 660
661 661 /* Update the global tick and stick offsets and the preserved TOD */
662 662 set_tick_offsets(source_tick, source_stick, &source_tod);
663 663
664 664 /* Ensure new offsets are globally visible before resuming CPUs */
665 665 membar_sync();
666 666
667 667 /* Enable interrupts */
668 668 splx(spl);
669 669
670 670 /* Set the {%tick,%stick}.NPT bits on all CPUs */
671 671 if (enable_user_tick_stick_emulation) {
672 672 xc_all((xcfunc_t *)enable_tick_stick_npt, NULL, NULL);
673 673 xt_sync(cpu_ready_set);
674 674 ASSERT(gettick_npt() != 0);
675 675 ASSERT(getstick_npt() != 0);
676 676 }
677 677
678 678 /* If emulation is enabled, but not currently active, enable it */
679 679 if (enable_user_tick_stick_emulation && !tick_stick_emulation_active) {
680 680 tick_stick_emulation_active = B_TRUE;
681 681 }
682 682
683 683 sfmmu_ctxdoms_remove();
684 684
685 685 /* Resume cyclics, unpause CPUs */
686 686 cyclic_resume();
687 687 start_cpus();
688 688
689 689 /* Set the TOD */
690 690 mutex_enter(&tod_lock);
691 691 tod_set(source_tod);
692 692 mutex_exit(&tod_lock);
693 693
694 694 /* Re-enable the watchdog */
695 695 watchdog_resume();
696 696
697 697 mutex_exit(&cpu_lock);
698 698
699 699 /* Download the latest MD */
700 700 if ((rv = mach_descrip_update()) != 0)
701 701 cmn_err(CE_PANIC, "suspend: mach_descrip_update failed: %ld",
702 702 rv);
703 703
704 704 sfmmu_ctxdoms_update();
705 705 sfmmu_ctxdoms_unlock();
706 706
707 707 /* Get new MD, update CPU mappings/relationships */
708 708 if (suspend_update_cpu_mappings)
709 709 update_cpu_mappings();
710 710
711 711 DBG("suspend: target tick: 0x%lx", gettick_counter());
712 712 DBG("suspend: target stick: 0x%llx", gettick());
713 713 DBG("suspend: user %%tick/%%stick emulation is %d",
714 714 tick_stick_emulation_active);
715 715 DBG("suspend: finished");
716 716
717 717 return (0);
718 718 }
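
Reviewer's note: to make the sequencing contract in the comments concrete, a hypothetical caller (do_domain_suspend and its error handling are illustrative, not part of this change).

    static int
    do_domain_suspend(char *reason, size_t len)
    {
            boolean_t recovered;
            int rv, post_rv;

            if (!suspend_supported())
                    return (ENOTSUP);

            /* Must succeed before suspend_start may be called */
            if ((rv = suspend_pre(reason, len, &recovered)) != 0)
                    return (rv);

            rv = suspend_start(reason, len);

            /* Runs whether or not suspend_start succeeded */
            post_rv = suspend_post(reason, len);

            return (rv != 0 ? rv : post_rv);
    }
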