1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright (c) 2012, Joyent Inc. All rights reserved.
  29  */
  30 
  31 #include <sys/timer.h>
  32 #include <sys/systm.h>
  33 #include <sys/param.h>
  34 #include <sys/kmem.h>
  35 #include <sys/debug.h>
  36 #include <sys/cyclic.h>
  37 #include <sys/cmn_err.h>
  38 #include <sys/pset.h>
  39 #include <sys/atomic.h>
  40 #include <sys/policy.h>
  41 
/*
 * Backend descriptor for CLOCK_HIGHRES.  It is filled in and handed to
 * clock_add_backend() by clock_highres_init().
 */
static clock_backend_t clock_highres;
  43 
  44 /*ARGSUSED*/
  45 static int
  46 clock_highres_settime(timespec_t *ts)
  47 {
  48         return (EINVAL);
  49 }
  50 
  51 static int
  52 clock_highres_gettime(timespec_t *ts)
  53 {
  54         hrt2ts(gethrtime(), (timestruc_t *)ts);
  55 
  56         return (0);
  57 }
  58 
  59 static int
  60 clock_highres_getres(timespec_t *ts)
  61 {
  62         hrt2ts(cyclic_getres(), (timestruc_t *)ts);
  63 
  64         return (0);
  65 }
  66 
  67 /*ARGSUSED*/
  68 static int
  69 clock_highres_timer_create(itimer_t *it, struct sigevent *ev)
  70 {
  71         /*
  72          * CLOCK_HIGHRES timers of sufficiently high resolution can deny
  73          * service; only allow privileged users to create such timers.
  74          * Sites that do not wish to have this restriction should
  75          * give users the "proc_clock_highres" privilege.
  76          */
  77         if (secpolicy_clock_highres(CRED()) != 0) {
  78                 it->it_arg = NULL;
  79                 return (EPERM);
  80         }
  81 
  82         it->it_arg = kmem_zalloc(sizeof (cyclic_id_t), KM_SLEEP);
  83 
  84         return (0);
  85 }
  86 
  87 static void
  88 clock_highres_fire(void *arg)
  89 {
  90         itimer_t *it = (itimer_t *)arg;
  91         hrtime_t *addr = &it->it_hrtime;
  92         hrtime_t old = *addr, new = gethrtime();
  93 
  94         do {
  95                 old = *addr;
  96         } while (atomic_cas_64((uint64_t *)addr, old, new) != old);
  97 
  98         timer_fire(it);
  99 }
 100 
 101 static int
 102 clock_highres_timer_settime(itimer_t *it, int flags,
 103         const struct itimerspec *when)
 104 {
 105         cyclic_id_t cyc, *cycp = it->it_arg;
 106         proc_t *p = curproc;
 107         kthread_t *t = curthread;
 108         cyc_time_t cyctime;
 109         cyc_handler_t hdlr;
 110         cpu_t *cpu;
 111         cpupart_t *cpupart;
 112         int pset;
 113 
 114         cyctime.cyt_when = ts2hrt(&when->it_value);
 115         cyctime.cyt_interval = ts2hrt(&when->it_interval);
 116 
 117         if (cyctime.cyt_when != 0 && cyctime.cyt_interval == 0 &&
 118             it->it_itime.it_interval.tv_sec == 0 &&
 119             it->it_itime.it_interval.tv_nsec == 0 &&
 120             (cyc = *cycp) != CYCLIC_NONE) {
 121                 /*
 122                  * If our existing timer is a one-shot and our new timer is a
 123                  * one-shot, we'll save ourselves a world of grief and just
 124                  * reprogram the cyclic.
 125                  */
 126                 it->it_itime = *when;
 127 
 128                 if (!(flags & TIMER_ABSTIME))
 129                         cyctime.cyt_when += gethrtime();
 130 
 131                 hrt2ts(cyctime.cyt_when, &it->it_itime.it_value);
 132                 (void) cyclic_reprogram(cyc, cyctime.cyt_when);
 133                 return (0);
 134         }
 135 
 136         mutex_enter(&cpu_lock);
 137         if ((cyc = *cycp) != CYCLIC_NONE) {
 138                 cyclic_remove(cyc);
 139                 *cycp = CYCLIC_NONE;
 140         }
 141 
 142         if (cyctime.cyt_when == 0) {
 143                 mutex_exit(&cpu_lock);
 144                 return (0);
 145         }
 146 
 147         if (!(flags & TIMER_ABSTIME))
 148                 cyctime.cyt_when += gethrtime();
 149 
 150         /*
 151          * Now we will check for overflow (that is, we will check to see
 152          * that the start time plus the interval time doesn't exceed
 153          * INT64_MAX).  The astute code reviewer will observe that this
 154          * one-time check doesn't guarantee that a future expiration
 155          * will not wrap.  We wish to prove, then, that if a future
 156          * expiration does wrap, the earliest the problem can be encountered
 157          * is (INT64_MAX / 2) nanoseconds (191 years) after boot.  Formally:
 158          *
 159          *  Given:      s + i < m    s > 0        i > 0
 160          *              s + ni > m   n > 1
 161          *
 162          *    (where "s" is the start time, "i" is the interval, "n" is the
 163          *    number of times the cyclic has fired and "m" is INT64_MAX)
 164          *
 165          *  Prove:
 166          *              (a)  s + (n - 1)i > (m / 2)
 167          *              (b)  s + (n - 1)i < m
 168          *
 169          * That is, prove that we must have fired at least once 191 years
 170          * after boot.  The proof is very straightforward; since the left
 171          * side of (a) is minimized when i is small, it is sufficient to show
 172          * that the statement is true for i's smallest possible value
 173          * (((m - s) / n) + epsilon).  The same goes for (b); showing that the
 174          * statement is true for i's largest possible value (m - s + epsilon)
 175          * is sufficient to prove the statement.
 176          *
 177          * The actual arithmetic manipulation is left up to reader.
 178          */
 179         if (cyctime.cyt_when > INT64_MAX - cyctime.cyt_interval) {
 180                 mutex_exit(&cpu_lock);
 181                 return (EOVERFLOW);
 182         }
 183 
 184         if (cyctime.cyt_interval == 0) {
 185                 /*
 186                  * If this is a one-shot, then we set the interval to be
 187                  * inifinite.  If this timer is never touched, this cyclic will
 188                  * simply consume space in the cyclic subsystem.  As soon as
 189                  * timer_settime() or timer_delete() is called, the cyclic is
 190                  * removed (so it's not possible to run the machine out
 191                  * of resources by creating one-shots).
 192                  */
 193                 cyctime.cyt_interval = CY_INFINITY;
 194         }
 195 
 196         it->it_itime = *when;
 197 
 198         hrt2ts(cyctime.cyt_when, &it->it_itime.it_value);
 199 
 200         hdlr.cyh_func = (cyc_func_t)clock_highres_fire;
 201         hdlr.cyh_arg = it;
 202         hdlr.cyh_level = CY_LOW_LEVEL;
 203 
 204         if (cyctime.cyt_when != 0)
 205                 *cycp = cyc = cyclic_add(&hdlr, &cyctime);
 206 
 207         /*
 208          * Now that we have the cyclic created, we need to bind it to our
 209          * bound CPU and processor set (if any).
 210          */
 211         mutex_enter(&p->p_lock);
 212         cpu = t->t_bound_cpu;
 213         cpupart = t->t_cpupart;
 214         pset = t->t_bind_pset;
 215 
 216         mutex_exit(&p->p_lock);
 217 
 218         cyclic_bind(cyc, cpu, pset == PS_NONE ? NULL : cpupart);
 219 
 220         mutex_exit(&cpu_lock);
 221 
 222         return (0);
 223 }
 224 
 225 static int
 226 clock_highres_timer_gettime(itimer_t *it, struct itimerspec *when)
 227 {
 228         /*
 229          * CLOCK_HIGHRES doesn't update it_itime.
 230          */
 231         hrtime_t start = ts2hrt(&it->it_itime.it_value);
 232         hrtime_t interval = ts2hrt(&it->it_itime.it_interval);
 233         hrtime_t diff, now = gethrtime();
 234         hrtime_t *addr = &it->it_hrtime;
 235         hrtime_t last;
 236 
 237         /*
 238          * We're using atomic_cas_64() here only to assure that we slurp the
 239          * entire timestamp atomically.
 240          */
 241         last = atomic_cas_64((uint64_t *)addr, 0, 0);
 242 
 243         *when = it->it_itime;
 244 
 245         if (!timerspecisset(&when->it_value))
 246                 return (0);
 247 
 248         if (start > now) {
 249                 /*
 250                  * We haven't gone off yet...
 251                  */
 252                 diff = start - now;
 253         } else {
 254                 if (interval == 0) {
 255                         /*
 256                          * This is a one-shot which should have already
 257                          * fired; set it_value to 0.
 258                          */
 259                         timerspecclear(&when->it_value);
 260                         return (0);
 261                 }
 262 
 263                 /*
 264                  * Calculate how far we are into this interval.
 265                  */
 266                 diff = (now - start) % interval;
 267 
 268                 /*
 269                  * Now check to see if we've dealt with the last interval
 270                  * yet.
 271                  */
 272                 if (now - diff > last) {
 273                         /*
 274                          * The last interval hasn't fired; set it_value to 0.
 275                          */
 276                         timerspecclear(&when->it_value);
 277                         return (0);
 278                 }
 279 
 280                 /*
 281                  * The last interval _has_ fired; we can return the amount
 282                  * of time left in this interval.
 283                  */
 284                 diff = interval - diff;
 285         }
 286 
 287         hrt2ts(diff, &when->it_value);
 288 
 289         return (0);
 290 }
 291 
 292 static int
 293 clock_highres_timer_delete(itimer_t *it)
 294 {
 295         cyclic_id_t cyc;
 296 
 297         if (it->it_arg == NULL) {
 298                 /*
 299                  * This timer was never fully created; we must have failed
 300                  * in the clock_highres_timer_create() routine.
 301                  */
 302                 return (0);
 303         }
 304 
 305         mutex_enter(&cpu_lock);
 306 
 307         if ((cyc = *((cyclic_id_t *)it->it_arg)) != CYCLIC_NONE)
 308                 cyclic_remove(cyc);
 309 
 310         mutex_exit(&cpu_lock);
 311 
 312         kmem_free(it->it_arg, sizeof (cyclic_id_t));
 313 
 314         return (0);
 315 }
 316 
 317 static void
 318 clock_highres_timer_lwpbind(itimer_t *it)
 319 {
 320         proc_t *p = curproc;
 321         kthread_t *t = curthread;
 322         cyclic_id_t cyc = *((cyclic_id_t *)it->it_arg);
 323         cpu_t *cpu;
 324         cpupart_t *cpupart;
 325         int pset;
 326 
 327         if (cyc == CYCLIC_NONE)
 328                 return;
 329 
 330         mutex_enter(&cpu_lock);
 331         mutex_enter(&p->p_lock);
 332 
 333         /*
 334          * Okay, now we can safely look at the bindings.
 335          */
 336         cpu = t->t_bound_cpu;
 337         cpupart = t->t_cpupart;
 338         pset = t->t_bind_pset;
 339 
 340         /*
 341          * Now we drop p_lock.  We haven't dropped cpu_lock; we're guaranteed
 342          * that even if the bindings change, the CPU and/or processor set
 343          * that this timer was bound to remain valid (and the combination
 344          * remains self-consistent).
 345          */
 346         mutex_exit(&p->p_lock);
 347 
 348         cyclic_bind(cyc, cpu, pset == PS_NONE ? NULL : cpupart);
 349 
 350         mutex_exit(&cpu_lock);
 351 }
 352 
 353 void
 354 clock_highres_init()
 355 {
 356         clock_backend_t *be = &clock_highres;
 357         struct sigevent *ev = &be->clk_default;
 358 
 359         ev->sigev_signo = SIGALRM;
 360         ev->sigev_notify = SIGEV_SIGNAL;
 361         ev->sigev_value.sival_ptr = NULL;
 362 
 363         be->clk_clock_settime = clock_highres_settime;
 364         be->clk_clock_gettime = clock_highres_gettime;
 365         be->clk_clock_getres = clock_highres_getres;
 366         be->clk_timer_create = clock_highres_timer_create;
 367         be->clk_timer_gettime = clock_highres_timer_gettime;
 368         be->clk_timer_settime = clock_highres_timer_settime;
 369         be->clk_timer_delete = clock_highres_timer_delete;
 370         be->clk_timer_lwpbind = clock_highres_timer_lwpbind;
 371 
 372         clock_add_backend(CLOCK_HIGHRES, &clock_highres);
 373 }