/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
/*        All Rights Reserved   */


/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * UNIX Device Driver Interface functions
 *
 * This file contains functions that are to be added to the kernel
 * to put the interface presented to drivers in conformance with
 * the DDI standard.  Of the functions added to the kernel, 17 are
 * function equivalents of existing macros in sysmacros.h,
 * stream.h, and param.h.
 *
 * Thirteen additional functions -- drv_getparm(), drv_setparm(),
 * getrbuf(), freerbuf(),
 * getemajor(), geteminor(), etoimajor(), itoemajor(), drv_usectohz(),
 * drv_hztousec(), drv_usecwait(), drv_priv(), and kvtoppid() --
 * are specified by DDI to exist in the kernel and are implemented here.
 *
 * Note that putnext() and put() are not in this file.  The C versions of
 * these routines are in uts/common/os/putnext.c, and assembly versions
 * may exist for some architectures.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/signal.h>
#include <sys/pcb.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/session.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/mkdev.h>
#include <sys/debug.h>
#include <sys/vtrace.h>

/*
 * return internal major number corresponding to device
 * number (new format) argument
 */
major_t
getmajor(dev_t dev)
{
#ifdef _LP64
        return ((major_t)((dev >> NBITSMINOR64) & MAXMAJ64));
#else
        return ((major_t)((dev >> NBITSMINOR) & MAXMAJ));
#endif
}

/*
 * return external major number corresponding to device
 * number (new format) argument
 */
major_t
getemajor(dev_t dev)
{
#ifdef _LP64
        return ((major_t)((dev >> NBITSMINOR64) & MAXMAJ64));
#else
        return ((major_t)((dev >> NBITSMINOR) & MAXMAJ));
#endif
}

/*
 * return internal minor number corresponding to device
 * number (new format) argument
 */
minor_t
getminor(dev_t dev)
{
#ifdef _LP64
        return ((minor_t)(dev & MAXMIN64));
#else
        return ((minor_t)(dev & MAXMIN));
#endif
}

/*
 * return external minor number corresponding to device
 * number (new format) argument
 */
minor_t
geteminor(dev_t dev)
{
#ifdef _LP64
        return ((minor_t)(dev & MAXMIN64));
#else
        return ((minor_t)(dev & MAXMIN));
#endif
}

/*
 * return internal major number corresponding to external
 * major number.
 */
int
etoimajor(major_t emajnum)
{
#ifdef _LP64
        if (emajnum >= devcnt)
                return (-1); /* invalid external major */
#else
        if (emajnum > MAXMAJ || emajnum >= devcnt)
                return (-1); /* invalid external major */
#endif
        return ((int)emajnum);
}

/*
 * return external major number corresponding to internal
 * major number argument, or -1 if no external major number
 * can be found after lastemaj that maps to the internal
 * major number.  Pass a lastemaj value of -1 to start
 * the search.  Typical use of this function is of the form:
 *
 *      lastemaj = -1;
 *      while ((lastemaj = itoemajor(imaj, lastemaj)) != -1)
 *              { process major number }
 */
int
itoemajor(major_t imajnum, int lastemaj)
{
        if (imajnum >= devcnt)
                return (-1);

        /*
         * A lastemaj of -1 means start from the beginning of
         * the (imaginary) MAJOR table; anything less is invalid.
         */
        if (lastemaj < -1)
                return (-1);

        /*
         * given that there's a 1-1 mapping of internal to external
         * major numbers, searching is somewhat pointless ... let's
         * just go there directly.
         */
        if (++lastemaj < devcnt && imajnum < devcnt)
                return (imajnum);
        return (-1);
}

/*
 * encode external major and minor number arguments into a
 * new format device number
 */
dev_t
makedevice(major_t maj, minor_t minor)
{
#ifdef _LP64
        return (((dev_t)maj << NBITSMINOR64) | (minor & MAXMIN64));
#else
        return (((dev_t)maj << NBITSMINOR) | (minor & MAXMIN));
#endif
}
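
/*
 * Illustrative sketch (hypothetical driver usage, not code from this
 * file): decompose a device number and rebuild it with the functions
 * above.
 *
 *      major_t maj = getmajor(dev);
 *      minor_t min = getminor(dev);
 *      dev_t   dup = makedevice(maj, min);
 *
 * For any valid new-format dev_t, dup compares equal to dev.
 */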

/*
 * cmpdev - compress new device format to old device format
 */
o_dev_t
cmpdev(dev_t dev)
{
        major_t major_d;
        minor_t minor_d;

#ifdef _LP64
        major_d = dev >> NBITSMINOR64;
        minor_d = dev & MAXMIN64;
#else
        major_d = dev >> NBITSMINOR;
        minor_d = dev & MAXMIN;
#endif
        if (major_d > OMAXMAJ || minor_d > OMAXMIN)
                return ((o_dev_t)NODEV);
        return ((o_dev_t)((major_d << ONBITSMINOR) | minor_d));
}

dev_t
expdev(dev_t dev)
{
        major_t major_d;
        minor_t minor_d;

        major_d = ((dev >> ONBITSMINOR) & OMAXMAJ);
        minor_d = (dev & OMAXMIN);
#ifdef _LP64
        return ((((dev_t)major_d << NBITSMINOR64) | minor_d));
#else
        return ((((dev_t)major_d << NBITSMINOR) | minor_d));
#endif
}
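
/*
 * Illustrative sketch (hypothetical usage): an old-format device
 * number survives a cmpdev()/expdev() round trip whenever the major
 * and minor numbers fit within the old-format limits.
 *
 *      o_dev_t odev;
 *
 *      if ((odev = cmpdev(dev)) != (o_dev_t)NODEV)
 *              ASSERT(expdev((dev_t)odev) == dev);
 */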

/*
 * return true (1) if the message type input is a data
 * message type, 0 otherwise
 */
#undef datamsg
int
datamsg(unsigned char db_type)
{
        return (db_type == M_DATA || db_type == M_PROTO ||
            db_type == M_PCPROTO || db_type == M_DELAY);
}
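
/*
 * Illustrative sketch (a hypothetical module put routine; the "xx"
 * names are made up): datamsg() separates data messages, which are
 * queued for the service routine, from control messages, which are
 * handled immediately.
 *
 *      static int
 *      xxput(queue_t *q, mblk_t *mp)
 *      {
 *              if (datamsg(mp->b_datap->db_type))
 *                      (void) putq(q, mp);
 *              else
 *                      xxctlmsg(q, mp);
 *              return (0);
 *      }
 */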

/*
 * return a pointer to the other queue in the queue pair of q
 */
queue_t *
OTHERQ(queue_t *q)
{
        return (_OTHERQ(q));
}

/*
 * return a pointer to the read queue in the queue pair of q.
 */
queue_t *
RD(queue_t *q)
{
        return (_RD(q));
}

/*
 * return 1 if the next queue is in the same stream as q, 0 otherwise.
 */
int
SAMESTR(queue_t *q)
{
        return (_SAMESTR(q));
}

/*
 * return a pointer to the write queue in the queue pair of q.
 */
queue_t *
WR(queue_t *q)
{
        return (_WR(q));
}

/*
 * return the value of the kernel parameter identified by parm
 * in the location pointed to by valuep
 */
int
drv_getparm(unsigned int parm, void *valuep)
{
        proc_t  *p = curproc;
        time_t  now;

        switch (parm) {
        case UPROCP:
                *(proc_t **)valuep = p;
                break;
        case PPGRP:
                mutex_enter(&p->p_lock);
                *(pid_t *)valuep = p->p_pgrp;
                mutex_exit(&p->p_lock);
                break;
        case LBOLT:
                *(clock_t *)valuep = ddi_get_lbolt();
                break;
        case TIME:
                if ((now = gethrestime_sec()) == 0) {
                        timestruc_t ts;
                        mutex_enter(&tod_lock);
                        ts = tod_get();
                        mutex_exit(&tod_lock);
                        *(time_t *)valuep = ts.tv_sec;
                } else {
                        *(time_t *)valuep = now;
                }
                break;
        case PPID:
                *(pid_t *)valuep = p->p_pid;
                break;
        case PSID:
                mutex_enter(&p->p_splock);
                *(pid_t *)valuep = p->p_sessp->s_sid;
                mutex_exit(&p->p_splock);
                break;
        case UCRED:
                *(cred_t **)valuep = CRED();
                break;
        default:
                return (-1);
        }

        return (0);
}
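
/*
 * Illustrative sketch (hypothetical caller): fetch the current time
 * in seconds.  Only an unrecognized parm fails, but the return value
 * is still worth checking.
 *
 *      time_t now;
 *
 *      if (drv_getparm(TIME, &now) == -1)
 *              cmn_err(CE_WARN, "xx: drv_getparm(TIME) failed");
 */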

/*
 * set value of kernel parameter associated with parm
 */
int
drv_setparm(unsigned int parm, unsigned long value)
{
        switch (parm) {
        case SYSRINT:
                CPU_STATS_ADDQ(CPU, sys, rcvint, value);
                break;
        case SYSXINT:
                CPU_STATS_ADDQ(CPU, sys, xmtint, value);
                break;
        case SYSMINT:
                CPU_STATS_ADDQ(CPU, sys, mdmint, value);
                break;
        case SYSRAWC:
                CPU_STATS_ADDQ(CPU, sys, rawch, value);
                break;
        case SYSCANC:
                CPU_STATS_ADDQ(CPU, sys, canch, value);
                break;
        case SYSOUTC:
                CPU_STATS_ADDQ(CPU, sys, outch, value);
                break;
        default:
                return (-1);
        }

        return (0);
}
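
/*
 * Illustrative sketch (hypothetical serial driver receive interrupt):
 * account for cnt raw input characters in the system statistics.
 *
 *      (void) drv_setparm(SYSRAWC, cnt);
 */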

/*
 * allocate space for buffer header and return pointer to it.
 * preferred means of obtaining space for a local buf header.
 * returns pointer to buf upon success, NULL for failure
 */
struct buf *
getrbuf(int sleep)
{
        struct buf *bp;

        bp = kmem_alloc(sizeof (struct buf), sleep);
        if (bp == NULL)
                return (NULL);
        bioinit(bp);

        return (bp);
}

/*
 * free up space allocated by getrbuf()
 */
void
freerbuf(struct buf *bp)
{
        biofini(bp);
        kmem_free(bp, sizeof (struct buf));
}
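
/*
 * Illustrative sketch (hypothetical transfer routine): allocate a
 * private buf header, use it, then release it.  KM_SLEEP callers
 * never see NULL; KM_NOSLEEP callers must check.
 *
 *      struct buf *bp;
 *
 *      if ((bp = getrbuf(KM_NOSLEEP)) == NULL)
 *              return (ENOMEM);
 *      { set up bp and perform the transfer }
 *      freerbuf(bp);
 */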

/*
 * convert byte count input to logical page units
 * (byte counts that are not a page-size multiple
 * are rounded down)
 */
pgcnt_t
btop(size_t numbytes)
{
        return (numbytes >> PAGESHIFT);
}

/*
 * convert byte count input to logical page units
 * (byte counts that are not a page-size multiple
 * are rounded up)
 */
pgcnt_t
btopr(size_t numbytes)
{
        return ((numbytes + PAGEOFFSET) >> PAGESHIFT);
}

/*
 * convert size in pages to bytes.
 */
size_t
ptob(pgcnt_t numpages)
{
        return (numpages << PAGESHIFT);
}
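
/*
 * Illustrative arithmetic (assuming a 4K page size, i.e.
 * PAGESHIFT == 12):
 *
 *      btop(4096)  == 1        btop(4097)  == 1        (rounds down)
 *      btopr(4096) == 1        btopr(4097) == 2        (rounds up)
 *      ptob(2)     == 8192
 */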

#define MAXCLOCK_T LONG_MAX

/*
 * Convert from system time units (hz) to microseconds.
 *
 * If ticks <= 0, return 0.
 * If converting ticks to usecs would overflow, return MAXCLOCK_T.
 * Otherwise, convert ticks to microseconds.
 */
clock_t
drv_hztousec(clock_t ticks)
{
        if (ticks <= 0)
                return (0);

        if (ticks > MAXCLOCK_T / usec_per_tick)
                return (MAXCLOCK_T);

        return (TICK_TO_USEC(ticks));
}


/*
 * Convert from microseconds to system time units (hz), rounded up.
 *
 * If microsecs <= 0, return 0.
 * Otherwise, convert microseconds to ticks, rounding up.
 */
clock_t
drv_usectohz(clock_t microsecs)
{
        if (microsecs <= 0)
                return (0);

        return (USEC_TO_TICK_ROUNDUP(microsecs));
}
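
/*
 * Illustrative sketch (hypothetical caller): keep driver delays in
 * microseconds and convert at the point of use, since the length of
 * a tick varies with the hz setting.  For example, to retry in
 * roughly ten milliseconds:
 *
 *      tid = timeout(xxretry, xxp, drv_usectohz(10000));
 */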

#ifdef  sun
/*
 * drv_usecwait is implemented in each architecture's machine-specific
 * code as the alternate entry to usec_delay (eventually usec_delay
 * goes away).  For sparc, see sparc/os/ml/sparc_subr.s.
 */
#endif

/*
 * bcanputnext and canputnext assume they are called from timeout,
 * bufcall, or esballoc free routines.  Since these are driven by
 * clock interrupts rather than system calls, the appropriate plumbing
 * locks have not been acquired.
 */
int
bcanputnext(queue_t *q, unsigned char band)
{
        int     ret;

        claimstr(q);
        ret = bcanput(q->q_next, band);
        releasestr(q);
        return (ret);
}

int
canputnext(queue_t *q)
{
        queue_t *qofsq = q;
        struct stdata *stp = STREAM(q);
        kmutex_t *sdlock;

        TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_IN,
            "canputnext?:%p\n", q);

        if (stp->sd_ciputctrl != NULL) {
                int ix = CPU->cpu_seqid & stp->sd_nciputctrl;
                sdlock = &stp->sd_ciputctrl[ix].ciputctrl_lock;
                mutex_enter(sdlock);
        } else
                mutex_enter(sdlock = &stp->sd_reflock);

        /* get next module forward with a service queue */
        q = q->q_next->q_nfsrv;
        ASSERT(q != NULL);

        /* loopback transports should not do a canputnext */
        ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(qofsq));

        if (!(q->q_flag & QFULL)) {
                mutex_exit(sdlock);
                TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT,
                    "canputnext:%p %d", q, 1);
                return (1);
        }

        if (sdlock != &stp->sd_reflock) {
                mutex_exit(sdlock);
                mutex_enter(&stp->sd_reflock);
        }

        /* the above is the most frequently used path */
        stp->sd_refcnt++;
        ASSERT(stp->sd_refcnt != 0); /* Wraparound */
        mutex_exit(&stp->sd_reflock);

        mutex_enter(QLOCK(q));
        if (q->q_flag & QFULL) {
                q->q_flag |= QWANTW;
                mutex_exit(QLOCK(q));
                TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT,
                    "canputnext:%p %d", q, 0);
                releasestr(qofsq);

                return (0);
        }
        mutex_exit(QLOCK(q));
        TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT, "canputnext:%p %d", q, 1);
        releasestr(qofsq);

        return (1);
}
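
/*
 * Illustrative sketch (hypothetical read service routine): canputnext()
 * is the flow-control test in the canonical service procedure loop.
 *
 *      static int
 *      xxrsrv(queue_t *q)
 *      {
 *              mblk_t *mp;
 *
 *              while ((mp = getq(q)) != NULL) {
 *                      if (canputnext(q)) {
 *                              putnext(q, mp);
 *                      } else {
 *                              (void) putbq(q, mp);
 *                              break;
 *                      }
 *              }
 *              return (0);
 *      }
 */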


/*
 * Open has progressed to the point where it is safe to send/receive messages.
 *
 * "qprocson enables the put and service routines of the driver
 * or module... Prior to the call to qprocson, the put and service
 * routines of a newly pushed module or newly opened driver are
 * disabled.  For the module, messages flow around it as if it
 * were not present in the stream... qprocson must be called by
 * the first open of a module or driver after allocation and
 * initialization of any resource on which the put and service
 * routines depend."
 *
 * Note that before calling qprocson a module/driver could itself cause its
 * put or service procedures to be run by using put() or qenable().
 */
void
qprocson(queue_t *q)
{
        ASSERT(q->q_flag & QREADR);
        /*
         * Do not call insertq() if it is a re-open.  But if _QINSERTING
         * is set, q_next will not be NULL and we need to call insertq().
         */
        if ((q->q_next == NULL && WR(q)->q_next == NULL) ||
            (q->q_flag & _QINSERTING))
                insertq(STREAM(q), q);
}

/*
 * Close has reached a point where it can no longer allow put/service
 * into the queue.
 *
 * "qprocsoff disables the put and service routines of the driver
 * or module... When the routines are disabled in a module, messages
 * flow around the module as if it were not present in the stream.
 * qprocsoff must be called by the close routine of a driver or module
 * before deallocating any resources on which the driver/module's
 * put and service routines depend.  qprocsoff will remove the
 * queue's service routines from the list of service routines to be
 * run and waits until any concurrent put or service routines are
 * finished."
 *
 * Note that after calling qprocsoff a module/driver could itself cause its
 * put procedures to be run by using put().
 */
void
qprocsoff(queue_t *q)
{
        ASSERT(q->q_flag & QREADR);
        if (q->q_flag & QWCLOSE) {
                /* Called more than once */
                return;
        }
        disable_svc(q);
        removeq(q);
}
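
/*
 * Illustrative sketch (hypothetical module; "xx" names are made up):
 * xxopen() initializes per-instance state before enabling the queue
 * procedures, and xxclose() disables them before freeing that state.
 *
 *      xsp = kmem_zalloc(sizeof (*xsp), KM_SLEEP);
 *      q->q_ptr = WR(q)->q_ptr = xsp;
 *      qprocson(q);
 *      ...
 *      qprocsoff(q);
 *      kmem_free(xsp, sizeof (*xsp));
 *      q->q_ptr = WR(q)->q_ptr = NULL;
 */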

/*
 * "freezestr() freezes the state of the entire STREAM containing
 *  the queue pair q.  A frozen STREAM blocks any thread
 *  attempting to enter any open, close, put or service routine
 *  belonging to any queue instance in the STREAM, and blocks
 *  any thread currently within the STREAM if it attempts to put
 *  messages onto or take messages off of any queue within the
 *  STREAM (with the sole exception of the caller).  Threads
 *  blocked by this mechanism remain so until the STREAM is
 *  thawed by a call to unfreezestr()."
 *
 * Use strblock to set SQ_FROZEN in all syncqs in the stream (prevents
 * further entry into put, service, open, and close procedures) and
 * grab (and hold) all the QLOCKs in the stream (to block putq, getq, etc.).
 *
 * Note: this has to be the only code that acquires one QLOCK while holding
 * another QLOCK (otherwise we would have locking hierarchy/ordering
 * violations).
 */
void
freezestr(queue_t *q)
{
        struct stdata *stp = STREAM(q);

        /*
         * Increment refcnt to prevent q_next from changing during the strblock
         * as well as while the stream is frozen.
         */
        claimstr(RD(q));

        strblock(q);
        ASSERT(stp->sd_freezer == NULL);
        stp->sd_freezer = curthread;
        for (q = stp->sd_wrq; q != NULL; q = SAMESTR(q) ? q->q_next : NULL) {
                mutex_enter(QLOCK(q));
                mutex_enter(QLOCK(RD(q)));
        }
}

/*
 * Undo what freezestr did.
 * Have to drop the QLOCKs before the strunblock since strunblock will
 * potentially call other put procedures.
 */
void
unfreezestr(queue_t *q)
{
        struct stdata *stp = STREAM(q);
        queue_t *q1;

        for (q1 = stp->sd_wrq; q1 != NULL;
            q1 = SAMESTR(q1) ? q1->q_next : NULL) {
                mutex_exit(QLOCK(q1));
                mutex_exit(QLOCK(RD(q1)));
        }
        ASSERT(stp->sd_freezer == curthread);
        stp->sd_freezer = NULL;
        strunblock(q);
        releasestr(RD(q));
}
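
/*
 * Illustrative sketch (hypothetical caller): freezestr() brackets
 * direct inspection or manipulation of queue fields, for example
 * via strqget()/strqset().
 *
 *      uintptr_t hiwat;
 *
 *      freezestr(q);
 *      error = strqget(q, QHIWAT, 0, &hiwat);
 *      unfreezestr(q);
 */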

/*
 * Used by open and close procedures to "sleep" waiting for messages to
 * arrive.  Note: can only be used in open and close procedures.
 *
 * Lower the gate and let in either messages on the syncq (if there are
 * any) or put/service procedures.
 *
 * If the queue has an outer perimeter this will not prevent entry into this
 * syncq (since outer_enter does not set SQ_WRITER on the syncq that gets the
 * exclusive access to the outer perimeter.)
 *
 * Return 0 if the cv_wait_sig() was interrupted; otherwise 1.
 *
 * It only makes sense to grab sq_putlocks for !SQ_CIOC sync queues because
 * otherwise put entry points were not blocked in the first place.  If this is
 * SQ_CIOC then qwait is used to wait for the service procedure to run, since
 * the syncq is always SQ_CIPUT if it is SQ_CIOC.
 *
 * Note that SQ_EXCL is dropped and SQ_WANTEXWAKEUP set in sq_flags
 * atomically under sq_putlocks to make sure putnext will not miss a pending
 * wakeup.
 */
int
qwait_sig(queue_t *q)
{
        syncq_t         *sq, *outer;
        uint_t          flags;
        int             ret = 1;
        int             is_sq_cioc;

        /*
         * Perform the same operations as a leavesq(sq, SQ_OPENCLOSE)
         * while detecting all cases where the perimeter is entered
         * so that qwait_sig can return to the caller.
         *
         * Drain the syncq if possible. Otherwise reset SQ_EXCL and
         * wait for a thread to leave the syncq.
         */
        sq = q->q_syncq;
        ASSERT(sq);
        is_sq_cioc = (sq->sq_type & SQ_CIOC) ? 1 : 0;
        ASSERT(sq->sq_outer == NULL || sq->sq_outer->sq_flags & SQ_WRITER);
        outer = sq->sq_outer;
        /*
         * XXX this does not work if there is only an outer perimeter.
         * The semantics of qwait/qwait_sig are undefined in this case.
         */
        if (outer)
                outer_exit(outer);

        mutex_enter(SQLOCK(sq));
        if (is_sq_cioc == 0) {
                SQ_PUTLOCKS_ENTER(sq);
        }
        flags = sq->sq_flags;
        /*
         * Drop SQ_EXCL and sq_count but hold the SQLOCK
         * to prevent any undetected entry and exit into the perimeter.
         */
        ASSERT(sq->sq_count > 0);
        sq->sq_count--;

        if (is_sq_cioc == 0) {
                ASSERT(flags & SQ_EXCL);
                flags &= ~SQ_EXCL;
        }
        /*
         * Unblock any thread blocked in an entersq or outer_enter.
         * Note: we do not unblock a thread waiting in qwait/qwait_sig,
         * since that could lead to livelock with two threads in
         * qwait for the same (per module) inner perimeter.
         */
        if (flags & SQ_WANTWAKEUP) {
                cv_broadcast(&sq->sq_wait);
                flags &= ~SQ_WANTWAKEUP;
        }
        sq->sq_flags = flags;
        if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
                if (is_sq_cioc == 0) {
                        SQ_PUTLOCKS_EXIT(sq);
                }
                /* drain_syncq() drops SQLOCK */
                drain_syncq(sq);
                ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
                entersq(sq, SQ_OPENCLOSE);
                return (1);
        }
        /*
         * Sleep on sq_exitwait to only be woken up when threads leave the
         * put or service procedures. We can not sleep on sq_wait since an
         * outer_exit in a qwait running in the same outer perimeter would
         * cause a livelock "ping-pong" between two or more qwait'ers.
         */
        do {
                sq->sq_flags |= SQ_WANTEXWAKEUP;
                if (is_sq_cioc == 0) {
                        SQ_PUTLOCKS_EXIT(sq);
                }
                ret = cv_wait_sig(&sq->sq_exitwait, SQLOCK(sq));
                if (is_sq_cioc == 0) {
                        SQ_PUTLOCKS_ENTER(sq);
                }
        } while (ret && (sq->sq_flags & SQ_WANTEXWAKEUP));
        if (is_sq_cioc == 0) {
                SQ_PUTLOCKS_EXIT(sq);
        }
        mutex_exit(SQLOCK(sq));

        /*
         * Re-enter the perimeters again
         */
        entersq(sq, SQ_OPENCLOSE);
        return (ret);
}
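
/*
 * Illustrative sketch (hypothetical open routine): wait for a state
 * change driven by incoming messages, giving up cleanly on a signal.
 *
 *      while (!xsp->xx_ready) {
 *              if (qwait_sig(q) == 0)
 *                      return (EINTR);
 *      }
 */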

/*
 * Used by open and close procedures to "sleep" waiting for messages to
 * arrive.  Note: can only be used in open and close procedures.
 *
 * Lower the gate and let in either messages on the syncq (if there are
 * any) or put/service procedures.
 *
 * If the queue has an outer perimeter this will not prevent entry into this
 * syncq (since outer_enter does not set SQ_WRITER on the syncq that gets the
 * exclusive access to the outer perimeter.)
 *
 * It only makes sense to grab sq_putlocks for !SQ_CIOC sync queues because
 * otherwise put entry points were not blocked in the first place.  If this is
 * SQ_CIOC then qwait is used to wait for the service procedure to run, since
 * the syncq is always SQ_CIPUT if it is SQ_CIOC.
 *
 * Note that SQ_EXCL is dropped and SQ_WANTEXWAKEUP set in sq_flags
 * atomically under sq_putlocks to make sure putnext will not miss a pending
 * wakeup.
 */
void
qwait(queue_t *q)
{
        syncq_t         *sq, *outer;
        uint_t          flags;
        int             is_sq_cioc;

        /*
         * Perform the same operations as a leavesq(sq, SQ_OPENCLOSE)
         * while detecting all cases where the perimeter is entered
         * so that qwait can return to the caller.
         *
         * Drain the syncq if possible. Otherwise reset SQ_EXCL and
         * wait for a thread to leave the syncq.
         */
        sq = q->q_syncq;
        ASSERT(sq);
        is_sq_cioc = (sq->sq_type & SQ_CIOC) ? 1 : 0;
        ASSERT(sq->sq_outer == NULL || sq->sq_outer->sq_flags & SQ_WRITER);
        outer = sq->sq_outer;
        /*
         * XXX this does not work if there is only an outer perimeter.
         * The semantics of qwait/qwait_sig are undefined in this case.
         */
        if (outer)
                outer_exit(outer);

        mutex_enter(SQLOCK(sq));
        if (is_sq_cioc == 0) {
                SQ_PUTLOCKS_ENTER(sq);
        }
        flags = sq->sq_flags;
        /*
         * Drop SQ_EXCL and sq_count but hold the SQLOCK
         * to prevent any undetected entry and exit into the perimeter.
         */
        ASSERT(sq->sq_count > 0);
        sq->sq_count--;

        if (is_sq_cioc == 0) {
                ASSERT(flags & SQ_EXCL);
                flags &= ~SQ_EXCL;
        }
        /*
         * Unblock any thread blocked in an entersq or outer_enter.
         * Note: we do not unblock a thread waiting in qwait/qwait_sig,
         * since that could lead to livelock with two threads in
         * qwait for the same (per module) inner perimeter.
         */
        if (flags & SQ_WANTWAKEUP) {
                cv_broadcast(&sq->sq_wait);
                flags &= ~SQ_WANTWAKEUP;
        }
        sq->sq_flags = flags;
        if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
                if (is_sq_cioc == 0) {
                        SQ_PUTLOCKS_EXIT(sq);
                }
                /* drain_syncq() drops SQLOCK */
                drain_syncq(sq);
                ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
                entersq(sq, SQ_OPENCLOSE);
                return;
        }
        /*
         * Sleep on sq_exitwait to only be woken up when threads leave the
         * put or service procedures. We can not sleep on sq_wait since an
         * outer_exit in a qwait running in the same outer perimeter would
         * cause a livelock "ping-pong" between two or more qwait'ers.
         */
        do {
                sq->sq_flags |= SQ_WANTEXWAKEUP;
                if (is_sq_cioc == 0) {
                        SQ_PUTLOCKS_EXIT(sq);
                }
                cv_wait(&sq->sq_exitwait, SQLOCK(sq));
                if (is_sq_cioc == 0) {
                        SQ_PUTLOCKS_ENTER(sq);
                }
        } while (sq->sq_flags & SQ_WANTEXWAKEUP);
        if (is_sq_cioc == 0) {
                SQ_PUTLOCKS_EXIT(sq);
        }
        mutex_exit(SQLOCK(sq));

        /*
         * Re-enter the perimeters again
         */
        entersq(sq, SQ_OPENCLOSE);
}

/*
 * Used for the synchronous streams entrypoints when sleeping outside
 * the perimeters.  Must never be called from a regular put entrypoint.
 *
 * There's no need to grab sq_putlocks here (which only exist for CIPUT sync
 * queues).  If it is a CIPUT sync queue, put entry points were not blocked in
 * the first place by rwnext/infonext, which are treated as put entrypoints
 * for perimeter synchronization purposes.
 *
 * Consolidation private.
 */
boolean_t
qwait_rw(queue_t *q)
{
        syncq_t         *sq;
        ulong_t         flags;
        boolean_t       gotsignal = B_FALSE;

        /*
         * Perform the same operations as a leavesq(sq, SQ_PUT)
         * while detecting all cases where the perimeter is entered
         * so that qwait_rw can return to the caller.
         *
         * Drain the syncq if possible. Otherwise reset SQ_EXCL and
         * wait for a thread to leave the syncq.
         */
        sq = q->q_syncq;
        ASSERT(sq);

        mutex_enter(SQLOCK(sq));
        flags = sq->sq_flags;
        /*
         * Drop SQ_EXCL and sq_count but hold the SQLOCK to prevent any
         * undetected entry and exit into the perimeter.
         */
        ASSERT(sq->sq_count > 0);
        sq->sq_count--;
        if (!(sq->sq_type & SQ_CIPUT)) {
                ASSERT(flags & SQ_EXCL);
                flags &= ~SQ_EXCL;
        }
        /*
         * Unblock any thread blocked in an entersq or outer_enter.
         * Note: we do not unblock a thread waiting in qwait/qwait_sig,
         * since that could lead to livelock with two threads in
         * qwait for the same (per module) inner perimeter.
         */
        if (flags & SQ_WANTWAKEUP) {
                cv_broadcast(&sq->sq_wait);
                flags &= ~SQ_WANTWAKEUP;
        }
        sq->sq_flags = flags;
        if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
                /* drain_syncq() drops SQLOCK */
                drain_syncq(sq);
                ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
                entersq(sq, SQ_PUT);
                return (B_FALSE);
        }
        /*
         * Sleep on sq_exitwait to only be woken up when threads leave the
         * put or service procedures. We can not sleep on sq_wait since an
         * outer_exit in a qwait running in the same outer perimeter would
         * cause a livelock "ping-pong" between two or more qwait'ers.
         */
        do {
                sq->sq_flags |= SQ_WANTEXWAKEUP;
                if (cv_wait_sig(&sq->sq_exitwait, SQLOCK(sq)) <= 0) {
                        sq->sq_flags &= ~SQ_WANTEXWAKEUP;
                        gotsignal = B_TRUE;
                        break;
                }
        } while (sq->sq_flags & SQ_WANTEXWAKEUP);
        mutex_exit(SQLOCK(sq));

        /*
         * Re-enter the perimeters again
         */
        entersq(sq, SQ_PUT);
        return (gotsignal);
}

/*
 * Asynchronously upgrade to exclusive access at either the inner or
 * outer perimeter.
 */
void
qwriter(queue_t *q, mblk_t *mp, void (*func)(), int perim)
{
        if (perim == PERIM_INNER)
                qwriter_inner(q, mp, func);
        else if (perim == PERIM_OUTER)
                qwriter_outer(q, mp, func);
        else
                panic("qwriter: wrong \"perimeter\" parameter");
}
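
/*
 * Illustrative sketch (hypothetical put routine): defer work that
 * needs exclusive access; qwriter() arranges for xxexclusive() to be
 * called with mp once the outer perimeter is held exclusively.
 *
 *      qwriter(q, mp, xxexclusive, PERIM_OUTER);
 *      return (0);
 */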

/*
 * Schedule a synchronous streams timeout
 */
timeout_id_t
qtimeout(queue_t *q, void (*func)(void *), void *arg, clock_t tim)
{
        syncq_t         *sq;
        callbparams_t   *cbp;
        timeout_id_t    tid;

        sq = q->q_syncq;
        /*
         * We don't want the timeout firing before its params are set up.
         * callbparams_alloc() acquires SQLOCK(sq).
         * qtimeout() can't fail and can't sleep, so panic if memory is not
         * available.
         */
        cbp = callbparams_alloc(sq, func, arg, KM_NOSLEEP | KM_PANIC);
        /*
         * The callbflags in the sq use the same flags.  They get anded
         * in the callbwrapper to determine if a qun* of this callback type
         * is required.  This is not a request to cancel.
         */
        cbp->cbp_flags = SQ_CANCEL_TOUT;
        /* check new timeout version return codes */
        tid = timeout(qcallbwrapper, cbp, tim);
        cbp->cbp_id = (callbparams_id_t)tid;
        mutex_exit(SQLOCK(sq));
        /* use the local id because the cbp memory could be freed by now */
        return (tid);
}
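
/*
 * Illustrative sketch (hypothetical module): schedule a retry with
 * qtimeout() and make sure it is cancelled before close completes.
 *
 *      xsp->xx_tid = qtimeout(q, xxretry, xsp, drv_usectohz(10000));
 *      ...
 *      if (xsp->xx_tid != 0)
 *              (void) quntimeout(q, xsp->xx_tid);
 */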

bufcall_id_t
qbufcall(queue_t *q, size_t size, uint_t pri, void (*func)(void *), void *arg)
{
        syncq_t         *sq;
        callbparams_t   *cbp;
        bufcall_id_t    bid;

        sq = q->q_syncq;
        /*
         * We don't want the bufcall firing before its params are set up.
         * callbparams_alloc() acquires SQLOCK(sq) if successful.
         */
        cbp = callbparams_alloc(sq, func, arg, KM_NOSLEEP);
        if (cbp == NULL)
                return ((bufcall_id_t)0);

        /*
         * The callbflags in the sq use the same flags.  They get anded
         * in the callbwrapper to determine if a qun* of this callback type
         * is required.  This is not a request to cancel.
         */
        cbp->cbp_flags = SQ_CANCEL_BUFCALL;
        /* check new timeout version return codes */
        bid = bufcall(size, pri, qcallbwrapper, cbp);
        cbp->cbp_id = (callbparams_id_t)bid;
        if (bid == 0) {
                callbparams_free(sq, cbp);
        }
        mutex_exit(SQLOCK(sq));
        /* use the local id because the params memory could be freed by now */
        return (bid);
}
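
/*
 * Illustrative sketch (hypothetical service routine): the classic
 * allocb() failure recovery, using qbufcall() so the callback runs
 * inside the perimeters.
 *
 *      if ((bp = allocb(size, BPRI_MED)) == NULL) {
 *              xsp->xx_bufcid = qbufcall(q, size, BPRI_MED,
 *                  xxreschedule, xsp);
 *              return (0);
 *      }
 */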

/*
 * Cancel a timeout callback which enters the inner perimeter.
 * Cancelling of all callback types on a given syncq is serialized.
 * The SQ_CALLB_BYPASSED flag indicates that the callback fn did
 * not execute.  The quntimeout return value needs to reflect this.
 * As with our existing callback programming model, callbacks must
 * be cancelled before a close completes, thus ensuring that the sq
 * is valid when the callback wrapper is executed.
 */
clock_t
quntimeout(queue_t *q, timeout_id_t id)
{
        syncq_t *sq = q->q_syncq;
        clock_t ret;

        mutex_enter(SQLOCK(sq));
        /* callbacks are processed serially on each syncq */
        while (sq->sq_callbflags & SQ_CALLB_CANCEL_MASK) {
                sq->sq_flags |= SQ_WANTWAKEUP;
                cv_wait(&sq->sq_wait, SQLOCK(sq));
        }
        sq->sq_cancelid = (callbparams_id_t)id;
        sq->sq_callbflags = SQ_CANCEL_TOUT;
        if (sq->sq_flags & SQ_WANTWAKEUP) {
                cv_broadcast(&sq->sq_wait);
                sq->sq_flags &= ~SQ_WANTWAKEUP;
        }
        mutex_exit(SQLOCK(sq));
        ret = untimeout(id);
        mutex_enter(SQLOCK(sq));
        if (ret != -1) {
                /* The wrapper was never called - need to free based on id */
                callbparams_free_id(sq, (callbparams_id_t)id, SQ_CANCEL_TOUT);
        }
        if (sq->sq_callbflags & SQ_CALLB_BYPASSED) {
                ret = 0;        /* this was how much time was left */
        }
        sq->sq_callbflags = 0;
        if (sq->sq_flags & SQ_WANTWAKEUP) {
                cv_broadcast(&sq->sq_wait);
                sq->sq_flags &= ~SQ_WANTWAKEUP;
        }
        mutex_exit(SQLOCK(sq));
        return (ret);
}


void
qunbufcall(queue_t *q, bufcall_id_t id)
{
        syncq_t *sq = q->q_syncq;

        mutex_enter(SQLOCK(sq));
        /* callbacks are processed serially on each syncq */
        while (sq->sq_callbflags & SQ_CALLB_CANCEL_MASK) {
                sq->sq_flags |= SQ_WANTWAKEUP;
                cv_wait(&sq->sq_wait, SQLOCK(sq));
        }
        sq->sq_cancelid = (callbparams_id_t)id;
        sq->sq_callbflags = SQ_CANCEL_BUFCALL;
        if (sq->sq_flags & SQ_WANTWAKEUP) {
                cv_broadcast(&sq->sq_wait);
                sq->sq_flags &= ~SQ_WANTWAKEUP;
        }
        mutex_exit(SQLOCK(sq));
        unbufcall(id);
        mutex_enter(SQLOCK(sq));
        /*
         * No indication from unbufcall if the callback has already run.
         * Always attempt to free it.
         */
        callbparams_free_id(sq, (callbparams_id_t)id, SQ_CANCEL_BUFCALL);
        sq->sq_callbflags = 0;
        if (sq->sq_flags & SQ_WANTWAKEUP) {
                cv_broadcast(&sq->sq_wait);
                sq->sq_flags &= ~SQ_WANTWAKEUP;
        }
        mutex_exit(SQLOCK(sq));
}

/*
 * Associate the stream with an instance of the bottom driver.  This
 * function is called by APIs that establish or modify the hardware
 * association (ppa) of an open stream.  Two examples of such
 * post-open(9E) APIs are the dlpi(7p) DL_ATTACH_REQ message, and the
 * ndd(1M) "instance=" ioctl(2).  This interface may be called from a
 * stream driver's wput procedure and from within syncq perimeters,
 * so it can't block.
 *
 * The qassociate() "model" is that it should drive attach(9E), yet it
 * can't really do that because driving attach(9E) is a blocking
 * operation.  Instead, the qassociate() implementation has complex
 * dependencies on the implementation behavior of other parts of the
 * kernel to ensure all appropriate instances (ones that have not been
 * made inaccessible by DR) are attached at stream open() time, and
 * that they will not autodetach.  The code relies on the fact that an
 * open() of a stream that ends up using qassociate() always occurs on
 * a minor node created with CLONE_DEV.  The open() comes through
 * clnopen() and since clnopen() calls ddi_hold_installed_driver() we
 * attach all instances and mark them DN_NO_AUTODETACH (given
 * DN_DRIVER_HELD is maintained correctly).
 *
 * Since qassociate() can't really drive attach(9E), there are corner
 * cases where the compromise described above leads to qassociate()
 * returning failure.  This can happen when administrative functions
 * that cause detach(9E), such as "update_drv" or "modunload -i", are
 * performed on the driver between the time the stream was opened and
 * the time its hardware association was established.  Although this can
 * theoretically be an arbitrary amount of time, in practice the window
 * is usually quite small, since applications almost always issue their
 * hardware association request immediately after opening the stream,
 * and do not typically switch association while open.  When these
 * corner cases occur, and qassociate() finds the requested instance
 * detached, it will return failure.  This failure should be propagated
 * to the requesting administrative application using the appropriate
 * post-open(9E) API error mechanism.
 *
 * All qassociate() callers are expected to check for and gracefully handle
 * failure return, propagating errors back to the requesting administrative
 * application.
 */
int
qassociate(queue_t *q, int instance)
{
        vnode_t *vp;
        major_t major;
        dev_info_t *dip;

        if (instance == -1) {
                ddi_assoc_queue_with_devi(q, NULL);
                return (0);
        }

        vp = STREAM(q)->sd_vnode;
        major = getmajor(vp->v_rdev);
        dip = ddi_hold_devi_by_instance(major, instance,
            E_DDI_HOLD_DEVI_NOATTACH);
        if (dip == NULL)
                return (-1);

        ddi_assoc_queue_with_devi(q, dip);
        ddi_release_devi(dip);
        return (0);
}
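
/*
 * Illustrative sketch (hypothetical DL_ATTACH_REQ handler; xx names
 * are made up): bind the stream to the instance named by the ppa,
 * failing the attach if that instance has been detached.
 *
 *      if (qassociate(q, ppa) != 0) {
 *              xxerrorack(q, mp, DL_ATTACH_REQ, DL_BADPPA);
 *              return;
 *      }
 */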

/*
 * This routine is the SVR4MP 'replacement' for
 * hat_getkpfnum.  The only major difference is
 * the return value for illegal addresses - since
 * sunm_getkpfnum() and srmmu_getkpfnum() both
 * return '-1' for bogus mappings, we can (more or
 * less) return the value directly.
 */
ppid_t
kvtoppid(caddr_t addr)
{
        return ((ppid_t)hat_getpfnum(kas.a_hat, addr));
}

/*
 * This is used to set the timeout value for cv_timedwait() or
 * cv_timedwait_sig().
 */
void
time_to_wait(clock_t *now, clock_t time)
{
        *now = ddi_get_lbolt() + time;
}