1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #ifndef _INET_NCA_H
  27 #define _INET_NCA_H
  28 
  29 #ifdef  __cplusplus
  30 extern "C" {
  31 #endif
  32 
  33 #include <sys/thread.h>
  34 #include <sys/door.h>
  35 #include <sys/disp.h>
  36 #include <sys/systm.h>
  37 #include <sys/processor.h>
  38 #include <sys/socket.h>
  39 #include <inet/common.h>
  40 #include <inet/ip.h>
  41 #include <inet/tcp.h>
  42 #include <inet/nca/ncadoorhdr.h>
  43 
  44 /*
  45  * The NCA debugging facilities provided via ADB and MDB depend on a
  46  * number of NCA implementation details.  In particular, note that:
  47  *
  48  *      * ADB macros *must* be revised whenever members are added or
  49  *        removed from the following structures:
  50  *
  51  *              nca_conn_t connf_t nca_cpu_t dcb_t hcb_t nca_if_t nca_io2_t
  52  *              node_t nodef_t sqfan_t nca_squeue_t tb_t te_t ti_t tw_t
  53  *
  54  *      * ADB macros should be added when new core data structures are
  55  *        added to NCA.  Generally, if you had to put it in here, you
  56  *        need to write a macro for it.
  57  *
  58  *      * MDB has many dependencies on the way core data structures
  59  *        are connected.  In general, if you break these dependencies,
  60  *        the MDB NCA module will fail to build.  However, breakage
  61  *        may go undetected (for instance, changing a linked list
  62  *        into a circularly linked list).  If you have any doubts,
  63  *        inspect the NCA module source before committing your changes.
  64  *
  65  *      * MDB depends on the following variables (and their current
  66  *        semantics) in order to function correctly:
  67  *
  68  *              nca_conn_fanout nca_conn_fanout_size nca_gv nca_lru
  69  *              urihash filehash
  70  *
  71  *        If you change the names or *semantics* of these variables,
  72  *        you must modify the MDB module accordingly.
  73  *
  74  *        In addition, you should consider whether the changes you've
  75  *        made should be reflected in the MDB dcmds themselves.
  76  */
  77 
  78 /* STREAMS queue (queue_t) on which upcalls for NCAfs are made */
  79 extern queue_t *ncaupcallq;
  80 extern kmutex_t ncaupcallq_lock;     /* presumably guards ncaupcallq -- confirm */
  81 
  82 extern int nca_logging_on;
  83 extern int nca_conn_fanout_size;     /* one of the variables MDB depends on */
  84 extern boolean_t nca_deferred_oq_if;
  85 extern boolean_t nca_fanout_iq_if;
  86 
  87 /* Checksum pointer for no checksum */
  88 
  89 #define NO_CKSUM (void *)-1
  90 
  91 /*
      * Undefine the tcp_t member names below that the Solaris 8 tcp.h
      * overloads (presumably as macros, since #undef only affects macros),
      * so these identifiers are not macro-expanded in the rest of this header.
      */
  92 
  93 #undef  tcp_last_rcv_lbolt
  94 #undef  tcp_state
  95 #undef  tcp_rto
  96 #undef  tcp_snd_ts_ok
  97 #undef  tcp_snd_ws_ok
  98 #undef  tcp_snxt
  99 #undef  tcp_swnd
 100 #undef  tcp_mss
 101 #undef  tcp_iss
 102 #undef  tcp_rnxt
 103 #undef  tcp_rwnd
 104 #undef  tcp_lport
 105 #undef  tcp_fport
 106 #undef  tcp_ports
 107 
 108 /* the iph_t is no longer defined in ip.h for Solaris 8 ? */
 109 
 110 /*
      * Unaligned IP header.  Every member is a uchar_t or an array of uchar_t,
      * so the structure needs no more than byte alignment; multi-byte fields
      * are kept as byte arrays (presumably in network byte order -- confirm).
      * The members add up to 20 bytes.
      */
 111 typedef struct iph_s {
 112         uchar_t iph_version_and_hdr_length;     /* version + header length */
 113         uchar_t iph_type_of_service;
 114         uchar_t iph_length[2];
 115         uchar_t iph_ident[2];
 116         uchar_t iph_fragment_offset_and_flags[2];
 117         uchar_t iph_ttl;
 118         uchar_t iph_protocol;
 119         uchar_t iph_hdr_checksum[2];
 120         uchar_t iph_src[4];                     /* source address */
 121         uchar_t iph_dst[4];                     /* destination address */
 122 } iph_t;
 123 
 124 
 125 #define true    B_TRUE                  /* boolean_t alias; NOTE(review): clashes with <stdbool.h>/C23 "true" */
 126 #define false   B_FALSE                 /* boolean_t alias; NOTE(review): clashes with <stdbool.h>/C23 "false" */
 127 
 128 /*
 129  * Power of 2^N Primes useful for hashing for N of 0-28,
 130  * these primes are the nearest prime <= 2^N - 2^(N-2).
 131  */
 132 
 133 #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,  \
 134                 6143, 12281, 24571, 49139, 98299, 196597, 393209,       \
 135                 786431, 1572853, 3145721, 6291449, 12582893, 25165813,  \
 136                 50331599, 100663291, 201326557, 0}
 137 
 138 /*
 139  * Serialization queue type (move to strsubr.h (stream.h?) as a general
 140  * purpose lightweight mechanism for mblk_t serialization ?).
      *
      * sq_state takes the SQS_* flags and sq_type the SQT_* flags defined
      * below.  ADB macros must be revised whenever members are added to or
      * removed from this structure (see the note at the top of this file).
 141  */
 142 typedef struct nca_squeue_s {
 143         uint16_t        sq_state;       /* state flags (SQS_*) */
 144         uint16_t        sq_count;       /* message count */
 145         uint32_t        sq_type;        /* type flags (SQT_*) */
 146         processorid_t   sq_bind;        /* processor to bind to */
 147         ddi_softintr_t  sq_softid;      /* softintr() id */
 148         void            (*sq_init)();   /* initialize function (unprototyped) */
 149         void            *sq_init_arg;   /* initialize argument */
 150         void            (*sq_proc)();   /* process function (unprototyped) */
 151         mblk_t          *sq_first;      /* first mblk chain or NULL */
 152         mblk_t          *sq_last;       /* last mblk chain or NULL */
 153         clock_t         sq_wait;        /* lbolts to wait after a fill() */
 154         clock_t         sq_iwait;       /* lbolt after nointr() */
 155         clock_t         sq_pwait;       /* lbolt after pause() */
 156         int             sq_isintr;      /* is being or was serviced by (original comment ends here) */
 157         timeout_id_t    sq_tid;         /* timer id of pending timeout() */
 158         kcondvar_t      sq_async;       /* async thread blocks on */
 159         kmutex_t        sq_lock;        /* lock before using any member */
 160         clock_t         sq_awaken;      /* time async thread was awakened */
 161         void            *sq_priv;       /* user defined private */
 162         kt_did_t        sq_ktid;        /* kernel thread id */
 163 } nca_squeue_t;
 164 
 165 /*
 166  * State flags and message count (i.e. properties that change)
 167  * Note: The MDB NCA module depends on the values of these flags.
 168  */
 169 
 170 #define SQS_CNT_TOOMANY 0x8000  /* message count: too many (same value as SQS_EXIT; presumably applies to sq_count -- confirm) */
 171 
 172 /* nca_squeue_t state flags now only 16 bits (sq_state is a uint16_t) */
 173 
 174 #define SQS_PROC        0x0001  /* being processed */
 175 #define SQS_WORKER      0x0002  /* worker thread */
 176 #define SQS_ENTER       0x0004  /* enter thread */
 177 #define SQS_FAST        0x0008  /* enter-fast thread */
 178 #define SQS_PROXY       0x0010  /* proxy thread */
 179 #define SQS_SOFTINTR    0x0020  /* softint thread */
 180                                 /* 0x00C0 bits not used */
 181 
 182 #define SQS_NOINTR      0x0100  /* no interrupt processing */
 183 #define SQS_PAUSE       0x0200  /* paused */
 184 #define SQS_INTRWAIT    0x0400  /* interrupt waiting */
 185 #define SQS_NOPROC      0x0800  /* no processing */
 186                                 /* 0x7000 bits not used */
 187 #define SQS_EXIT        0x8000  /* worker(s) exit */
 188 
 189 /*
 190  * Type flags (i.e. properties that don't change).
 191  * Note: The MDB NCA module depends on the values of these flags.
 192  */
 193 
 194 #define SQT_BIND_MASK   0xFF000000      /* bind flags mask (top 8 bits) */
 195 
 196 #define SQT_KMEM        0x00000001      /* was kmem_alloc()ed */
 197 #define SQT_DEFERRED    0x00000002      /* deferred processing */
 198 #define SQT_SOFTINTR    0x00000004      /* use softintr() */
 199 
 200 #define SQT_BIND_ANY    0x01000000      /* bind worker thread to any CPU */
 201 #define SQT_BIND_TO     0x02000000      /* bind worker thread to specified CPU */
 202 
      /* Each yields nonzero when any bit of flags is set in the named member. */
 203 #define SQ_STATE_IS(sqp, flags) ((sqp)->sq_state & (flags))
 204 #define SQ_TYPE_IS(sqp, flags) ((sqp)->sq_type & (flags))
 205 
 206 
 207 typedef struct sqfan_s {
 208         uint32_t        flg;            /* flags only (SQF_* below) */
 209         uint32_t        cnt;            /* vector count */
 210         uint32_t        ix;             /* next sqv[] to process */
 211         uint32_t        drain;          /* max mblk(s) drained per (original comment ends here) */
 212         nca_squeue_t    **sqv;  /* pointer to nca_squeue_t pointer vector */
 213 } sqfan_t;
 214 
 215 #define SQF_DIST_CNT    0x0001  /* sqfan_t dist by queue count */
 216 #define SQF_DIST_IPv4   0x0002  /* sqfan_t dist by IPv4 src addr */
 217 
 218 /*
 219  * A multiphase timer is implemented using the te_t, tb_t, and ti_t structs.
 220  *
 221  * The multiple phases of timer entry execution are:
 222  *
 223  * 1) resource, execution is done from resource reclaim when the timer event
 224  *    is the freeing of the timed resource.
 225  *
 226  * 2) process, execution is done from process thread yield (idle/return).
 227  *
 228  * 3) time, execution is done from a timeout callback thread.
 229  *
 230  * Each of the phases has a separate timer fire time represented by the
 231  * ti_t members lbolt1, lbolt2, and lbolt3. Each lbolt is an absolute
 232  * lbolt value with lbolt1 <= lbolt2 <= lbolt3.
 233  */
 234 
 235 /*
 236  * te_t - timer entry.
      *
      * Entries are doubly linked (prev/next) and each one points back at the
      * tb_t timer bucket it belongs to.
 237  */
 238 
 239 typedef struct te_s {
 240         struct te_s *prev;      /* prev te_t */
 241         struct te_s *next;      /* next te_t */
 242         struct tb_s *tbp;       /* pointer to timer bucket */
 243         void    *ep;            /* pointer to encapsulating struct */
 244 } te_t;
 245 
 246 /*
 247  * tb_t - timer bucket.
      *
      * Buckets are singly linked in ascending time order; each holds the
      * head and tail of a list of te_t entries.
 248  */
 249 
 250 typedef struct tb_s {
 251         struct tb_s *next;      /* next tb_t in ascending time order */
 252         clock_t exec;           /* te_t lbolt exec value for bucket */
 253         te_t    *head;          /* head of te_t list (first timer) */
 254         te_t    *tail;          /* tail of te_t list (last timer) */
 255 } tb_t;
 256 
 257 /*
 258  * ti_t - timer state.
      *
      * Holds the three per-phase fire times (lbolt1 <= lbolt2 <= lbolt3, as
      * described above) and the head and tail of the tb_t bucket list.
 259  */
 260 
 261 typedef struct ti_s {
 262         clock_t exec;           /* next te_t exec value (0 = NONE) */
 263         clock_t lbolt1;         /* phase1 lbolt1 (0 = NONE) */
 264         clock_t lbolt2;         /* phase2 lbolt2 (0 = NONE) */
 265         clock_t lbolt3;         /* phase3 lbolt3 (0 = NONE) */
 266         tb_t    *head;          /* head of tb_t list (first timer bucket) */
 267         tb_t    *tail;          /* tail of tb_t list (last timer bucket) */
 268         timeout_id_t tid;       /* timer id of pending timeout() (0 = NONE) */
 269         void    *ep;            /* pointer to encapsulating struct */
 270 } ti_t;
 271 
 272 #define NCA_TI_INPROC   -1      /* Processing going on; NOTE(review): expansion is an unparenthesized -1 */
 273 #define NCA_TI_NONE     0       /* no lbolt (the "0 = NONE" of ti_t) */
 274 
 275 /*
 276  * TIME_WAIT grounded doubly linked list of nca_conn_t's awaiting TIME_WAIT
 277  * expiration. This list is used for reclaim, reap, and timer-based
 278  * processing.
 279  *
 280  * A multiphase timer is used:
 281  *
 282  * phase 1) reclaim of connections during connection allocation
 283  *
 284  * phase 2) reaping of connections during nca_squeue_t inq thread unwind
 285  *
 286  * phase 3) timeout of connections as a result of a timeout().
 287  *
 288  * Each of the phases has a separate timer fire lbolt represented by
 289  * the members lbolt1, lbolt2, and lbolt3, each is an absolute lbolt value
 290  * with lbolt1 <= lbolt2 <= lbolt3.
 291  */
 292 
 293 typedef struct tw_s {
 294         clock_t lbolt1;         /* phase1 lbolt value (0 = NONE) */
 295         clock_t lbolt2;         /* phase2 lbolt value  */
 296         clock_t lbolt3;         /* phase3 lbolt value  */
 297         struct nca_conn_s *head;        /* Head of nca_conn_t list */
 298         struct nca_conn_s *tail;        /* Tail of nca_conn_t list */
 299         timeout_id_t tid;       /* Timer id of pending timeout() (0 = NONE); set and cleared by NCA_TW_LBOLTS */
 300         void    *ep;            /* pointer to encapsulating struct */
 301 } tw_t;
 302 
 303 #define NCA_TW_NONE     0       /* no lbolt */
 304 
 305 #define NCA_TW_MS       1000    /* TIME_WAIT bucket width, in milliseconds */
 306 
 307 #define NCA_TW_LBOLT MSEC_TO_TICK(NCA_TW_MS)    /* the same width in ticks; NCA_TW_LBOLTS rounds up to it */
 308 
 309 #define NCA_TW_LBOLTS(twp, future) {                                    \
 310         clock_t _lbolt = (future);                                      \
 311         clock_t _mod = _lbolt % NCA_TW_LBOLT;                           \
 312                                                                         \
 313         if (_mod) {                                                     \
 314                 /* Roundup to next TIME_WAIT bucket */                  \
 315                 _lbolt += NCA_TW_LBOLT - _mod;                          \
 316         }                                                               \
 317         if ((twp)->lbolt1 != _lbolt) {                                       \
 318                 (twp)->lbolt1 = _lbolt;                                      \
 319                 _lbolt += NCA_TW_LBOLT;                                 \
 320                 (twp)->lbolt2 = _lbolt;                                      \
 321                 _lbolt += NCA_TW_LBOLT;                                 \
 322                 (twp)->lbolt3 = _lbolt;                                      \
 323                 if ((twp)->tid != 0) {                                       \
 324                         (void) untimeout((twp)->tid);                        \
 325                         (twp)->tid = 0;                                      \
 326                 }                                                       \
 327                 if ((_lbolt) != NCA_TW_NONE) {                          \
 328                         (twp)->tid = timeout((pfv_t)nca_tw_fire, (twp),      \
 329                             (twp)->lbolt3 - ddi_get_lbolt());                \
 330                 }                                                       \
 331         }                                                               \
 332 }
 333 
 334 /*
 335  * The Node Fanout structure.
 336  *
 337  * The hash tables and their linkage (hashnext) are protected by the
 338  * per-bucket lock. Each node_t inserted in the list points back at
 339  * the nodef_t that heads the bucket (hashfanout).
 340  */
 341 
 342 typedef struct nodef_s {
 343         struct node_s   *head;          /* first node_t in this bucket */
 344         kmutex_t        lock;           /* per-bucket lock (see above) */
 345 } nodef_t;
 346 
 347 /*
 348  * A node_t is used to represent a cached byte-stream object. A node_t is
 349  * in one of four active states:
 350  *
 351  * 1) path != NULL, member of a node_t hash list with an object description
 352  *    (hashnext, size, path, pathsz members valid).
 353  *
 354  * 2) pp != NULL, 1) + phys pages allocated (pp, plrupn, plrunn members valid).
 355  *
 356  * 3) data != NULL, 2) + virt mapping allocated (data, datasz, vlrupn, vlrunn
 357  *    members valid).
 358  *
 359  * 4) cksum != NULL 3) + checksum mapping allocated
 360  */
 361 
 362 typedef struct node_s {
 363         uint32_t        ref;            /* ref (see REF_* below) state */
 364         uint32_t        cnt;            /* ref count (see NODE_REFHOLD/NODE_REFRELE) */
 365         int32_t         size;           /* object size (-1 = UNKNOWN) */
 366         uint32_t        mss;            /* mblk(s) in size mss */
 367         uint64_t        ctag;           /* usr defined cache tag, 0 => no tag */
 368         ipaddr_t        laddr;          /* local IP, for virtual hosting */
 369         uint16_t        lport;          /* local port, for virtual hosting */
 370 
 371         struct node_s   *plrunn;        /* Phys LRU list next node_t */
 372         struct node_s   *plrupn;        /* Phys LRU list previous node_t */
 373         struct node_s   *vlrunn;        /* Virt LRU list next node_t */
 374         struct node_s   *vlrupn;        /* Virt LRU list previous node_t */
 375 
 376         nodef_t *hashfanout;            /* hash bucket we're part of */
 377         nodef_t *ctaghashfanout;        /* ctaghash bucket we're part of */
 378         struct node_s *hashnext;        /* hash list next node_t */
 379         struct node_s *ctaghashnext;    /* ctaghash list next node_t */
 380         struct nca_conn_s *connhead;    /* head of list of conn(s) in miss */
 381         struct nca_conn_s *conntail;    /* tail of list of conn(s) in miss */
 382         struct node_s *next;            /* needed if data is in chunks */
 383         struct node_s *back;            /* needed if data is in chunks */
 384 
 385         clock_t expire;         /* lbolt node_t expires (0 = NOW, -1 = NEVER) */
 386         time_t  lastmod;        /* HTTP "Last-Modified:" value */
 387 
 388         mblk_t  *req;           /* whole HTTP request (including headers) */
 389         int     reqsz;          /* size of above */
 390         int     reqcontl;       /* HTTP "Content-Length:" value */
 391         uint32_t rcv_cnt;       /* rcv_list byte count */
 392         mblk_t  *rcv_head;      /* rcv_list head */
 393         mblk_t  *rcv_tail;      /* rcv_list tail */
 394         mblk_t  *rcv_ptr;       /* rcv_list pointer */
 395 
 396         nca_squeue_t *sqp;      /* squeue node_t is being processed from */
 397         char    *path;          /* URI path component */
 398         int     pathsz;         /* size of above */
 399         uint_t  method;         /* HTTP request method */
 400         uint_t  version;        /* HTTP request version */
 401         char    *reqhdr;        /* HTTP request header(s) */
 402         int     reqhdrsz;       /* size of above */
 403         char    *reqhost;       /* HTTP "Host:" string */
 404         int     reqhostsz;      /* size of above */
 405         char    *reqaccept;     /* HTTP "Accept:" string */
 406         int     reqacceptsz;    /* size of above */
 407         char    *reqacceptl;    /* HTTP "Accept-Language:" string */
 408         int     reqacceptlsz;   /* size of above */
 409 
 410         page_t  **pp;           /* page pointer vector for data */
 411         char    *data;          /* data buffer */
 412         int     datasz;         /* size of above */
 413         uint16_t *cksum;        /* cksum() vector for data by mss */
 414         size_t  cksumlen;       /* length of memory block for above vector */
 415         uint_t  resbody;        /* HTTP response body at &data[resbody] */
 416 
 417         int     hlen;           /* data buffer split header len */
 418         int     fileoff;        /* file include offset */
 419         int     filelen;        /* length of file */
 420         struct node_s *fileback; /* head node_t of a file list (-1 for death) */
 421         struct node_s *filenext; /* next node_t of a file list */
 422         struct node_s *ctagback; /* head node_t of a ctag list */
 423         struct node_s *ctagnext; /* next node_t of a ctag list */
 424         vnode_t *filevp;        /* vnode for the file */
 425 
 426         kmutex_t lock;          /* serializes access to node_t; held across the NODE_* macros */
 427         frtn_t  frtn;           /* STREAMS free routine; always node_freeb() */
 428         boolean_t headchunk;    /* true if this node is the head chunk */
 429 
 430         /*
 431          * The following 4 fields are used to record node states when
 432          * upcalls are preempted. When preempted upcalls are not relevant,
 433          * these fields should have default value 0.
 434          */
 435         uint8_t advise;         /* an interpreted advise from http */
 436         boolean_t last_advisory; /* preempted upcall state -- advisory bit */
 437         boolean_t advisory;     /* need advisory from httpd before use */
 438         boolean_t first_upcall; /* node in first upcall, an internal state */
 439 
 440         kcondvar_t cv;          /* sync upcall/downcall process on a node */
 441         int     onqueue;        /* == 1 if on miss_queue, debug aid */
 442 } node_t;
 443 
 444 /* Note: The MDB NCA module depends on the values of these flags. */
 445 
 446 #define REF_URI         0x80000000 /* & ref = node_t URI hashed */
 447 #define REF_PHYS        0x40000000 /* & ref = phys mapping in-use */
 448 #define REF_VIRT        0x20000000 /* & ref = virt mapping in-use */
 449 #define REF_CKSUM       0x10000000 /* & ref = checksum mapping in-use */
 450 #define REF_KMEM        0x08000000 /* & ref = kmem mapped (PHYS|VIRT) */
 451 #define REF_DONE        0x04000000 /* & ref = node_t fill is done */
 452 #define REF_SAFED       0x02000000 /* & ref = node_t not safe for use */
 453 #define REF_FILE        0x01000000 /* & ref = node_t filename hashed */
 454 #define REF_RESP        0x00800000 /* & ref = node_t response header parsed */
 455 #define REF_NOLRU       0x00400000 /* & ref = node_t not safe for lru reclaim */
 456 #define REF_MISS        0x00200000 /* & ref = node_t is/will missed() proc */
 457 #define REF_ONPLRU      0x00100000 /* & ref = node_t is on Phys LRU */
 458 #define REF_ONVLRU      0x00080000 /* & ref = node_t is on Virt LRU */
 459 #define REF_PREEMPT     0x00040000 /* & ref = node_t processing preempted */
 460 #define REF_CTAG        0x00020000 /* & ref = node_t CTAG hashed */
 461 #define REF_UPCALL      0x00010000 /* & ref = node_t upcall not yet complete */
 462 #define REF_OWNED       0x00008000 /* & ref = node_t owned (won't be freed) */
 463 #define REF_ERROR       0x00004000 /* & ref = node_t errored */
 464 #define REF_VNODE       0x00002000 /* & ref = node_t vnode hashed */
 465 #define REF_NCAFS       0x00001000 /* & ref = node_t is NCAfs required */
 466 #define REF_SEGMAP      0x00000800 /* & ref = segmapped (PHYS|VIRT) */
 467 #define REF_UNUSED      0x000007FF /* & ref = UNUSED (all bits below REF_SEGMAP) */
 468 /*
 469  * Mappings where no separate PHYS and VIRT, i.e. single mapping with a
 470  * virtual address e.g. REF_KMEM and REF_SEGMAP.
 471  */
 472 #define REF_NOVIRT      (REF_KMEM | REF_SEGMAP)
 473 
 474 /* Is this node safe for reclaim ? (presumably only when none of these is set -- confirm) */
 475 #define REF_RECLAIM     (REF_SAFED | REF_NOLRU | REF_MISS)
 476 
 477 /*
 478  * NCA node_t reference counting is more complicated than nca_conn_t reference
 479  * counting because we pass parts of node_t's (masquerading as dblk
 480  * buffers) into the STREAMS subsystem which eventually get freed by
 481  * network drivers just like regular dblk buffers.  Also, unlike nca_conn_t's,
 482  * we may wish to keep a node_t around even after there are no outstanding
 483  * references, since it's possible that it will be requested again.
 484  *
 485  * Thus, the node_t reference count reflects the number of active codepaths
 486  * in Solaris making use of a given node_t -- each codepath that requires
 487  * that the node_t stick around once it drops the node_t lock must acquire
 488  * a reference via NODE_REFHOLD and drop that reference via NODE_REFRELE
 489  * when done.  Note that following a NODE_REFRELE the node that was
 490  * released may no longer exist and thus it should not be referenced unless
 491  * the codepath has another outstanding reference.  When a node_t is passed
 492  * into the STREAMS subsystem via desballoc() and related interfaces, a
 493  * NODE_REFHOLD should be placed on the node_t and the free routine should
 494  * be set to node_freeb(), which will in turn call NODE_REFRELE.
 495  *
 496  * The concept of node ownership allows NCA to express that it would like
 497  * this node to hang around, even if there are no "explicit" references to
 498  * it (the ownership counts as an implicit reference).  All "headchunk"
 499  * hashed nodes are owned when they are created.  If they subsequently
 500  * become disowned (currently via nca_node_del() or nca_reclaim_vlru()),
 501  * they may have some or all their resources freed (via node_fr()) as soon
 502  * as the last reference to them is removed.  Note that it's possible that
 503  * a disowned node may become of interest again before some or all of its
 504  * resources were reclaimed -- in this case, it must be reowned via
 505  * NODE_OWN.  Note that an unhashed node should never be owned, though it
 506  * of course may be held and released; this is because there is no sense
 507  * in owning a node which is merely temporary (i.e., not hashed somewhere).
 508  * Note that the corollary of this statement is not true -- that is, just
 509  * because a node is hashed does not mean it is owned (it may have been
 510  * disowned via nca_reclaim_vlru()) -- this is why code must always reown
 511  * hashed nodes if it's desirable to have them stick around.
 512  *
 513  * All four macros *must* be called with the node lock held.  However,
 514  * NODE_DISOWN and NODE_REFRELE return with the lock unlocked (if there is
 515  * still a lock at all), because the operation may have just removed the
 516  * final reference to a node and it may no longer exist.
 517  *
 518  * A version of NODE_REFRELE is provided which doesn't unlock the lock but
 519  * can only be used when the caller can guarantee that it's not the last ref
 520  * (e.g. the caller has another outstanding reference) as if it's the last
 521  * ref the node_t may no longer exist. The new macro is NODE_REFRELE_LOCKED.
 522  */
 523 
 524 #define NODE_DISOWN(np) {                                               \
 525                                                                         \
 526         NODE_T_TRACE((np), NODE_T_TRACE_DISOWN);                        \
 527         ASSERT(mutex_owned(&(np)->lock));                                \
 528                                                                         \
 529         if ((np)->ref & REF_OWNED) {                                     \
 530                 if ((np)->cnt == 0)  {                               \
 531                         panic("nca NODE_DISOWN: %p has no references",  \
 532                             (void *)(np));                              \
 533                 }                                                       \
 534                 (np)->ref &= ~REF_OWNED;                         \
 535                 NODE_REFRELE(np);                                       \
 536         } else {                                                        \
 537                 mutex_exit(&(np)->lock);                         \
 538         }                                                               \
 539 }
 540 
 541 #define NODE_OWN(np) {                                                  \
 542                                                                         \
 543         NODE_T_TRACE((np), NODE_T_TRACE_OWN);                           \
 544         ASSERT(mutex_owned(&(np)->lock));                                \
 545                                                                         \
 546         if (!((np)->ref & REF_OWNED)) {                                  \
 547                 if ((np)->cnt == UINT_MAX)                           \
 548                         panic(                                          \
 549                             "nca NODE_OWN: %p has too many references", \
 550                             (void *)(np));                              \
 551                 (np)->ref |= REF_OWNED;                                      \
 552                 (np)->cnt++;                                         \
 553         }                                                               \
 554 }
 555 
 556 #define NODE_REFHOLD(np) {                                              \
 557                                                                         \
 558         NODE_T_TRACE((np), NODE_T_TRACE_REFHOLD | ((np)->cnt + 1));  \
 559         ASSERT(mutex_owned(&(np)->lock));                                \
 560                                                                         \
 561         if ((np)->cnt == UINT_MAX)                                   \
 562                 panic("nca NODE_REFHOLD: %p has too many references",   \
 563                     (void *)(np));                                      \
 564         (np)->cnt++;                                                 \
 565 }
 566 
 567 #define NODE_REFRELE(np) {                                              \
 568                                                                         \
 569         NODE_T_TRACE((np), NODE_T_TRACE_REFRELE | ((np)->cnt - 1));  \
 570         ASSERT(mutex_owned(&(np)->lock));                                \
 571                                                                         \
 572         if (((np)->ref & REF_OWNED) && (np)->cnt == 1)                        \
 573                 panic(                                                  \
 574                     "nca NODE_REFRELE: %p has only OWNED reference",    \
 575                     (void *)(np));                                      \
 576         if ((np)->cnt == 0)                                          \
 577                 panic("nca NODE_REFRELE: %p has no references",         \
 578                     (void *)(np));                                      \
 579         (np)->cnt--;                                                 \
 580         if ((np)->cnt == 0) {                                                \
 581                 ASSERT(((np)->ref & REF_OWNED) == 0);                    \
 582                 node_fr(np);            /* node_fr unlocks the lock */  \
 583         } else {                                                        \
 584                 mutex_exit(&(np)->lock);                         \
 585         }                                                               \
 586 }
 587 
 588 #define NODE_REFRELE_LOCKED(np) {                                       \
 589         uint_t  _cnt = (np)->cnt;                                    \
 590                                                                         \
 591         NODE_T_TRACE((np), NODE_T_TRACE_REFRELE | (_cnt - 1));          \
 592         ASSERT(mutex_owned(&(np)->lock));                                \
 593                                                                         \
 594         if ((np)->ref & REF_OWNED)                                       \
 595                 _cnt--;                                                 \
 596         if (((np)->ref & REF_OWNED) && _cnt == 0)                        \
 597                 panic("nca NODE_REFRELE_LOCKED: "                       \
 598                     "%p has only OWNED reference", (void *)(np));       \
 599         if (_cnt == 0)                                                  \
 600                 panic("nca NODE_REFRELEL_LOCKED: "                      \
 601                     "%p has no references", (void *)(np));              \
 602         if (_cnt == 1)                                                  \
 603                 panic("nca NODE_REFRELEL_LOCKED: "                      \
 604                     "%p has only one reference", (void *)(np));         \
 605         (np)->cnt--;                                                 \
 606 }
 607 
 608 
 609 /*
 610  * NODE_T_TRACE - trace node_t events.
 611  *
 612  * adb:
 613  * 32 bit
 614  *      *node_tp,0t8192-(((*node_tp)-node_tv)%0t48)/PXXDDnPnPnPnPnPnPnPnn
 615  *      node_tv,((*node_tp)-node_tv)%0t48/PXXDDnPnPnPnPnPnPnPnn
 616  *
 617  * 64 bit
 618  *      *node_tp,0t8192-(((*node_tp)-node_tv)%0t56)/PXXDDnXnXnXnXnXnXnXnn
 619  *      node_tv,((*node_tp)-node_tv)%0t56/PXXDDnXnXnXnXnXnXnXnn
 620  *
 621  * For incremental node tracing, note the value of node_tp (node_tp/X) after
 622  * a run, then replace that in the 2nd line for node_tv.
 623  */
 624 
 625 #define NODE_T_STK_DEPTH        6       /* stk[] has this many + 1 slots */
 626 
     /*
      * One record of the node_tv[] trace ring, filled in by NODE_T_TRACE().
      * The adb formats in the comment above depend on this exact layout.
      */
 627 struct node_ts {
 628         node_t  *node;          /* node passed to NODE_T_TRACE() (may be NULL) */
 629         unsigned action;        /* action value passed to NODE_T_TRACE() */
 630         unsigned ref;           /* node->ref at trace time, 0 if node is NULL */
 631         unsigned cnt;           /* node->cnt at trace time, 0 if node is NULL */
 632         int     cpu;            /* CPU->cpu_seqid of the tracing CPU */
 633         pc_t    stk[NODE_T_STK_DEPTH + 1];      /* set by NODE_T_TRACE_STK() */
 634 };
 635 
 636 #undef  NODE_T_TRACE_ON         /* forced off: the #ifdef below is skipped */
 637 
 638 #ifdef  NODE_T_TRACE_ON
 639 
     /*
      * Action codes for NODE_T_TRACE().  Every code has a distinct top byte
      * and zero low 24 bits, so a caller can OR extra detail into the low
      * bits (NODE_REFRELE_LOCKED() ORs in a reference count).
      */
 640 #define NODE_T_TRACE_ALLOC      0xFF000000      /* kmem_alloc() of */
 641 #define NODE_T_TRACE_ADD        0xFE000000      /* node_add() */
 642 
 643 #define NODE_T_TRACE_OWN        0xEF000000      /* node has been owned */
 644 #define NODE_T_TRACE_DISOWN     0xEE000000      /* node has been disowned */
 645 #define NODE_T_TRACE_DESBALLOC  0xED000000      /* desballoc() */
 646 #define NODE_T_TRACE_REFRELE    0xEC000000      /* refrele */
 647 #define NODE_T_TRACE_REFHOLD    0xEB000000      /* refhold */
 648 #define NODE_T_TRACE_NODE_FR    0xEA000000      /* node_fr() */
 649 
 650 #define NODE_T_TRACE_TEMPNODE   0xDF000000      /* node_temp() */
 651 #define NODE_T_TRACE_REPLACE    0xDE000000      /* node_replace() */
 652 #define NODE_T_TRACE_FLUSH      0xDD000000      /* node_flush() */
 653 #define NODE_T_TRACE_DOWNCALL   0xDC000000      /* downcall_service() */
 654 #define NODE_T_TRACE_DOWNCALL_2 0xDB000000      /* dcall_service->httpd_data */
 655 
 656 #define NODE_T_TRACE_DATA       0xCF000000      /* httpd_data() */
 657 
 658 #define NODE_T_TRACE_LRU        0xAF000000      /* nca_lru insert */
 659 #define NODE_T_TRACE_HTTPD      0xAE000000      /* call nca_httpd() */
 660 #define NODE_T_TRACE_MISS       0xAD000000      /* http_miss() */
 661 #define NODE_T_TRACE_TEMP       0xAC000000      /* np != *npp */
 662 #define NODE_T_TRACE_XMIT       0xAB000000      /* tcp_xmit() */
 663 #define NODE_T_TRACE_MISSED     0xAA000000      /* nca_missed() */
 664 
 665 #define NODE_T_TRACE_DEL        0x00000000      /* node_del() */
 666 
 667 #if defined(__i386) || defined(__amd64)
 668 #define NODE_T_TRACE_STK() {                                            \
             /*                                                              \
              * Record the call stack in _p->stk[]; _p and _ix come from     \
              * the expanding scope (NODE_T_TRACE()).  getpcstack() stores   \
              * _ix frames starting at stk[0], so when the array is not      \
              * full the zero terminator belongs at stk[_ix], as in          \
              * DOOR_TRACE_STK().  Using _ix + 1 here skipped a slot and     \
              * wrote past the end of stk[] for _ix == NODE_T_STK_DEPTH.     \
              */                                                             \
 669         _ix = getpcstack(&_p->stk[0], NODE_T_STK_DEPTH + 1);             \
 670         if (_ix < NODE_T_STK_DEPTH + 1) {                            \
 671                 _p->stk[_ix] = 0;                                    \
 672         }                                                               \
 673 }
 674 #else
 675 #define NODE_T_TRACE_STK() {                                            \
             /*                                                              \
              * stk[0] holds callee() and getpcstack() stores _ix frames     \
              * starting at stk[1], so the terminator goes at stk[_ix + 1].  \
              */                                                             \
 676         _p->stk[0] = (pc_t)callee();                                 \
 677         _ix = getpcstack(&_p->stk[1], NODE_T_STK_DEPTH);         \
 678         if (_ix < NODE_T_STK_DEPTH) {                                        \
 679                 _p->stk[_ix + 1] = 0;                                        \
 680         }                                                               \
 681 }
 682 #endif
 683 
 684 #define NODE_TV_SZ 8192         /* number of records in node_tv[] */
 685 
 686 extern struct node_ts node_tv[NODE_TV_SZ];
 687 extern struct node_ts *node_tp;
 688 
     /*
      * NODE_T_TRACE(p, a) - record one event in the node_tv[] ring.
      *
      *      p       node_t pointer, may be NULL (ref and cnt are then logged as 0)
      *      a       action value stored verbatim in the record
      *
      * Both arguments are evaluated more than once.
      */
 689 #define NODE_T_TRACE(p, a) {                                            \
 690         struct node_ts *_p;                                             \
 691         struct node_ts *_np;                                            \
 692         int    _ix;                                                     \
 693                                                                         \
             /*                                                              \
              * Claim the record node_tp points at by moving node_tp on to   \
              * the next record (wrapping to node_tv[0] past the last one);  \
              * loop until casptr() returns the value we read, i.e. nobody   \
              * else changed node_tp in between.                             \
              */                                                             \
 694         do {                                                            \
 695                 _p = node_tp;                                           \
 696                 if ((_np = _p + 1) == &node_tv[NODE_TV_SZ])         \
 697                         _np = node_tv;                                  \
 698         } while (casptr(&node_tp, _p, _np) != _p);                  \
             /* Fill the claimed record; this part is not atomic. */         \
 699         _p->node = (p);                                                      \
 700         _p->action = (a);                                            \
 701         _p->ref = (p) ? (p)->ref : 0;                                     \
 702         _p->cnt = (p) ? (p)->cnt : 0;                                     \
 703         _p->cpu = CPU->cpu_seqid;                                 \
             /* Uses _p and _ix declared above. */                           \
 704         NODE_T_TRACE_STK();                                             \
 705 }
 706 
 707 #else   /* NODE_T_TRACE_ON */
 708 
     /* Tracing compiled out: NODE_T_TRACE() expands to nothing. */
 709 #define NODE_T_TRACE(p, a)
 710 
 711 #endif  /* NODE_T_TRACE_ON */
 712 
 713 /*
 714  * DOOR_TRACE - trace door node_t events.
 715  *
 716  * adb:
 717  * 32 bit
 718  *      *door_tp,0t8192-(((*door_tp)-door_tv)%0t112)/5XnPnPnPnPnPnPnPn64cnn
 719  *      door_tv,((*door_tp)-door_tv)%0t112/5XnPnPnPnPnPnPnPn64cnn
 720  * 64 bit
 721  *      *door_tp,0t8192-(((*door_tp)-door_tv)%0t128)/PXPXXnXnXnXnXnXnXnXn64cnn
 722  *      door_tv,((*door_tp)-door_tv)%0t128/PXPXXnXnXnXnXnXnXnXn64cnn
 723  */
 724 
 725 #define DOOR_STK_DEPTH  6       /* stk[] has this many + 1 slots */
 726 
     /*
      * One record of the door_tv[] trace ring, filled in by DOOR_TRACE().
      * The adb formats in the comment above depend on this exact layout.
      */
 727 struct door_ts {
 728         struct nca_conn_s *cp;  /* io->cid cast to nca_conn_t *, NULL if no io */
 729         unsigned action;        /* action value passed to DOOR_TRACE() */
 730         node_t  *np;            /* cp->req_np, NULL if cp is NULL */
 731         int     ref;            /* np->ref at trace time, 0 if np is NULL */
 732         unsigned state;         /* io flags/lengths packed by DOOR_TRACE() */
 733         pc_t    stk[DOOR_STK_DEPTH + 1];        /* set by DOOR_TRACE_STK() */
 734         char    data[64];       /* up to 63 bytes of caller data, zero filled */
 735 };
 736 
 737 #undef  DOOR_TRACE_ON           /* forced off: the #ifdef below is skipped */
 738 
 739 #ifdef  DOOR_TRACE_ON
 740 
     /* Action codes for DOOR_TRACE(); each is stored verbatim in door_ts.action. */
 741 #define DOOR_TRACE_UPCALL       0xF0000000      /* upcall() */
 742 #define DOOR_TRACE_UPCALL_RAW   0xF1000000      /* upcall() RAW ? */
 743 #define DOOR_TRACE_UPCALL_RET   0xFF000000      /* upcall() return */
 744 
 745 #define DOOR_TRACE_DOWNCALL     0xE0000000      /* downcall() */
 746 #define DOOR_TRACE_CONNECT      0xE1000000      /* connect() */
 747 #define DOOR_TRACE_CONNECT_DATA 0xE2000000      /* connect() */
 748 #define DOOR_TRACE_DIRECTFROM   0xE3000000      /* tee_splice() from */
 749 #define DOOR_TRACE_DIRECTTO     0xE4000000      /* tee_splice() to */
 750 #define DOOR_TRACE_DOWNCALL_RET 0xEF000000      /* downcall() return */
 751 
 752 #define DOOR_TRACE_INIT         0x80000000      /* doorcall_init() */
 753 #define DOOR_TRACE_INIT_RET     0x88000000      /* doorcall_init() return */
 754 
 755 #if defined(__i386) || defined(__amd64)
 756 #define DOOR_TRACE_STK() {                                              \
             /* Capture into stk[] from slot 0; _p and _ix belong to the user. */ \
 757         _ix = getpcstack(_p->stk, DOOR_STK_DEPTH + 1);                   \
 758         if (_ix <= DOOR_STK_DEPTH) {                                 \
                     /* Array not full: zero-terminate after the last frame. */ \
 759                 _p->stk[_ix] = 0;                                    \
 760         }                                                               \
 761 }
 762 #else
 763 #define DOOR_TRACE_STK() {                                              \
             /* Slot 0 is callee(); the captured frames follow from slot 1. */ \
 764         _p->stk[0] = (pc_t)callee();                                 \
 765         _ix = getpcstack(_p->stk + 1, DOOR_STK_DEPTH);                   \
 766         if (DOOR_STK_DEPTH > _ix) {                                  \
                     /* Array not full: zero-terminate after the last frame. */ \
 767                 _p->stk[_ix + 1] = 0;                                        \
 768         }                                                               \
 769 }
 770 #endif
 771 
 772 #define DOOR_TV_SZ 8192         /* number of records in door_tv[] */
 773 
 774 extern struct door_ts door_tv[DOOR_TV_SZ];
 775 extern struct door_ts *door_tp;
 776 
     /*
      * DOOR_TRACE(io, d, d_sz, a) - record one event in the door_tv[] ring.
      *
      *      io      I/O descriptor pointer, may be NULL; its cid, op, flag
      *              and *_len members are read
      *      d       data to snapshot into the record
      *      d_sz    size of d; at most 63 bytes are copied, 0 means none
      *      a       action value stored verbatim in the record
      *
      * The arguments are evaluated more than once.
      */
 777 #define DOOR_TRACE(io, d, d_sz, a) {                            \
 778         nca_conn_t *_cp = (io) ? (nca_conn_t *)(io)->cid : (nca_conn_t *)NULL; \
 779         node_t *_req_np = _cp ? _cp->req_np : (node_t *)NULL;                \
 780         struct door_ts *_p;                                             \
 781         struct door_ts *_np;                                            \
 782         int    _ix;                                                     \
 783                                                                         \
             /*                                                              \
              * Claim the record door_tp points at by moving door_tp on to   \
              * the next record (wrapping to door_tv[0] past the last one);  \
              * loop until casptr() returns the value we read.               \
              */                                                             \
 784         do {                                                            \
 785                 _p = door_tp;                                           \
 786                 if ((_np = _p + 1) == &door_tv[DOOR_TV_SZ])         \
 787                         _np = door_tv;                                  \
 788         } while (casptr(&door_tp, _p, _np) != _p);                  \
 789         _p->cp = _cp;                                                        \
 790         _p->np = _req_np;                                            \
 791         _p->action = (a);                                            \
 792         _p->ref = _req_np ? _req_np->ref : 0;                             \
             /*                                                              \
              * state: bits 31-23 and 19-18 are one flag per io member that  \
              * is set/non-zero, bits 22-20 are direct_type, and the low 18  \
              * bits are the sum of the five length members (truncated).     \
              */                                                             \
 793         if ((io)) {                                                     \
 794                 _p->state = ((io)->op == http_op ? 0x80000000 : 0) |      \
 795                             ((io)->more ? 0x40000000 : 0) |          \
 796                             ((io)->first ? 0x20000000 : 0) |         \
 797                             ((io)->advisory ? 0x10000000 : 0) |              \
 798                             ((io)->nocache ? 0x08000000 : 0) |               \
 799                             ((io)->preempt ? 0x04000000 : 0) |               \
 800                             ((io)->peer_len ? 0x02000000 : 0) |              \
 801                             ((io)->local_len ? 0x01000000 : 0) |     \
 802                             ((io)->data_len ? 0x00800000 : 0) |              \
 803                             (((io)->direct_type << 20) & 0x00700000) |     \
 804                             ((io)->direct_len ? 0x00080000 : 0) |    \
 805                             ((io)->trailer_len ? 0x00040000 : 0) |   \
 806                             (((io)->peer_len + (io)->local_len +  \
 807                             (io)->data_len + (io)->direct_len +           \
 808                             (io)->trailer_len) & 0x3FFFF);               \
 809         } else {                                                        \
 810                 _p->state = 0;                                               \
 811         }                                                               \
             /* Copy at most 63 bytes so data[] always ends in a zero byte. */ \
 812         if ((d_sz)) {                                                   \
 813                 int _n = MIN((d_sz), 63);                               \
 814                                                                         \
 815                 bcopy((d), _p->data, _n);                            \
 816                 bzero(&_p->data[_n], 64 - _n);                           \
 817         } else {                                                        \
 818                 bzero(_p->data, 64);                                 \
 819         }                                                               \
             /* Uses _p and _ix declared above. */                           \
 820         DOOR_TRACE_STK();                                               \
 821 }
 822 
 823 #else   /* DOOR_TRACE_ON */
 824 
     /* Tracing compiled out: DOOR_TRACE() expands to nothing. */
 825 #define DOOR_TRACE(io, d, d_sz, a)
 826 
 827 #endif  /* DOOR_TRACE_ON */
 828 
 829 /*
 830  * NCA node LRU cache.  Defined here so that the NCA mdb module can use it.
      *
      * Holds the ends and the member count of two node_t lists, a physical
      * ("Phys") and a virtual ("Virt") one.  In each list the head is the most
      * recently used end and the tail the least recently used end.
 831  */
 832 typedef struct lru_s {
 833         node_t          *phead; /* Phys LRU list head (MRU) */
 834         node_t          *ptail; /* Phys LRU list tail (LRU) */
 835         node_t          *vhead; /* Virt LRU list head (MRU) */
 836         node_t          *vtail; /* Virt LRU list tail (LRU) */
 837 
 838         uint32_t        pcount; /* Phys count of node_t members */
 839         uint32_t        vcount; /* Virt count of node_t members */
 840 
 841         kmutex_t        lock;   /* Guarantee atomic access of above */
 842 } lru_t;
 843 
 844 /*
 845  * Per CPU instance structure.
 846  *
 847  * 32-bit adb: XXXnnDnnXXnnXXnnXDnnXXnn228+na
 848  * 64-bit adb: PPPnnD4+nnPPnnPPnnJDnnJ180+na
 849  */
 850 
 851 typedef struct nca_cpu_s {
 852 
             /* NOTE(review): presumably per-CPU persistent HTTP header nodes - confirm */
 853         node_t *persist_hdr_none;
 854         node_t *persist_hdr_close;
 855         node_t *persist_hdr_ka;
 856 
             /* Updated atomically by the DCB_RD_ and DCB_WR_ macros; see DCB_COUNT_*. */
 857         uint32_t dcb_readers;   /* count of dcb_list readers for this CPU */
 858 
 859         nca_squeue_t *if_inq;   /* if_t input nca_squeue_t */
 860         nca_squeue_t *if_ouq;   /* if_t output nca_squeue_t */
 861 
 862         ti_t    *tcp_ti;        /* TCP TIMER list */
 863         tw_t    *tcp_tw;        /* TCP TIME_WAIT list */
 864 
 865         ddi_softintr_t soft_id; /* soft interrupt id for if_inq worker */
 866         int     if_inq_cnt;     /* count of if_t.inq references */
 867 
             /*
              * Pad to 256 bytes: 256 minus the size of every member above.
              * NOTE(review): alignment padding the compiler inserts between
              * members is not subtracted, so where such padding exists
              * sizeof (nca_cpu_t) exceeds 256 - confirm this is acceptable.
              */
 868         char    pad[256 - sizeof (node_t *) - sizeof (node_t *) -
 869                     sizeof (node_t *) - sizeof (uint32_t) -
 870                     sizeof (nca_squeue_t *) - sizeof (nca_squeue_t *) -
 871                     sizeof (ti_t *) - sizeof (tw_t *) -
 872                     sizeof (ddi_softintr_t) - sizeof (int)];
 873 } nca_cpu_t;
 874 
 875 extern nca_cpu_t *nca_gv;       /* global per CPU state indexed by cpu_seqid */
 876 
 877 /*
 878  * hcb_t - host control block.
 879  *
 880  * Used early on in packet switching to select packets to be serviced by NCA
 881  * and optionally later on by the HTTP protocol layer to further select HTTP
 882  * request to be serviced.
 883  *
 884  * dcb_t - door control block.
 885  *
 886  * Used to associate one or more hcb_t(s) with a given httpd door instance.
 887  *
 888  * dcb_list - dcb_t global list, a singly linked grounded list of dcb_t's.
 889  *
 890  * Used to search for a hcb_t match, currently a singly linked grounded list
 891  * of dcb_t's with a linear walk of the list. While this is adequate for the
 892  * current httpd support (i.e. a single door) a move to either a hash or tree
 893  * will be required for multiple httpd instance support (i.e. multiple doors).
 894  *
 895  * The dcb_list is protected by a custom reader/writer lock, the motivation
 896  * for using a custom lock instead of a krwlock_t is that this lock is the
 897  * single hot spot in NCA (i.e. all in-bound packets must acquire this lock)
 898  * and a nonlocking atomic readers count scheme is used in the common case
 899  * (i.e. reader lock) with a fall-back to a conventional kmutex_t for writer
 900  * (i.e. ndd list add/delete).
 901  */
 902 
 903 typedef struct hcb_s {
             /*
              * One selector entry: IP address and TCP port plus an optional
              * Host: name and document root.  Entries are chained through next.
              */
 904         struct hcb_s    *next;          /* Next hcb_t (none: NULL) */
 905         ipaddr_t        addr;           /* IP address (any: INADDR_ANY or 0) */
 906         uint16_t        port;           /* TCP port number */
 907         char            *host;          /* Host: name (any: NULL) */
 908         ssize_t         hostsz;         /* Size of above */
 909         char            *root;          /* Document root ("/": NULL) */
 910         ssize_t         rootsz;         /* Size of above */
 911 } hcb_t;
 912 
 913 typedef struct dcb_s {
             /*
              * One httpd door and the hcb_t entries served through it.  Note
              * that list is an hcb_t embedded by value, not a pointer: the
              * first entry of the chain lives inside the dcb_t itself.
              */
 914         struct dcb_s    *next;          /* Next dcb_t (none: NULL) */
 915         char            *door;          /* Door file (default: NULL) */
 916         ssize_t         doorsz;         /* Size of above */
 917         door_handle_t   hand;           /* Door handle (default: NULL) */
 918         hcb_t           list;           /* Head of a hcb_t list (any: NULL) */
 919 } dcb_t;
 920 
 921 extern dcb_t dcb_list;                  /* head of the global dcb_t list */
 922 extern kmutex_t nca_dcb_lock;           /* mutex used with nca_dcb_wait */
 923 extern kcondvar_t nca_dcb_wait;         /* writer waits here for readers */
 924 extern kmutex_t nca_dcb_readers;        /* held by a writer from DCB_WR_ENTER() */
                                             /* to DCB_WR_EXIT(); readers block on it */
 925 
     /* NOTE(review): presumably marks a door whose handle is not valid - confirm */
 926 #define NOHANDLE ((door_handle_t)-1)
 927 
     /*
      * Layout of the per CPU nca_cpu_t.dcb_readers word: DCB_COUNT_USELOCK is
      * set by DCB_WR_ENTER() and cleared by DCB_WR_EXIT(); DCB_COUNT_MASK
      * extracts the number of readers.
      */
 928 #define DCB_COUNT_USELOCK       0x80000000
 929 #define DCB_COUNT_MASK          0x3FFFFFFF
 930 
 931 #define DCB_RD_ENTER(cpu) {                                             \
             /*                                                              \
              * Enter the dcb_list as a reader.  cpu is an output: it must   \
              * be an lvalue, is set to CPU->cpu_seqid, and selects the      \
              * per CPU counter; DCB_RD_EXIT() takes a cpu argument to       \
              * locate the same counter.                                     \
              */                                                             \
 932         uint32_t *rp;                                                   \
 933                                                                         \
 934         cpu = CPU->cpu_seqid;                                                \
 935         rp = &nca_gv[cpu].dcb_readers;                                      \
             /* Fast path: count ourselves in; done unless USELOCK is set. */ \
 936         while (atomic_add_32_nv(rp, 1) & DCB_COUNT_USELOCK) {               \
 937                 /* Need to use the lock, so do the dance */             \
 938                 mutex_enter(&nca_dcb_lock);                         \
                     /* Back out our count; wake a waiter if it drained to 0. */ \
 939                 if (atomic_add_32_nv(rp, -1) == DCB_COUNT_USELOCK &&    \
 940                     CV_HAS_WAITERS(&nca_dcb_wait)) {                        \
 941                         /* May be the last reader for this CPU */       \
 942                         cv_signal(&nca_dcb_wait);                   \
 943                 }                                                       \
 944                 mutex_exit(&nca_dcb_lock);                          \
 945                 mutex_enter(&nca_dcb_readers);                              \
 946                 /*                                                      \
 947                  * We block above waiting for the writer to exit the    \
 948                  * readers lock, if we didn't block then while we were  \
 949                  * away in the nca_dcb_lock enter the writer exited,    \
 950                  * we could optimize for this case by checking USELOCK  \
 951                  * after the decrement, but as this is an exceptional   \
 952                  * case not in the fast-path we'll just take the hit    \
 953                  * of a needless readers enter/exit.                    \
 954                  */                                                     \
 955                 mutex_exit(&nca_dcb_readers);                               \
 956         }                                                               \
 957 }
 958 
 959 #define DCB_RD_EXIT(cpu) {                                              \
             /*                                                              \
              * Leave the dcb_list as a reader: drop this CPU's reader       \
              * count (cpu indexes nca_gv[]).  If only the USELOCK flag is   \
              * left afterwards, take nca_dcb_lock and signal nca_dcb_wait   \
              * when it has waiters.                                         \
              */                                                             \
 960         uint32_t *rp = &nca_gv[cpu].dcb_readers;                    \
 961                                                                         \
 962         if (atomic_add_32_nv(rp, -1) == DCB_COUNT_USELOCK) {            \
 963                 mutex_enter(&nca_dcb_lock);                         \
 964                 if (CV_HAS_WAITERS(&nca_dcb_wait)) {                        \
 965                         /* May be the last reader for this CPU */       \
 966                         cv_signal(&nca_dcb_wait);                   \
 967                 }                                                       \
 968                 mutex_exit(&nca_dcb_lock);                          \
 969         }                                                               \
 970 }
 971 
 972 #define DCB_WR_ENTER() {                                                \
             /*                                                              \
              * Enter the dcb_list as a writer.  nca_dcb_readers is taken    \
              * here and not released by this macro (DCB_WR_EXIT() drops     \
              * it); nca_dcb_lock is held only while waiting for readers.    \
              */                                                             \
 973         int cpu;                                                        \
 974         int readers;                                                    \
 975                                                                         \
 976         mutex_enter(&nca_dcb_readers);                                      \
 977         mutex_enter(&nca_dcb_lock);                                 \
 978         for (;;) {                                                      \
                     /* Flag every CPU's counter and total the readers seen. */ \
 979                 readers = 0;                                            \
 980                 for (cpu = 0; cpu < max_ncpus; cpu++) {                      \
 981                         int new;                                        \
 982                         uint32_t *rp = &nca_gv[cpu].dcb_readers;    \
 983                         int old = *rp;                                  \
 984                                                                         \
                             /* Already flagged on an earlier pass. */       \
 985                         if (old & DCB_COUNT_USELOCK) {                      \
 986                                 readers += old & DCB_COUNT_MASK;    \
 987                                 continue;                               \
 988                         }                                               \
                             /* Set USELOCK; retry if the word changed. */   \
 989                         new = old | DCB_COUNT_USELOCK;                  \
 990                         while (cas32(rp, old, new) != old) {            \
 991                                 old = *rp;                              \
 992                                 new = old | DCB_COUNT_USELOCK;          \
 993                         }                                               \
 994                         readers += (new & DCB_COUNT_MASK);          \
 995                 }                                                       \
 996                 if (readers == 0)                                       \
 997                         break;                                          \
                     /* Readers remain: sleep until signalled, then rescan. */ \
 998                 cv_wait(&nca_dcb_wait, &nca_dcb_lock);                  \
 999         }                                                               \
1000         mutex_exit(&nca_dcb_lock);                                  \
1001 }
1002 
1003 #define DCB_WR_EXIT() {                                                 \
             /*                                                              \
              * Leave the dcb_list as a writer: clear USELOCK in every       \
              * CPU's counter under nca_dcb_lock, then release               \
              * nca_dcb_readers.                                             \
              */                                                             \
1004         int cpu;                                                        \
1005                                                                         \
1006         mutex_enter(&nca_dcb_lock);                                 \
1007         for (cpu = 0; cpu < max_ncpus; cpu++) {                              \
1008                 int new;                                                \
1009                 uint32_t *rp = &nca_gv[cpu].dcb_readers;            \
1010                 int old = *rp;                                          \
1011                                                                         \
                     /* Clear the flag; retry if the word changed. */        \
1012                 new = old & ~DCB_COUNT_USELOCK;                             \
1013                 while (cas32(rp, old, new) != old) {                    \
1014                         old = *rp;                                      \
1015                         new = old & ~DCB_COUNT_USELOCK;                     \
1016                 }                                                       \
1017         }                                                               \
1018         mutex_exit(&nca_dcb_lock);                                  \
1019         mutex_exit(&nca_dcb_readers);                                       \
1020 }
1021 
1022 typedef struct nca_door_s {
             /*
              * State of one door: its handle and name, plus the lock, the
              * two condition variables and the counters described below.
              */
1023         door_handle_t   handle;         /* The door handle */
1024         char            *name;          /* The door name */
1025         kmutex_t        lock;           /* The door lock */
1026         kcondvar_t      cv_writer;      /* condvar for thread waiting */
1027                                         /* to do door_init */
1028         kcondvar_t      cv_reader;      /* condvar for thread waiting */
1029                                         /* for a door_init to finish */
1030         uint32_t        upcalls;        /* Number of upcalls in progress */
1031         boolean_t       init_waiting;   /* door_init thread wanting to */
1032                                         /* be exclusive */
1033 } nca_door_t;
1034 
1035 /*
1036  * if_t - interface per instance data.
1037  */
1038 
1039 typedef struct if_s {
1040 
1041         boolean_t dev;          /* is a device instance */
1042 
1043         queue_t *rqp;           /* our read-side STREAMS queue */
1044         queue_t *wqp;           /* our write-side STREAMS queue */
1045 
1046         /* DLPI M_DATA IP fastpath template */
1047         size_t  mac_length;
1048         mblk_t  *mac_mp;
1049         int32_t mac_mtu;
1050         int32_t mac_addr_len;
1051 
1052         uint32_t ip_ident;      /* our IP ident value */
1053 
1054         boolean_t hwcksum;      /* underlying NIC supports checksum offload */
1055 
1056         nca_squeue_t *inq;              /* in-bound nca_squeue_t */
1057         nca_squeue_t *ouq;              /* out-bound nca_squeue_t */
1058 
1059         /*
1060          * All if_t that are associated with a CPU and have a default
1061          * router on link are chained in a circular linked list.
1062          */
1063         struct if_s *next_if;
1064         struct if_s *prev_if;
1065         ipaddr_t local_addr;    /* This interface's IP address. */
1066         uchar_t router_ether_addr[6];
1067 
1068         uint_t  hdr_ioc_id;     /* id of DL_IOC_HDR_INFO M_IOCTL sent down */
1069         boolean_t info_req_pending;
1070 
1071         int32_t capab_state;    /* Capability probe state */
1072 
1073         /* Bound local address of a NCAfs instance. */
1074         struct sockaddr_in      bound_addr;
1075 } if_t;
1076 
1077 /*
1078  * connf_t - connection fanout data.
1079  *
1080  * The hash tables and their linkage (hashnext, hashprev) are protected
1081  * by the per-bucket lock. Each nca_conn_t inserted in the list points back at
1082  * the connf_t that heads the bucket (through its hashfanout member).
1083  */
1084 
1085 typedef struct connf_s {
1086         uint32_t        max;    /* NOTE(review): meaning not visible here */
1087         struct nca_conn_s       *head;  /* first nca_conn_t in this bucket */
1088         kmutex_t        lock;   /* the per-bucket lock */
1089 } connf_t;
1090 
1091 #ifdef  CONNP_T_TRACE_ON
1092 
     /* NOTE(review): presumably the number of connp_t records kept - confirm */
1093 #define CONNP_TV_SZ 32
1094 
1095 /*
1096  * Per nca_conn_t packet tracing.
      *
      * Only compiled when CONNP_T_TRACE_ON is defined.  The bit-fields of the
      * third member add up to exactly 32 bits (16 + 1 + 4 + 6 + five 1-bit
      * flags).
1097  */
1098 typedef struct connp_s {
1099         clock_t         lbolt;
1100         clock_t         tcp_ti;
1101         int32_t         len : 16,
1102                         dir : 1,
1103                         state : 4,
1104                         flags : 6,
1105                         xmit_np : 1,
1106                         xmit_head : 1,
1107                         unsent : 1,
1108                         tail_unsent : 1,
1109                         direct : 1;
1110         uint32_t        state1;
1111         uint32_t        state2;
1112         uint32_t        seq;
1113         uint32_t        ack;
1114         uint32_t        snxt;
1115         uint32_t        swnd;
1116 } connp_t;
1117 
1118 #endif  /* CONNP_T_TRACE_ON */
1119 
1120 /*
1121  * nca_conn_t - connection per instance data.
1122  *
1123  * Note: hashlock is used to provide atomic access to all nca_conn_t members
1124  * above it. All other members are protected by the per CPU inq nca_squeue_t
1125  * which is used to serialize access to all nca_conn_t's per interface.
1126  *
1127  * Note: the nca_conn_t can have up to 3 NODE_REFHOLDs:
1128  *
1129  *      1) if req_np != NULL then a NODE_REFHOLD(req_np) was done:
1130  *
1131  *          1.1) if http_refed then a NODE_REFHOLD(req_np) was done
1132  *
1133  *          1.2) if http_frefed then a NODE_REFHOLD(req_np->fileback) was done
1134  *
1135  *
1136  * TODO: reorder elements in fast-path code access order.
1137  *
1138  * Dnn4XnXXDnnDnnXXXnnXXXnnUXnnXXXnnXXnnDDXXXDXDXDXnnDnnXXDDnXXXDDnnXXXDDnn
1139  * XXXDDnnXXXDDnnXXXDDnnXXnnDXXnn
1140  * b+++DDnAnDDDDDnnDnnUnnUUDXDUnnDnn20xnnXnnddnnUUUnnXXUnXXnnUUUnn
1141  * DDDDDDnnUUnnXXUXUnn4UD4Unn4UnUUnn
1142  * 64-bit: Xnn4+4pnnppEnEnn3pnn3pnnEJnnXXnnuunn4+ppnnXX3pD4+pD4+pD4+pnnEnnppnnD
1143  */
1144 
1145 #define TCP_XMIT_MAX_IX 5               /* Max xmit descriptors; sizes xmit[] */
1146 
1147 typedef struct nca_conn_s {
1148 
1149         int32_t ref;                    /* Reference counter */
1150 
1151         te_t    tcp_ti;                 /* TCP TIMER timer entry */
1152 
1153         struct nca_conn_s       *twnext;        /* TIME_WAIT next */
1154         struct nca_conn_s       *twprev;        /* TIME_WAIT prev */
1155         clock_t twlbolt;                /* TIME_WAIT lbolt */
1156 
1157         clock_t create;                 /* Create lbolt time */
1158 
1159         connf_t *hashfanout;            /* Hash bucket we're part of */
1160         struct nca_conn_s       *hashnext;      /* Hash chain next */
1161         struct nca_conn_s       *hashprev;      /* Hash chain prev */
1162 
1163         struct nca_conn_s       *bindnext;      /* Next conn_s in bind list. */
1164         struct nca_conn_s       *bindprev;      /* Prev conn_s in bind list. */
1165         void            *tbf;           /* Pointer to bind hash list struct. */
1166         /*
1167          * Note: atomic access of members above is guaranteed by the
1168          * hashfanout->lock of the hash bucket that the nca_conn_t is in.
1169          */
1170 
1171         size_t  mac_length;             /* MAC prepend length */
1172         mblk_t  *mac_mp;                /* MAC prepend data */
1173 
1174         ipaddr_t        laddr;          /* Local address */
1175         ipaddr_t        faddr;          /* Remote address. 0 => not connected */
1176 
1177         union {
1178                 struct {
1179                         uint16_t u_fport; /* Remote port */
1180                         uint16_t u_lport; /* Local port */
1181                 } u_ports1;
1182                 uint32_t u_ports2;      /* Rem port, local port */
1183                                         /* Used for TCP_MATCH performance */
1184         } u_port;
1185 #define conn_lport      u_port.u_ports1.u_lport
1186 #define conn_fport      u_port.u_ports1.u_fport
1187 #define conn_ports      u_port.u_ports2
1188 
1189         if_t    *ifp;                   /* Interface for this connection */
1190         nca_squeue_t *inq;              /* Per CPU inq for this connection */
1191 
1192         uint32_t req_tag;               /* nca_io_t request tag (0 == NONE) */
1193         int     req_parse;              /* HTTP request parse state */
1194         node_t  *req_np;                /* HTTP request node_t */
1195         mblk_t  *req_mp;                /* HTTP request mblk_t */
1196         char    *reqpath;               /* HTTP request URI path component */
1197         int     reqpathsz;              /* size of above */
1198         char    *reqrefer;              /* HTTP "Referer:" string */
1199         int     reqrefersz;             /* size of above */
1200         char    *requagent;             /* HTTP "User-Agent:" string */
1201         int     requagentsz;            /* size of above */
1202         struct nca_conn_s *nodenext;    /* Node_t nca_conn_t list */
1203 
1204         clock_t http_count;             /* HTTP Keep-Alive request count */
1205 
1206         /*
1207          * req_np xmit state used across calls to tcp_xmit(). A reference
1208          * to the req_np and to any indirect node_t (i.e. file/ctag) ...
1209          */
1210         node_t  *xmit_refed;            /* have a ref to the uri node_t */
1211         node_t  *xmit_cur;              /* current node to transmit */
1212 
1213         int     xmit_ix;                /* current xmit[] index */
1214         int     xmit_pix;               /* past end xmit[] index */
1215 
1216         struct {
1217                 node_t  *np;            /* node_t pointer for ref */
1218                 char    *dp;            /* data pointer */
1219                 uint16_t *cp;           /* cksum array */
1220                 int     sz;             /* remaining data to xmit */
1221                 int     iso;            /* initial segment offset (if any) */
1222                 node_t  *refed;         /* have a ref to the node_t */
1223                 int     dsz;            /* remaining data for current segment */
1224                 caddr_t *dvp;           /* data segment virtual pointer */
1225         } xmit[TCP_XMIT_MAX_IX];
1226 
1227         /*
1228          * Connection NCA_IO_DIRECT_SPLICE & NCA_IO_DIRECT_TEE reference,
1229          * see direct_splice and direct_tee below for type of send too.
1230          */
1231         struct nca_conn_s       *direct; /* nca_conn_t to send recv data too */
1232         mblk_t          *direct_mp;      /* mblk_t to use for tcp_close() */
1233 
1234         /*
1235          * nca_conn_t state.
1236          */
1237 
1238         int32_t tcp_state;
1239 
1240         uint32_t
1241                 tcp_urp_last_valid : 1, /* Is tcp_urp_last valid? */
1242                 tcp_hard_binding : 1,   /* If we've started a full bind */
1243                 tcp_hard_bound : 1,     /* If we've done a full bind with IP */
1244                 tcp_fin_acked : 1,      /* Has our FIN been acked? */
1245 
1246                 tcp_fin_rcvd : 1,       /* Have we seen a FIN? */
1247                 tcp_fin_sent : 1,       /* Have we sent our FIN yet? */
1248                 tcp_ordrel_done : 1,    /* Have we sent the ord_rel upstream? */
1249                 tcp_flow_stopped : 1,   /* Have we flow controlled xmitter? */
1250 
1251                 tcp_debug : 1,          /* SO_DEBUG "socket" option. */
1252                 tcp_dontroute : 1,      /* SO_DONTROUTE "socket" option. */
1253                 tcp_broadcast : 1,      /* SO_BROADCAST "socket" option. */
1254                 tcp_useloopback : 1,    /* SO_USELOOPBACK "socket" option. */
1255 
1256                 tcp_oobinline : 1,      /* SO_OOBINLINE "socket" option. */
1257                 tcp_dgram_errind : 1,   /* SO_DGRAM_ERRIND option */
1258                 tcp_detached : 1,       /* If we're detached from a stream */
1259                 tcp_bind_pending : 1,   /* Client is waiting for bind ack */
1260 
1261                 tcp_unbind_pending : 1, /* Client sent T_UNBIND_REQ */
1262                 tcp_deferred_clean_death : 1,
1263                                         /* defer tcp endpoint cleanup etc. */
1264                 tcp_co_wakeq_done : 1,  /* A strwakeq() has been done */
1265                 tcp_co_wakeq_force : 1, /* A strwakeq() must be done */
1266 
1267                 tcp_co_norm : 1,        /* In normal mode, putnext() done */
1268                 tcp_co_wakeq_need : 1,  /* A strwakeq() needs to be done */
1269                 tcp_snd_ws_ok : 1,      /* Received WSCALE from peer */
1270                 tcp_snd_ts_ok : 1,      /* Received TSTAMP from peer */
1271 
1272                 tcp_linger : 1,         /* SO_LINGER turned on */
1273                 tcp_zero_win_probe: 1,  /* Zero win probing is in progress */
1274                 tcp_loopback: 1,        /* src and dst are the same machine */
1275                 tcp_localnet: 1,        /* src and dst are on the same subnet */
1276 
1277                 tcp_syn_defense: 1,     /* For defense against SYN attack */
1278 #define tcp_dontdrop    tcp_syn_defense
1279                 tcp_set_timer : 1,
1280                 tcp_1_junk_fill_thru_bit_31 : 2;
1281 
1282         uint32_t
1283                 tcp_active_open: 1,     /* This is a active open */
1284                 tcp_timeout : 1,        /* qbufcall failed, qtimeout pending */
1285                 tcp_rexmit : 1,         /* TCP is retransmitting */
1286                 tcp_snd_sack_ok : 1,    /* Can use SACK for this connection */
1287 
1288                 tcp_bind_proxy_addr : 1,        /* proxy addr is being used */
1289                 tcp_recvdstaddr : 1,    /* return T_EXTCONN_IND with dst addr */
1290                 tcp_refed : 1,          /* nca_conn_t refed by TCP */
1291                 tcp_time_wait_comp : 1, /* TIME_WAIT compressed nca_conn_t */
1292 
1293                 tcp_close : 1,          /* nca_conn_t close */
1294                 http_persist : 3,       /* HTTP persistent connection state */
1295 
1296                 deferred_xmit_end : 1,  /* xmit_end() deferred to xmit() */
1297                 http_direct_splice : 1, /* have a connection to splice too */
1298                 http_direct_tee : 1,    /* have a connection to tee too */
1299 
1300                 tcp_2_junk_fill_thru_bit_31 : 17;
1301 /*
1302  * Note: all nca_conn_t members to be accessed by a tcp_time_wait_comp
1303  * nca_conn_t must be above this point !!!
1304  */
1305 
1306         uchar_t tcp_timer_backoff;      /* Backoff shift count. */
1307         clock_t tcp_last_recv_time;     /* Last time we receive a segment. */
1308         clock_t tcp_dack_set_time;      /* When delayed ACK timer is set. */
1309 
1310         int     tcp_ip_hdr_len;         /* Byte len of our current IP header */
1311         clock_t tcp_first_timer_threshold;  /* When to prod IP */
1312         clock_t tcp_second_timer_threshold; /* When to give up completely */
1313         clock_t tcp_first_ctimer_threshold; /* 1st threshold while connecting */
1314         clock_t tcp_second_ctimer_threshold; /* 2nd ... while connecting */
1315 
1316         clock_t tcp_last_rcv_lbolt; /* lbolt on last packet, used for PAWS */
1317 
1318 
1319         uint32_t tcp_obsegs;            /* Outbound segments on this stream */
1320 
1321         uint32_t tcp_mss;               /* Max segment size */
1322         uint32_t tcp_naglim;            /* Tunable nagle limit */
1323         int32_t tcp_hdr_len;            /* Byte len of combined TCP/IP hdr */
1324         tcph_t  *tcp_tcph;              /* tcp header within combined hdr */
1325         int32_t tcp_tcp_hdr_len;        /* tcp header len within combined */
1326         uint32_t        tcp_valid_bits;
1327 #define TCP_ISS_VALID   0x1     /* Is the tcp_iss seq num active? */
1328 #define TCP_FSS_VALID   0x2     /* Is the tcp_fss seq num active? */
1329 #define TCP_URG_VALID   0x4     /* If the tcp_urg seq num active? */
1330 
1331         int32_t tcp_xmit_hiwater;       /* Send buffer high water mark. */
1332 
1333         union {                         /* template ip header */
1334                 ipha_t  tcp_u_ipha;
1335                 char    tcp_u_buf[IP_SIMPLE_HDR_LENGTH+TCP_MIN_HEADER_LENGTH];
1336                 double  tcp_u_aligner;
1337         } tcp_u;
1338 #define tcp_ipha        tcp_u.tcp_u_ipha
1339 #define tcp_iphc        tcp_u.tcp_u_buf
1340 
1341         uint32_t tcp_sum;               /* checksum to compensate for source */
1342                                         /* routed packets. Host byte order */
1343 
1344         uint16_t tcp_last_sent_len;     /* Record length for nagle */
1345         uint16_t tcp_dupack_cnt;        /* # of consequtive duplicate acks */
1346 
1347         uint32_t tcp_rnxt;              /* Seq we expect to recv next */
1348         uint32_t tcp_rwnd;              /* Current receive window */
1349         uint32_t tcp_rwnd_max;          /* Maximum receive window */
1350 
1351         mblk_t  *tcp_rcv_head;          /* Queued until push, urgent data or */
1352         mblk_t  *tcp_rcv_tail;          /* the count exceeds */
1353         uint32_t tcp_rcv_cnt;           /* tcp_rcv_push_wait. */
1354 
1355         mblk_t  *tcp_reass_head;        /* Out of order reassembly list head */
1356         mblk_t  *tcp_reass_tail;        /* Out of order reassembly list tail */
1357 
1358         uint32_t tcp_cwnd_ssthresh;     /* Congestion window */
1359         uint32_t tcp_cwnd_max;
1360         uint32_t tcp_csuna;             /* Clear (no rexmits in window) suna */
1361 
1362         int     tcp_rttv_updates;
1363         clock_t tcp_rto;                /* Round trip timeout */
1364         clock_t tcp_rtt_sa;             /* Round trip smoothed average */
1365         clock_t tcp_rtt_sd;             /* Round trip smoothed deviation */
1366         clock_t tcp_rtt_update;         /* Round trip update(s) */
1367         clock_t tcp_ms_we_have_waited;  /* Total retrans time */
1368 
1369         uint32_t tcp_swl1;              /* These help us avoid using stale */
1370         uint32_t tcp_swl2;              /*  packets to update state */
1371 
1372         mblk_t  *tcp_xmit_head;         /* Head of rexmit list */
1373         mblk_t  *tcp_xmit_last;         /* last valid data seen by tcp_wput */
1374         uint32_t tcp_unsent;            /* # of bytes in hand that are unsent */
1375         mblk_t  *tcp_xmit_tail;         /* Last rexmit data sent */
1376         uint32_t tcp_xmit_tail_unsent;  /* # of unsent bytes in xmit_tail */
1377 
1378         uint32_t tcp_snxt;              /* Senders next seq num */
1379         uint32_t tcp_suna;              /* Sender unacknowledged */
1380         uint32_t tcp_rexmit_nxt;        /* Next rexmit seq num */
1381         uint32_t tcp_rexmit_max;        /* Max retran seq num */
1382         int32_t tcp_snd_burst;          /* Send burst factor */
1383         uint32_t tcp_swnd;              /* Senders window (relative to suna) */
1384         uint32_t tcp_cwnd;              /* Congestion window */
1385         int32_t tcp_cwnd_cnt;           /* cwnd cnt in congestion avoidance */
1386         uint32_t tcp_ackonly;           /* Senders last ack seq num */
1387 
1388         uint32_t tcp_irs;               /* Initial recv seq num */
1389         uint32_t tcp_iss;               /* Initial send seq num */
1390         uint32_t tcp_fss;               /* Final/fin send seq num */
1391         uint32_t tcp_urg;               /* Urgent data seq num */
1392 
1393         uint32_t tcp_rack;              /* Seq # we have acked */
1394         uint32_t tcp_rack_cnt;          /* # of bytes we have deferred ack */
1395 
1396         uint32_t tcp_max_swnd;          /* Maximum swnd we have seen */
1397         int64_t tcp_rexmit_fire_time;
1398         int64_t tcp_dack_fire_time;
1399         int64_t tcp_ka_fire_time;
1400         int64_t tcp_http_ka_fire_time;
1401 
1402         int32_t tcp_keepalive_intrvl;   /* Zero means don't bother */
1403         int32_t tcp_ka_probe_sent;
1404         int32_t tcp_ka_last_intrvl;
1405 
1406 #define TCP_DACK_TIMER          0x1
1407 #define TCP_REXMIT_TIMER        0x2
1408 #define TCP_KA_TIMER            0x4
1409 #define TCP_HTTP_KA_TIMER       0x8
1410         int16_t         tcp_running_timer;
1411         int16_t         tcp_pending_timer;
1412 
1413 #ifdef  CONNP_T_TRACE_ON
1414         connp_t *pkt_tp;                /* Packet tracing pointer */
1415         connp_t pkt_tv[CONNP_TV_SZ];    /* Packet tracing vector */
1416 #endif  /* CONNP_T_TRACE_ON */
1417 
1418 } nca_conn_t;
1419 
/*
 * Active stack support parameters to control what ports NCA can use.
 * This header only declares them; per the original note the objects
 * themselves live in ncaproto.c.
 *
 * NOTE(review): tcp_lo_port/tcp_hi_port look like the bounds of the usable
 * port range - confirm against ncaproto.c.
 */
extern struct nca_tbf_s *nca_tcp_port;
extern in_port_t tcp_lo_port;
extern in_port_t tcp_hi_port;
1427 
/*
 * nca_conn_t.http_persist values and corresponding HTTP header strings are
 * used to determine the connection persistent state of a connection and
 * any HTTP header which needs to be sent.
 *
 * Note: http_persist is declared as a 3-bit bitfield in nca_conn_t, so
 * every PERSIST_* state value has to stay within 0 .. 7.
 */

#define PERSIST_NONE            0       /* Not persistent */

#define PERSIST_CLOSE           1       /* Was persistent, send close header */
#define PERSIST_TRUE            2       /* Connection is HTTP persistent */
#define PERSIST_KA              3       /* Persistent, send Keep-Alive header */
#define PERSIST_UPCALL          4       /* Insert "Connection: close" on */
                                        /* upcall and clear flag */

/*
 * Header text. Every string ends with a bare "\r\n" line and
 * PERSIST_HDR_NONE consists of nothing else. No dedicated string is
 * defined here for PERSIST_TRUE or PERSIST_UPCALL.
 */
#define PERSIST_HDR_NONE        "\r\n"
#define PERSIST_HDR_CLOSE       "Connection: close\r\n\r\n"
#define PERSIST_HDR_KA          "Connection: Keep-Alive\r\n\r\n"
1445 
/*
 * nca_conn_t nca_squeue_ctl() flag values:
 *
 * Note: 0x0001 .. 0x0006 are consecutive codes, not independent bits;
 * OR-ing two of them produces the value of a different code (for example
 * CONN_MISS_DONE | IF_TIME_WAIT == IF_TCP_TIMER).
 */

#define CONN_MISS_DONE          0x0001  /* The conn miss processing is done */
#define IF_TIME_WAIT            0x0002  /* A TIME_WAIT has fired */
#define IF_TCP_TIMER            0x0003  /* A TCP TIMER has fired */
#define NCA_CONN_TCP_TIMER      0x0004  /* A TCP TIMER needs to be execed */
#define IF_TCP_CONNECT          0x0005  /* TCP connection request */
#define IF_TCP_SEND             0x0006  /* A new send request. */

/*
 * Direct i/o codes. IF_TCP_DIRECT_TEE has the same numeric value as
 * CONN_MISS_DONE.
 * NOTE(review): judging by its comment and by the NCA_CONN_T_DIRECT2/3
 * trace comments ("IF_TCP_DIRECT_TO | TEE") it is OR'ed into
 * IF_TCP_DIRECT_TO/IF_TCP_DIRECT_FROM as a modifier - confirm at the
 * call sites.
 */
#define IF_TCP_DIRECT_TO        0x0010  /* A TCP direct i/o, step 1 */
#define IF_TCP_DIRECT_FROM      0x0012  /* A TCP direct i/o, step 2 */
#define IF_TCP_DIRECT_TEE       0x0001  /* If a tee else a splice */
#define IF_TCP_DIRECT_CLOSE     0x001F  /* A TCP direct i/o close */
1461 
#define NCA_CONN_T_STK_DEPTH    7       /* max stack backtrace depth */

/*
 * One record of the nca_conn_t trace buffer. The members are filled in by
 * NCA_CONN_T_TRACE() when that tracing is compiled in. The adb formats
 * documented with the trace macros depend on this layout.
 */
struct conn_ts {
        nca_conn_t      *conn;  /* connection handed to NCA_CONN_T_TRACE() */
        unsigned action;        /* NCA_CONN_T_* action value handed in */
        int     ref;            /* conn->ref sampled when the record is made */
        int     cpu;            /* CPU->cpu_seqid when the record is made */
        pc_t    stk[NCA_CONN_T_STK_DEPTH + 1];  /* backtrace PCs */
};
1471 
/*
 * nca_conn_t tracing switch. The #undef below unconditionally disables
 * tracing, so the #ifdef section that follows is compiled out and
 * NCA_CONN_T_TRACE() takes its empty definition. Replace the #undef with a
 * #define to compile the tracing in.
 */
#undef  NCA_CONN_T_TRACE_ON

#ifdef  NCA_CONN_T_TRACE_ON

/*
 * adb:
 * 32 bit
 *      *conn_tp,0t4096-(((*conn_tp)-con_tv)%0t48)/PXDDnPnPnPnPnPnPnPnPnn
 *      con_tv,((*conn_tp)-con_tv)%0t48/PXDDnPnPnPnPnPnPnPnPnn
 * 64 bit
 *      *conn_tp,0t4096-(((*conn_tp)-con_tv)%0t56)/PXDDnXnXnXnXnXnXnXnXnn
 *      con_tv,((*conn_tp)-con_tv)%0t56/PXDDnXnXnXnXnXnXnXnXnn
 */

/*
 * Trace action codes, stored in conn_ts.action. Where a comment reads
 * "| value" the caller ORs that value into the code; CONN_REFHOLD() and
 * CONN_REFRELE() do so with the resulting reference count.
 */
#define NCA_CONN_T_REFINIT      0x10000000      /* CONN_REF init() |ref value */
#define NCA_CONN_T_REFINIT1     0x11000000      /* CONN_REF init() |ref value */
#define NCA_CONN_T_REFINIT2     0x12000000      /* CONN_REF init() |ref value */
#define NCA_CONN_T_REFNOTCP     0x13000000 /* CONN_REF no longer tcp_refed */
#define NCA_CONN_T_REFHOLD      0x1A000000      /* CONN_REFHOLD() | ref value */
#define NCA_CONN_T_REFRELE      0x1F000000      /* CONN_REFRELE() | ref value */

#define NCA_CONN_T_HTTPCALL     0x20000000      /* call http() | rbytes */
#define NCA_CONN_T_HTTPRET1     0x21000000      /* return http() */
#define NCA_CONN_T_HTTPRET2     0x22000000      /* return ! http() */

#define NCA_CONN_T_MISSDONE     0x30000000      /* CONN_MISS_DONE */
#define NCA_CONN_T_TCPTIMER     0x31000000      /* NCA_CONN_TCP_TIMER */
#define NCA_CONN_T_XMIT_END     0x32000000      /* xmit_end() | tcp_unsent */
#define NCA_CONN_T_XMIT_BAD     0x33000000 /* xmit_end() bad state |tcp_state */
#define NCA_CONN_T_XMIT_DEF     0x34000000      /* xmit_end() deferred */
#define NCA_CONN_T_TIME_WAIT 0x35000000 /* done: tcp_state == TCPS_TIME_WAIT */
#define NCA_CONN_T_PKT_IN       0x36000000      /* tcp_input() | flags */
#define NCA_CONN_T_PKT_OUT      0x37000000      /* tcp_input() | flags */

#define NCA_CONN_T_DIRECT       0x40000000      /* tcp_direct() from conn_t */
#define NCA_CONN_T_DIRECT1      0x41000000      /* tcp_direct() to conn_t */
#define NCA_CONN_T_DIRECT2      0x42000000      /* IF_TCP_DIRECT_TO | TEE */
#define NCA_CONN_T_DIRECT3      0x43000000      /* IF_TCP_DIRECT_FROM | TEE */
#define NCA_CONN_T_DIRECT4      0x44000000      /* tcp_close() */
#define NCA_CONN_T_DIRECT5      0x45000000      /* IF_TCP_DIRECT_CLOSE */
                                                /* from|tcp_state */
#define NCA_CONN_T_DIRECT6      0x46000000      /* IF_TCP_DIRECT_CLOSE to */
/*
 * NCA_CONN_T_TRACE_STK() - record a stack backtrace in _p->stk[].
 *
 * Expands inside NCA_CONN_T_TRACE() and uses that macro's locals:
 * _p (the struct conn_ts being filled in) and _ix (scratch int).
 *
 * x86: getpcstack() fills stk[0 .. _ix-1], so the zero terminator goes in
 * stk[_ix]. It is only written when the trace did not use every slot,
 * which keeps the store inside the NCA_CONN_T_STK_DEPTH + 1 entry array.
 *
 * Other platforms: stk[0] holds callee() and getpcstack() fills
 * stk[1 .. _ix], so the terminator goes in stk[_ix + 1], again only when
 * a slot is left for it.
 */
#if defined(__i386) || defined(__amd64)
#define NCA_CONN_T_TRACE_STK() {                                        \
        _ix = getpcstack(&_p->stk[0], NCA_CONN_T_STK_DEPTH + 1);        \
        if (_ix < NCA_CONN_T_STK_DEPTH + 1) {                           \
                _p->stk[_ix] = 0;                                       \
        }                                                               \
}
#else
#define NCA_CONN_T_TRACE_STK() {                                        \
        _p->stk[0] = (pc_t)callee();                                    \
        _ix = getpcstack(&_p->stk[1], NCA_CONN_T_STK_DEPTH);            \
        if (_ix < NCA_CONN_T_STK_DEPTH) {                               \
                _p->stk[_ix + 1] = 0;                                   \
        }                                                               \
}
#endif
1531 
/* Number of records in the con_tv[] trace ring. */
#define CON_TV_SZ 4096

extern struct conn_ts con_tv[CON_TV_SZ];
extern struct conn_ts *conn_tp;

/*
 * NCA_CONN_T_TRACE(p, a) - record action (a) for nca_conn_t pointer (p).
 *
 * The record conn_tp currently points at is claimed by moving conn_tp on
 * to the following record (wrapping from the end of con_tv[] back to its
 * start); the casptr() loop repeats until casptr() returns the value that
 * was read from conn_tp. The claimed record then receives the conn
 * pointer, the action, the current (p)->ref and CPU->cpu_seqid, followed
 * by a backtrace from NCA_CONN_T_TRACE_STK().
 *
 * The locals _p and _ix are used by NCA_CONN_T_TRACE_STK() and so must
 * keep these names. Both (p) and (a) appear in plain statements, (p) twice.
 */
#define NCA_CONN_T_TRACE(p, a) {                                        \
        struct conn_ts *_p;                                             \
        struct conn_ts *_np;                                            \
        int    _ix;                                                     \
                                                                        \
        do {                                                            \
                _p = conn_tp;                                   \
                if ((_np = _p + 1) == &con_tv[CON_TV_SZ])   \
                        _np = con_tv;                           \
        } while (casptr(&conn_tp, _p, _np) != _p);                  \
        _p->conn = (p);                                                      \
        _p->action = (a);                                            \
        _p->ref = (p)->ref;                                               \
        _p->cpu = CPU->cpu_seqid;                                 \
        NCA_CONN_T_TRACE_STK();                                         \
}

#else   /* NCA_CONN_T_TRACE_ON */

/* Tracing compiled out: the macro expands to nothing. */
#define NCA_CONN_T_TRACE(p, a)

#endif  /* NCA_CONN_T_TRACE_ON */
1559 
1560 
/*
 * CONN_REFHOLD() - add one reference to a nca_conn_t.
 *
 * The hold is traced first (with the count it is about to reach); a
 * connection whose count is not positive has no reference to build on
 * and causes a panic. (connp) is evaluated more than once, and the macro
 * expands to a brace block rather than an expression.
 */
#define CONN_REFHOLD(connp) {                                           \
        NCA_CONN_T_TRACE((connp),                                       \
            NCA_CONN_T_REFHOLD | ((connp)->ref + 1));                   \
        if (!((connp)->ref > 0)) {                                      \
                panic("nca CONN_REFHOLD: %p has no references",         \
                    (void *)(connp));                                   \
        }                                                               \
        (connp)->ref += 1;                                              \
}
1570 
/*
 * CONN_REFRELE() - drop one reference on a nca_conn_t.
 *
 * The release is traced first (with the count it is about to reach).
 * Two states cause a panic: a count that is not positive, and a
 * tcp_refed connection whose count is exactly 1. When the count reaches
 * zero the connection's hashfanout lock is entered and nca_conn_free()
 * is called, which is what exits that lock. (connp) is evaluated more
 * than once, and the macro expands to a brace block.
 */
#define CONN_REFRELE(connp) {                                           \
        NCA_CONN_T_TRACE((connp),                                       \
            NCA_CONN_T_REFRELE | ((connp)->ref - 1));                   \
        if ((connp)->ref < 1) {                                         \
                panic("nca CONN_REFRELE: %p has no references",         \
                    (void *)(connp));                                   \
        } else if ((connp)->tcp_refed && (connp)->ref == 1) {           \
                panic("nca CONN_REFRELE: %p "                           \
                    "has only tcp_refed reference",                     \
                    (void *)(connp));                                   \
        }                                                               \
        if (--(connp)->ref == 0) {                                      \
                /* Final reference is gone, free the nca_conn_t */      \
                mutex_enter(&(connp)->hashfanout->lock);                \
                nca_conn_free(connp);                                   \
                /* Note: nca_conn_free exits the lock */                \
        }                                                               \
}
1597 
/*
 * The nca_io2_shadow_t is used by the kernel to contain a copy of a
 * user-land nca_io2_t together with the user-land nca_io2_t address and
 * size (taken from the door_arg_t).
 */

typedef struct nca_io2_shadow_s {
        nca_io2_t       io;             /* copy of user-land nca_io2_t */
        void            *data_ptr;      /* copy of door_arg_t.data_ptr */
        size_t          data_size;      /* copy of door_arg_t.data_size */
} nca_io2_shadow_t;

/* Values for nca_io2_t.shadow. */
#define SHADOW_NONE     0x00            /* nca_io2_t.shadow NONE */
#define SHADOW_DOORSRV  0x01            /* nca_io2_t.shadow door_srv() */
#define SHADOW_NCAFS    0x02            /* nca_io2_t.shadow NCAfs */
1612 
1613 
/*
 * NCA_IO_WDATA() - store one variable length field in a nca_io2_t buffer.
 *
 *      val     data to copy, or NULL for "no data"
 *      vsize   value recorded in the length member (and bytes to copy)
 *      p       pointer to the nca_io2_t; data is addressed relative to it
 *      n_used  lvalue counting the bytes of the buffer already in use
 *      len/off names of the field's length and offset members
 *
 * With data, the field is placed at n_used rounded up to a uint32_t
 * boundary and n_used is moved to the end of the copied bytes. Without
 * data only the length is recorded and the offset is set to 0.
 *
 * Note: the expansion is a bare if/else statement, not a brace block.
 */
#define NCA_IO_WDATA(val, vsize, p, n_used, len, off)           \
        /*CONSTCOND*/                                           \
        if ((val) != NULL) {                                    \
                (p)->len = (vsize);                             \
                (p)->off = ((n_used) + sizeof (uint32_t) - 1) & \
                    (~(sizeof (uint32_t) - 1));                 \
                bcopy((char *)(val),                            \
                    ((char *)(p) + (p)->off), (vsize));         \
                (n_used) = (p)->off + (p)->len;                 \
        } else {                                                \
                (p)->len = (vsize);                             \
                (p)->off = 0;                                   \
        }
1631 
/*
 * NCA_IO_ADATA() - append data to a variable length field of a nca_io2_t.
 *
 *      val     data to append
 *      vsize   number of bytes to append
 *      p       pointer to the nca_io2_t; data is addressed relative to it
 *      n_used  lvalue counting the bytes of the buffer already in use
 *      len/off names of the field's length and offset members
 *
 * Note: must be the last field a NCA_IO_WDATA() was done for.
 *
 * Note: a NULL NCA_IO_WDATA() can be followed by a NCA_IO_ADATA() only if
 *              vsize was == -1.
 *
 * In that case (length member == -1) the field is started here: its
 * offset becomes n_used rounded up to a uint32_t boundary and n_used is
 * moved up to that offset, so that the alignment padding is counted and
 * n_used keeps marking the end of the appended data.
 *
 * The expansion is a single brace block, so the whole append stays under
 * the control of an enclosing unbraced if/for/while.
 */
#define NCA_IO_ADATA(val, vsize, p, n_used, len, off) {         \
        if ((p)->len == -1) {                                   \
                (p)->len = 0;                                   \
                (p)->off = ((n_used) + sizeof (uint32_t) - 1) & \
                    (~(sizeof (uint32_t) - 1));                 \
                (n_used) = (p)->off;                            \
        }                                                       \
        bcopy((char *)(val), ((char *)(p) +                     \
            (p)->off + (p)->len), (vsize));                     \
        (p)->len += (vsize);                                    \
        (n_used) += (vsize);                                    \
}
1650 
/*
 * NCA_IO_PDATA() - address of a variable length field's data: the byte
 * that lies (p)->off bytes past the start of the nca_io2_t that p points
 * to. off is the name of the field's offset member.
 */
#define NCA_IO_PDATA(p, off) (&((char *)(p))[(p)->off])
1655 
1656 
/*
 * Fallback isdigit(): true for '0' through '9'. Only defined when no
 * isdigit macro exists yet. The argument is evaluated twice.
 */
#ifndef isdigit
#define isdigit(c) ('0' <= (c) && (c) <= '9')
#endif
1660 
/*
 * Fallback tolower(): maps 'A' .. 'Z' to 'a' .. 'z' and yields every other
 * value unchanged. Only defined when no tolower macro exists yet.
 *
 * The whole expansion is parenthesized so that the conditional expression
 * stays intact when the macro is used as an operand, as in
 * tolower(x) == y. The argument is evaluated more than once.
 */
#ifndef tolower
#define tolower(c) (((c) >= 'A' && (c) <= 'Z') ? ((c) | 0x20) : (c))
#endif
1664 
/*
 * Fallback isalpha(): true for 'A' .. 'Z' and 'a' .. 'z'. Only defined
 * when no isalpha macro exists yet. The argument is evaluated more than
 * once.
 */
#ifndef isalpha
#define isalpha(c)                                              \
        (('A' <= (c) && (c) <= 'Z') || ('a' <= (c) && (c) <= 'z'))
#endif
1668 
/*
 * Fallback isspace(): true for space, tab, newline, carriage return,
 * form feed and '\013'. Only defined when no isspace macro exists yet.
 * The argument is evaluated more than once.
 */
#ifndef isspace
#define isspace(c)                                              \
        ((c) == ' ' || (c) == '\t' || (c) == '\n' ||            \
        (c) == '\r' || (c) == '\f' || (c) == '\013')
#endif
1673 
/*
 * String helpers taking an explicit size_t bound; the definitions are not
 * in this header.
 * NOTE(review): going by the names these are bounded counterparts of
 * strchr()/strstr()/atoi() plus case-insensitive and reverse searches -
 * confirm the exact semantics against the definitions.
 */
extern char *strnchr(const char *, int, size_t);
extern char *strnstr(const char *, const char *, size_t);
extern char *strncasestr(const char *, const char *, size_t);
extern char *strrncasestr(const char *, const char *, size_t);
extern int atoin(const char *, size_t);
extern int digits(int);

/*
 * Note: CONN_REFRELE() calls nca_conn_free() after entering the
 * connection's hashfanout lock and relies on nca_conn_free() to exit it.
 */
extern void nca_conn_free(nca_conn_t *);
extern void nca_logit_off(void);
extern void node_fr(node_t *);
1684 
/*
 * nca_squeue_t and sqfan_t interfaces; the definitions are not in this
 * header.
 *
 * Note: the function pointer parameters of nca_squeue_init() and
 * sqfan_ixinit() are declared as void (*)(), i.e. without a parameter
 * list, so the compiler does not check the arguments of calls made
 * through them.
 */
extern nca_squeue_t *nca_squeue_init(nca_squeue_t *, uint32_t,
    processorid_t, void (*)(), void *, void (*)(), clock_t, pri_t);
extern void nca_squeue_fini(nca_squeue_t *);
extern void nca_squeue_enter(nca_squeue_t *, mblk_t *, void *);
extern void nca_squeue_fill(nca_squeue_t *, mblk_t *, void *);
extern mblk_t *nca_squeue_remove(nca_squeue_t *);
extern void nca_squeue_worker(nca_squeue_t *);
extern mblk_t *nca_squeue_ctl(mblk_t *, void *, unsigned short);
extern void nca_squeue_signal(nca_squeue_t *);
extern void nca_squeue_exit(nca_squeue_t *);
extern void sqfan_init(sqfan_t *, uint32_t, uint32_t, uint32_t);
extern nca_squeue_t *sqfan_ixinit(sqfan_t *, uint32_t, nca_squeue_t *, uint32_t,
    processorid_t, void (*)(), void *, void (*)(), clock_t, pri_t);
extern void sqfan_fini(sqfan_t *);
extern void sqfan_fill(sqfan_t *, mblk_t *, void *);
extern mblk_t *sqfan_remove(sqfan_t *);
extern void nca_squeue_nointr(nca_squeue_t *, mblk_t *, void *, int);
extern void nca_squeue_pause(nca_squeue_t *, mblk_t *, void *, int, boolean_t);
extern void nca_squeue_willproxy(nca_squeue_t *);
extern void nca_squeue_proxy(nca_squeue_t *, nca_squeue_t *);
extern void nca_squeue_bind(nca_squeue_t *, uint32_t, processorid_t);
1706 
/*
 * NCA TCP entry points operating on a nca_conn_t; the definitions are not
 * in this header.
 * NOTE(review): the meaning of the int, boolean_t and uint32_t arguments
 * is not visible here - see the definitions.
 */
extern int nca_tcp_clean_death(nca_conn_t *, int);
extern nca_conn_t *nca_tcp_connect(ipaddr_t, in_port_t, boolean_t);
extern void nca_tcp_send(nca_conn_t *, mblk_t *);
extern void nca_tcp_direct(nca_conn_t *, nca_conn_t *, uint32_t);
1711 
/* Function prototypes from ncadoorsrv.c */
extern node_t *nca_node_flush(node_t *);
extern void nca_downcall_service(void *, door_arg_t *, void (**)(void *,
    void *), void **, int *);
extern node_t *ctag_lookup(uint64_t, unsigned *);
extern node_t *node_replace(node_t *, nca_conn_t *);
extern node_t *node_temp(node_t *, nca_conn_t *);
extern void find_ctags(node_t *, nca_io2_t *, int *);
extern void nca_ncafs_srv(nca_io2_t *, struct uio *, queue_t *);
extern boolean_t nca_reclaim_vlru(void);
extern boolean_t nca_reclaim_plru(boolean_t, boolean_t);
1723 
/*
 * NCA_COUNTER() is used to add a signed long value to an unsigned long
 * counter; in general these counters are used to maintain NCA state.
 *
 * NCA_DEBUG_COUNTER() is used like NCA_COUNTER() but for counters used
 * to maintain additional debug state; by default these counters aren't
 * updated unless the global value nca_debug_counter is set to a value
 * other than zero.
 *
 * Also, if NCA_COUNTER_TRACE is defined a time ordered wrapping trace
 * buffer is maintained with hrtime_t stamps, counter address, value to
 * add, and new value entries for all NCA_COUNTER() and NCA_DEBUG_COUNTER()
 * use.
 *
 * The #undef below unconditionally selects the non-tracing definition.
 */

#undef  NCA_COUNTER_TRACE

#ifdef  NCA_COUNTER_TRACE

#define NCA_COUNTER_TRACE_SZ    1024

/* One record of the counter trace buffer. */
typedef struct nca_counter_s {
        hrtime_t        t;      /* gethrtime() when the record was made */
        unsigned long   *p;     /* address of the counter */
        unsigned long   v;      /* value that was added */
        unsigned long   nv;     /* counter value after the add */
} nca_counter_t;

extern nca_counter_t nca_counter_tv[];
extern nca_counter_t *nca_counter_tp;

/*
 * Tracing variant: do the atomic add, advance nca_counter_tp to the next
 * record with casptr() (wrapping at NCA_COUNTER_TRACE_SZ), then fill in
 * the record nca_counter_tp was advanced to.
 *
 * The arguments are copied once into _cp/_cv. All locals carry a leading
 * underscore so that an argument expression naming a caller variable such
 * as "p" or "v" is not captured by a local of the same name.
 */
#define NCA_COUNTER(_p, _v) {                                           \
        unsigned long   *_cp = (_p);                                    \
        long            _cv = (_v);                                     \
        unsigned long   _nv;                                            \
        nca_counter_t   *_otp;                                          \
        nca_counter_t   *_ntp;                                          \
                                                                        \
        _nv = atomic_add_long_nv(_cp, _cv);                             \
        do {                                                            \
                _otp = nca_counter_tp;                                  \
                _ntp = _otp + 1;                                        \
                if (_ntp == &nca_counter_tv[NCA_COUNTER_TRACE_SZ])      \
                        _ntp = nca_counter_tv;                          \
        } while (casptr((void *)&nca_counter_tp, (void *)_otp,          \
            (void *)_ntp) != (void *)_otp);                             \
        _ntp->t = gethrtime();                                          \
        _ntp->p = _cp;                                                  \
        _ntp->v = _cv;                                                  \
        _ntp->nv = _nv;                                                 \
}

#else   /* NCA_COUNTER_TRACE */

/* Non-tracing variant: just the atomic add. */
#define NCA_COUNTER(p, v) atomic_add_long((p), (v))

#endif  /* NCA_COUNTER_TRACE */
1781 
1782 
/*
 * This is the buf used in upcall to httpd.
 *
 * NOTE(review): tid presumably identifies the thread the buf belongs to -
 * confirm against the code that fills the table.
 */
typedef struct {
        uintptr_t       tid;
        char            *buf;
} http_buf_table_t;
1790 
/*
 * URI and filename hash: a simple static hash bucket array of singly
 * linked grounded lists, used with a hashing algorithm which has proven
 * to have good distribution properties for strings of ...
 *
 * Note: NCA_HASH_SZ must be a prime number.
 *
 * HASH_IX(s, l, hix, hsz) hashes the first l bytes at s into hix:
 * starting from 0, every byte turns hix into hix * 33 + byte, kept within
 * NCA_HASH_MASK; the final value is reduced modulo hsz. hix has to be an
 * lvalue and is referenced several times. A length of zero or less
 * yields 0.
 */

#define NCA_HASH_SZ     8053
#define NCA_HASH_MASK   0xFFFFFF
#define HASH_IX(s, l, hix, hsz) { \
        char *_hp = (s); \
        int _hn = (l); \
                        \
        (hix) = 0; \
        for (; _hn > 0; _hn--) { \
                (hix) = ((hix) * 33 + *_hp++) & NCA_HASH_MASK; \
        } \
        (hix) %= (hsz); \
}
1812 
/*
 * CTAG hash.
 *
 * CTAGHASH_IX(t, ix) stores tag t modulo NCA_CTAGHASH_SZ in ix and has
 * the stored value as its result.
 */
#define NCA_CTAGHASH_SZ 4096
#define CTAGHASH_IX(t, ix) ((ix) = ((t) % NCA_CTAGHASH_SZ))
1818 
/*
 * VNODE hash.
 *
 * Note: NCA_VNODEHASH_SZ must be a P2Ps() value.
 *
 * VNODEHASH_IX(p, ix) folds the pointer value p (shifted right by 27, 17
 * and 11 bits, XORed together with the unshifted value) and stores the
 * result modulo ncavnodehash_sz in ix. The modulus is the variable
 * ncavnodehash_sz, not the NCA_VNODEHASH_SZ constant.
 * NOTE(review): ncavnodehash_sz is presumably initialized from
 * NCA_VNODEHASH_SZ - confirm where it is defined.
 *
 * The argument is parenthesized under each cast so that an expression
 * argument (e.g. a | b or base + n) is converted as a whole.
 */
#define NCA_VNODEHASH_SZ 12281
#define VNODEHASH_IX(p, ix) ((ix) = (((uintptr_t)(p) >> 27) ^ \
        ((uintptr_t)(p) >> 17) ^ ((uintptr_t)(p) >> 11) ^ \
        (uintptr_t)(p)) % ncavnodehash_sz)
1828 
/*
 * NCA memory accounting and limit globals; defined outside this header.
 * NOTE(review): going by the names and types, the pgcnt_t values count
 * pages (pp*, vp* maxima, limits and usage) and the ssize_t values count
 * bytes per memory class - confirm against the definitions.
 */
extern pgcnt_t nca_ppmax;
extern pgcnt_t nca_vpmax;
extern pgcnt_t nca_pplim;
extern pgcnt_t nca_vplim;
extern pgcnt_t nca_ppmem;
extern pgcnt_t nca_vpmem;
extern ssize_t nca_kbmem;
extern ssize_t nca_spmem;
extern ssize_t nca_ckmem;
extern ssize_t nca_mbmem;
extern ssize_t nca_cbmem;
extern ssize_t nca_lbmem;
extern size_t  nca_maxkmem;
extern uint32_t nca_use_segmap;
1843 
/*
 * NCA event counters; defined outside this header.
 * NOTE(review): these are presumably updated through NCA_COUNTER(), which
 * takes the address of an unsigned long counter - confirm at the update
 * sites. The group remarks below are derived from the names only.
 */

/* Overall hit / file / ctag / miss totals. */
extern ulong_t nca_hits;
extern ulong_t nca_file;
extern ulong_t nca_ctag;
extern ulong_t nca_miss;

/* Hit path detail. */
extern ulong_t nca_hit304;
extern ulong_t nca_hitnoV;
extern ulong_t nca_hitnoVfast;
extern ulong_t nca_hitnoVtemp;

/* File path detail. */
extern ulong_t nca_filehits;
extern ulong_t nca_filenoV;
extern ulong_t nca_filenoVfast;
extern ulong_t nca_filemiss;

/* Miss path detail. */
extern ulong_t nca_missURI;
extern ulong_t nca_missQ;
extern ulong_t nca_missSAFE;
extern ulong_t nca_missnoV;
extern ulong_t nca_missnotcp;
extern ulong_t nca_missfail;
extern ulong_t nca_misstemp;
extern ulong_t nca_missnohash;
extern ulong_t nca_missclean;
extern ulong_t nca_missadvisory;
extern ulong_t nca_missadvNoA;
extern ulong_t nca_missERROR;

/* Errors and node flush/replace/temp events. */
extern ulong_t nca_ERROR;
extern ulong_t nca_flushnode;
extern ulong_t nca_replacenode;
extern ulong_t nca_tempnode;

extern ulong_t nca_fail304;

/* Numbered "nocache" events; see the update sites for each meaning. */
extern ulong_t nca_nocache1;
extern ulong_t nca_nocache2;
extern ulong_t nca_nocache3;
extern ulong_t nca_nocache4;
extern ulong_t nca_nocache5;
extern ulong_t nca_nocache6;
extern ulong_t nca_nocache6nomp;
extern ulong_t nca_nocache7;
extern ulong_t nca_nocache8;
extern ulong_t nca_nocache9;
extern ulong_t nca_nocache10;
extern ulong_t nca_nocache11;
extern ulong_t nca_nocache12;
extern ulong_t nca_nocache13;
extern ulong_t nca_nocache14;
extern ulong_t nca_nocache15;
extern ulong_t nca_nodes;
extern ulong_t nca_desballoc;

/* LRU and reclaim events; nca_ref[] is an array of counters. */
extern ulong_t nca_plrucnt;
extern ulong_t nca_vlrucnt;
extern ulong_t nca_rpcall;
extern ulong_t nca_rvcall;
extern ulong_t nca_rpbusy;
extern ulong_t nca_rvbusy;
extern ulong_t nca_rpfail;
extern ulong_t nca_rpempty;
extern ulong_t nca_rvempty;
extern ulong_t nca_rpdone;
extern ulong_t nca_rvdone;
extern ulong_t nca_rmdone;
extern ulong_t nca_rkdone;
extern ulong_t nca_rsdone;
extern ulong_t nca_rndone;
extern ulong_t nca_rpnone;
extern ulong_t nca_rvnone;
extern ulong_t nca_rmnone;
extern ulong_t nca_rknone;
extern ulong_t nca_rsnone;
extern ulong_t nca_rnh;
extern ulong_t nca_ref[];
extern ulong_t nca_vmap_rpcall;

/* node_t allocation failures. */
extern ulong_t nca_node_kmem_fail1;
extern ulong_t nca_node_kmem_fail2;

/* Door server events. */
extern ulong_t doorsrv_nopreempt;
extern ulong_t doorsrv_badconnect;
extern ulong_t doorsrv_invaladvise;
extern ulong_t doorsrv_notupcall;
extern ulong_t doorsrv_badadvise;
extern ulong_t doorsrv_cksum;
extern ulong_t doorsrv_error;
extern ulong_t doorsrv_op;
extern ulong_t doorsrv_badtee;
extern ulong_t doorsrv_badio;
extern ulong_t doorsrv_sz;

/* Allocation and map-in failures. */
extern ulong_t nca_allocfail;
extern ulong_t nca_mapinfail;
extern ulong_t nca_mapinfail1;
extern ulong_t nca_mapinfail2;
extern ulong_t nca_mapinfail3;

/* httpd interaction events. */
extern ulong_t nca_httpd_http;
extern ulong_t nca_httpd_badsz;
extern ulong_t nca_httpd_nosz;
extern ulong_t nca_httpd_filename;
extern ulong_t nca_httpd_filename1;
extern ulong_t nca_httpd_filename2;
extern ulong_t nca_httpd_trailer;
extern ulong_t nca_httpd_preempt;
extern ulong_t nca_httpd_downcall;
extern ulong_t nca_early_downcall;
extern ulong_t nca_httpd_more;
1954 
1955 ulong_t nca_logit_noupcall;
1956 
1957 ulong_t nca_logit;
1958 ulong_t nca_logit_nomp;
1959 ulong_t nca_logit_no;
1960 ulong_t nca_logit_NULL;
1961 ulong_t nca_logit_fail;
1962 
1963 ulong_t nca_logit_flush_NULL1;
1964 ulong_t nca_logit_flush_NULL2;
1965 
1966 ulong_t nca_logger_NULL1;
1967 ulong_t nca_logger_NULL2;
1968 
1969 ulong_t nca_log_buf_alloc_NULL;
1970 ulong_t nca_log_buf_alloc_fail;
1971 ulong_t nca_log_buf_alloc_part;
1972 
1973 ulong_t nca_log_buf_dup;
1974 
1975 extern ulong_t nca_upcalls;
1976 extern ulong_t nca_ncafs_upcalls;
1977 
1978 extern ulong_t nca_conn_count;
1979 extern ulong_t nca_conn_kmem;
1980 extern ulong_t nca_conn_kmem_fail;
1981 extern ulong_t nca_conn_allocb_fail;
1982 extern ulong_t nca_conn_tw;
1983 extern ulong_t nca_conn_tw1;
1984 extern ulong_t nca_conn_tw2;
1985 extern ulong_t nca_conn_reinit_cnt;
1986 extern ulong_t nca_conn_NULL1;
1987 extern ulong_t nca_conn_Q0;
1988 extern ulong_t nca_conn_FLAGS;
1989 
1990 extern ulong_t tcpwronginq;
1991 extern ulong_t ipsendup;
1992 extern ulong_t ipwrongcpu;
1993 extern ulong_t iponcpu;
1994 
1995 extern ulong_t nca_tcp_xmit_null;
1996 extern ulong_t nca_tcp_xmit_null1;
1997 
1998 extern ulong_t tw_on;
1999 extern ulong_t tw_fire;
2000 extern ulong_t tw_fire1;
2001 extern ulong_t tw_fire2;
2002 extern ulong_t tw_fire3;
2003 extern ulong_t tw_add;
2004 extern ulong_t tw_add1;
2005 extern ulong_t tw_delete;
2006 extern ulong_t tw_reclaim;
2007 extern ulong_t tw_reap;
2008 extern ulong_t tw_reap1;
2009 extern ulong_t tw_reap2;
2010 extern ulong_t tw_reap3;
2011 extern ulong_t tw_reap4;
2012 extern ulong_t tw_reap5;
2013 extern ulong_t tw_timer;
2014 extern ulong_t tw_timer1;
2015 extern ulong_t tw_timer2;
2016 extern ulong_t tw_timer3;
2017 extern ulong_t tw_timer4;
2018 extern ulong_t tw_timer5;
2019 
2020 extern ulong_t ti_on;
2021 extern ulong_t ti_fire;
2022 extern ulong_t ti_fire1;
2023 extern ulong_t ti_fire2;
2024 extern ulong_t ti_fire3;
2025 extern ulong_t ti_fire4;
2026 extern ulong_t ti_add;
2027 extern ulong_t ti_add1;
2028 extern ulong_t ti_add2;
2029 extern ulong_t ti_add3;
2030 extern ulong_t ti_add4;
2031 extern ulong_t ti_add5;
2032 extern ulong_t ti_add_reuse;
2033 extern ulong_t ti_delete;
2034 extern ulong_t ti_delete1;
2035 extern ulong_t ti_delete2;
2036 extern ulong_t ti_reap;
2037 extern ulong_t ti_reap1;
2038 extern ulong_t ti_reap2;
2039 extern ulong_t ti_reap3;
2040 extern ulong_t ti_reap4;
2041 extern ulong_t ti_reap5;
2042 extern ulong_t ti_timer;
2043 extern ulong_t ti_timer1;
2044 extern ulong_t ti_timer2;
2045 extern ulong_t ti_timer3;
2046 extern ulong_t ti_timer4;
2047 extern ulong_t ti_timer5;
2048 extern ulong_t ti_timer6;
2049 
2050 extern uint32_t nca_conn_q;
2051 extern uint32_t nca_conn_q0;
2052 extern uint32_t nca_conn_req_max_q;
2053 extern uint32_t nca_conn_req_max_q0;
2054 
2055 extern char nca_resp_500[];
2056 extern ssize_t nca_resp_500_sz;
2057 
2058 extern uint32_t ncaurihash_sz;
2059 extern uint32_t ncafilehash_sz;
2060 extern uint32_t ncactaghash_sz;
2061 extern uint32_t ncavnodehash_sz;
2062 extern nodef_t *ncaurihash;
2063 extern nodef_t *ncafilehash;
2064 extern nodef_t *ncavnodehash;
2065 extern nodef_t *ncactaghash;
2066 extern char nca_httpd_door_path[];
2067 extern char nca_httpd_downdoor_path[];
2068 extern door_handle_t nca_downcall_door_hand;
2069 extern uint32_t n_http_buf_size;
2070 extern door_handle_t nca_httpd_door_hand;
2071 extern sqfan_t nca_miss_fanout1;
2072 extern sqfan_t nca_miss_fanout2;
2073 extern nca_door_t nca_httpd_door;
2074 extern int nca_downdoor_created;
2075 extern int n_http_buf_table;
2076 extern http_buf_table_t *g_http_buf_table;
2077 extern struct kmem_cache *node_cache;
2078 #ifdef DEBUG
2079 extern node_t *nca_http_response(nca_conn_t *, const char *, int, char *, int,
2080                     uint_t, const char *);
2081 extern node_t *nca_http_response_node(nca_conn_t *, const char *, int, node_t *,
2082                     const char *);
2083 #else
2084 extern node_t *nca_http_response(nca_conn_t *, const char *, int, char *, int,
2085                     uint_t);
2086 extern node_t *nca_http_response_node(nca_conn_t *, const char *, int,
2087     node_t *);
2088 #endif
2089 extern void nca_node_del(node_t *);
2090 extern void nca_node_uncache(node_t *);
2091 extern node_t *nca_node_add(char *, int, nodef_t *, int);
2092 extern node_t *node_create(int, boolean_t, char *, int);
2093 extern void nca_reclaim_phys(node_t *, boolean_t, boolean_t);
2094 extern boolean_t nca_http_pmap(node_t *);
2095 extern boolean_t nca_http_vmap(node_t *, int);
2096 extern time_t nca_http_date(char *);
2097 extern node_t *nca_httpd_data(node_t *, nca_conn_t *, nca_io2_t *, int);
2098 extern void nca_missed(node_t *, mblk_t *, nca_squeue_t *);
2099 extern void nca_miss_conn_mv(node_t *, nca_conn_t *);
2100 extern void nca_miss_conn_fr(node_t *, nca_conn_t *);
2101 extern void nca_http_logit(nca_conn_t *);
2102 extern void nca_http_error(nca_conn_t *);
2103 extern void nca_node_xmit(node_t *, nca_conn_t *);
2104 
2105 /*
2106  * It contains data for forwarding data to application programs.
2107  * For door case, doorhandle is the upcall door handle and listenerq
2108  * is NULL; for ncafs, listenerq is the upcall listener queue and
2109  * doorhandle is NULL. listenning is always B_TRUE for door and it is
2110  * B_TRUE for ncafs only after the listen system call has been issued.
2111  */
2112 typedef struct nca_listener_s {
2113         boolean_t       listenning;     /* is ready for accepting connection */
2114         door_handle_t   doorhandle;     /* door handle or NULL for ncafs */
2115         queue_t         *listenerq;     /* upcall queue or NULL for door */
2116 } nca_listener_t;
2117 
/*
 * Values returned by nca_isnca_data():
 *
 *      NOT_NCA_DATA            not NCA data
 *      NCA_DATA_ANY_ADDR       NCA data; matches INADDR_ANY
 *      NCA_DATA_ADDR           NCA data; matches an IP address
 */
#define NOT_NCA_DATA            0       /* not NCA data */
#define NCA_DATA_ANY_ADDR       1       /* NCA data, INADDR_ANY match */
#define NCA_DATA_ADDR           2       /* NCA data, IP address match */
2127 
/* ipport* values (uint32_t); defined elsewhere. */
extern uint32_t ipportrehashcount1, ipportrehashcount2;
extern uint32_t ipportbucketcnt, ipporttablesize;

/* ncafscount and doorcount (uint32_t). */
extern uint32_t ncafscount, doorcount;

extern int ip_virtual_hosting;
2135 
2136 extern nca_listener_t *nca_listener_find(ipaddr_t, uint16_t);
2137 extern nca_listener_t *nca_listener_find2(ipaddr_t, uint16_t);
2138 extern int              nca_isnca_data(ipaddr_t, uint16_t);
2139 extern int              nca_listener_add(ipaddr_t, uint16_t, void *, boolean_t);
2140 extern int              nca_listener_del(ipaddr_t, uint16_t);
2141 extern void             nca_listener_report(mblk_t *);
2142 
2143 #ifdef  __cplusplus
2144 }
2145 #endif
2146 
2147 #endif  /* _INET_NCA_H */