1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/socket.h>
  28 #include <sys/sysmacros.h>
  29 #include <sys/fm/protocol.h>
  30 
  31 #include <netinet/in.h>
  32 #include <arpa/inet.h>
  33 
  34 #include <strings.h>
  35 #include <unistd.h>
  36 #include <pthread.h>
  37 #include <fcntl.h>
  38 #include <errno.h>
  39 #include <netdb.h>
  40 #include <poll.h>
  41 #include <stdarg.h>
  42 
  43 #include <fm/fmd_api.h>
  44 
/*
 * Wire-protocol and debugging constants.
 *
 * IP_MAGIC/IP_MAGLEN describe the marker kept in ip_hdr_t.iph_magic: the
 * sender copies IP_MAGLEN bytes of IP_MAGIC into every header and the
 * receiver compares the same number of bytes, so the string's terminating
 * NUL is never part of the header.
 *
 * The IP_DEBUG_* values are the levels passed to ip_debug(); a message is
 * emitted only when ip_debug_level is at least the level given for it.
 */
#define IP_MAGIC        "\177FMA" /* magic string identifying a packet header */
#define IP_MAGLEN       4       /* length of magic string */
#define IP_DEBUG_OFF    0       /* No informational debugging printed */
#define IP_DEBUG_FINE   1       /* Basic debug information printed (default) */
#define IP_DEBUG_FINER  2       /* More debug information printed. */
#define IP_DEBUG_FINEST 3       /* All debug information printed */
  51 
/*
 * Header that precedes every event on the wire.  The sender stores IP_MAGIC
 * in iph_magic and the length of the XDR-packed nvlist that follows in
 * iph_size, converted with htonl(); the receiver converts it back with
 * ntohl() before reading the payload.
 */
typedef struct ip_hdr {
        char iph_magic[IP_MAGLEN]; /* magic string */
        uint32_t iph_size;      /* packed size */
} ip_hdr_t;
  56 
/*
 * A reusable I/O buffer.  ipb_size records how many bytes were allocated for
 * ipb_buf so the same size can be handed to fmd_hdl_free() when the buffer
 * is replaced by a larger one or the owning transport is destroyed.
 */
typedef struct ip_buf {
        void *ipb_buf;          /* data buffer */
        size_t ipb_size;        /* size of buffer */
} ip_buf_t;
  61 
/*
 * Per-connection configuration.  One ip_cinfo_t is created for each
 * configured address by ip_create_cinfo() and linked through ipc_next onto
 * the global ip_conns list (protected by ip_conns_lock).
 */
typedef struct ip_cinfo {           /* Connection specific information */
        struct addrinfo *ipc_addr;  /* Connection address(es) */
        char *ipc_name;             /* The name of the server or interface */
        int ipc_retry;              /* The number of connection retries */
        boolean_t ipc_accept;       /* Will connection accept clients */
        id_t ipc_timer;             /* FMD timer id for connection */
        struct ip_cinfo *ipc_next;  /* Next connection in list */
} ip_cinfo_t;
  70 
/*
 * State for one socket managed by this module.  Each ip_xprt_t is allocated
 * by ip_xprt_create(), served by its own ip_xprt_thread(), linked through
 * ipx_next onto the global ip_xps list (protected by ip_lock), and released
 * by ip_xprt_destroy().  ipx_xprt stays NULL until an fmd transport has been
 * opened for the socket.
 */
typedef struct ip_xprt {
        fmd_xprt_t *ipx_xprt;   /* transport handle */
        int ipx_flags;          /* transport flags */
        int ipx_fd;             /* socket file descriptor */
        int ipx_done;           /* flag indicating connection closed */
        pthread_t ipx_tid;      /* recv-side auxiliary thread */
        ip_buf_t ipx_sndbuf;    /* buffer for sending events */
        ip_buf_t ipx_rcvbuf;    /* buffer for receiving events */
        ip_cinfo_t *ipx_cinfo;  /* info for reconnect */
        id_t ipx_spnd_timer;    /* connection suspend timer */
        char *ipx_addr;         /* address:port of remote connection */
        struct ip_xprt *ipx_next;       /* next ip_xprt in global list */
} ip_xprt_t;
  84 
  85 #define IPX_ID(a) ((a)->ipx_addr == NULL ? "(Not connected)" : (a)->ipx_addr)
  86 
/*
 * Module statistics.  Each member is an fmd_stat_t counter that the code in
 * this file increments when the corresponding failure is observed.
 */
typedef struct ip_stat {
        fmd_stat_t ips_accfail; /* failed accepts */
        fmd_stat_t ips_badmagic; /* invalid packet headers */
        fmd_stat_t ips_packfail; /* failed packs */
        fmd_stat_t ips_unpackfail; /* failed unpacks */
} ip_stat_t;
  93 
/*
 * Forward declarations.  ip_xprt_create() is called by ip_xprt_accept()
 * before its definition appears further down in the file.
 */
static void ip_xprt_create(fmd_xprt_t *, int, int, ip_cinfo_t *, char *);
static void ip_xprt_destroy(ip_xprt_t *);
  96 
/*
 * The module's statistics instance.  The initializers are positional, so
 * their order must match the member order of ip_stat_t; each entry supplies
 * the statistic's name, its type and a short description.
 */
static ip_stat_t ip_stat = {
        { "accfail", FMD_TYPE_UINT64, "failed accepts" },
        { "badmagic", FMD_TYPE_UINT64, "invalid packet headers" },
        { "packfail", FMD_TYPE_UINT64, "failed packs" },
        { "unpackfail", FMD_TYPE_UINT64, "failed unpacks" },
};
 103 
/*
 * Module-wide state.  ip_lock guards the ip_xps list and ip_conns_lock
 * guards the ip_conns list; the remaining variables are read without
 * locking by the functions in this file.
 * NOTE(review): most of the tunables below are presumably loaded from module
 * properties during initialization, which is not visible here -- confirm.
 */
static fmd_hdl_t *ip_hdl;       /* module handle */
static pthread_mutex_t ip_lock; /* lock for ip_xps list */
static ip_xprt_t *ip_xps;       /* list of active transports */
static pthread_mutex_t ip_conns_lock;   /* lock for ip_conns list */
static ip_cinfo_t *ip_conns;    /* list of all configured connection info */
static nvlist_t *ip_auth;       /* authority to use for transport(s) */
static size_t ip_size;          /* default buffer size */
static volatile int ip_quit;    /* signal to quit */
static int ip_qlen;             /* queue length for listen(3SOCKET) */
static int ip_mtbf;             /* mtbf for simulating packet drop */
static int ip_external;         /* set transport to be "external" */
static int ip_no_remote_repair; /* disallow remote repair */
static int ip_hconly;           /* only cache faults that are hc-scheme */
static int ip_rdonly;           /* force transport to be rdonly */
static int ip_hc_present_only;  /* only cache faults if hc-scheme and present */
static char *ip_domain_name;    /* set domain name for received list.suspects */
static hrtime_t ip_burp;        /* make mtbf slower by adding this much delay */
static int ip_translate;        /* call fmd_xprt_translate() before sending */
static char *ip_port;           /* port to connect to (or bind to if server) */
static int ip_retry;            /* retry count for ip_xprt_setup() -1=forever */
static hrtime_t ip_sleep;       /* sleep delay for ip_xprt_setup() */
static int ip_debug_level;      /* level for printing debug messages */
 126 
 127 /*
 128  * Prints a debug message to the fmd debug framework if the debug level is set
 129  * to at least the given level.
 130  */
 131 static void
 132 ip_debug(int level, char *fmt, ...)
 133 {
 134         if (ip_debug_level >= level) {
 135                 va_list args;
 136                 va_start(args, fmt);
 137                 fmd_hdl_vdebug(ip_hdl, fmt, args);
 138                 va_end(args);
 139         }
 140 }
 141 
 142 /*
 143  * Allocate space in ipx_sndbuf for a header and a packed XDR encoding of
 144  * the specified nvlist, and then send the buffer to our remote peer.
 145  */
 146 static int
 147 ip_fmdo_send(fmd_hdl_t *hdl, fmd_xprt_t *xp, fmd_event_t *ep, nvlist_t *nvl)
 148 {
 149         ip_xprt_t *ipx;
 150         size_t size, nvsize;
 151         char *buf, *nvbuf;
 152         ip_hdr_t *iph;
 153         ssize_t r, n;
 154         int err;
 155 
 156         if (xp == NULL) {
 157                 ip_debug(IP_DEBUG_FINE, "ip_fmdo_send failed: xp=NULL\n");
 158                 return (FMD_SEND_FAILED);
 159         }
 160         ipx = fmd_xprt_getspecific(hdl, xp);
 161 
 162         /*
 163          * For testing purposes, if ip_mtbf is non-zero, use this to pseudo-
 164          * randomly simulate the need for retries.  If ip_burp is also set,
 165          * then we also suspend the transport for a bit and wake it up again.
 166          */
 167         if (ip_mtbf != 0 && gethrtime() % ip_mtbf == 0) {
 168                 if (ip_burp != 0) {
 169                         ip_debug(IP_DEBUG_FINE, "burping ipx %s", IPX_ID(ipx));
 170                         ipx->ipx_flags |= FMD_XPRT_SUSPENDED;
 171                         ipx->ipx_spnd_timer = fmd_timer_install(
 172                             ip_hdl, ipx, NULL, ip_burp);
 173                         fmd_xprt_suspend(ip_hdl, xp);
 174                 }
 175                 return (FMD_SEND_RETRY);
 176         }
 177 
 178         if (ip_translate && (nvl = fmd_xprt_translate(hdl, xp, ep)) == NULL) {
 179                 fmd_hdl_error(hdl, "failed to translate event %p", (void *)ep);
 180                 return (FMD_SEND_FAILED);
 181         }
 182 
 183         (void) nvlist_size(nvl, &nvsize, NV_ENCODE_XDR);
 184         size = r = sizeof (ip_hdr_t) + nvsize;
 185 
 186         if (ipx->ipx_sndbuf.ipb_size < size) {
 187                 fmd_hdl_free(hdl, ipx->ipx_sndbuf.ipb_buf,
 188                     ipx->ipx_sndbuf.ipb_size);
 189                 ipx->ipx_sndbuf.ipb_size = P2ROUNDUP(size, 16);
 190                 ipx->ipx_sndbuf.ipb_buf = fmd_hdl_alloc(hdl,
 191                     ipx->ipx_sndbuf.ipb_size, FMD_SLEEP);
 192         }
 193 
 194         buf = ipx->ipx_sndbuf.ipb_buf;
 195         iph = (ip_hdr_t *)(uintptr_t)buf;
 196         nvbuf = buf + sizeof (ip_hdr_t);
 197 
 198         bcopy(IP_MAGIC, iph->iph_magic, IP_MAGLEN);
 199         iph->iph_size = htonl(nvsize);
 200         err = nvlist_pack(nvl, &nvbuf, &nvsize, NV_ENCODE_XDR, 0);
 201 
 202         if (ip_translate)
 203                 nvlist_free(nvl);
 204 
 205         if (err != 0) {
 206                 fmd_hdl_error(ip_hdl, "failed to pack event for "
 207                     "transport %p: %s\n", (void *)ipx->ipx_xprt, strerror(err));
 208                 ip_stat.ips_packfail.fmds_value.ui64++;
 209                 return (FMD_SEND_FAILED);
 210         }
 211 
 212         while (!ip_quit && r != 0) {
 213                 if ((n = send(ipx->ipx_fd, buf, r, 0)) < 0) {
 214                         if (errno != EINTR && errno != EWOULDBLOCK) {
 215                                 ip_debug(IP_DEBUG_FINE,
 216                                     "failed to send to %s", IPX_ID(ipx));
 217                                 return (FMD_SEND_FAILED);
 218                         }
 219                         continue;
 220                 }
 221                 buf += n;
 222                 r -= n;
 223         }
 224 
 225         ip_debug(IP_DEBUG_FINEST, "Sent event %d bytes to %s",
 226             size, IPX_ID(ipx));
 227         return (FMD_SEND_SUCCESS);
 228 }
 229 
 230 /*
 231  * Sends events over transports that are configured read only.  When the module
 232  * is in read only mode it will receive all events and only send events that
 233  * have a subscription set.
 234  *
 235  * The configuration file will have to set prop ip_rdonly true and also
 236  * subscribe for events that are desired to be sent over the transport in order
 237  * for this function to be used.
 238  */
 239 /* ARGSUSED */
 240 static void
 241 ip_fmdo_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
 242 {
 243         int err;
 244         ip_xprt_t *ipx;
 245 
 246         if (ip_rdonly && !ip_quit) {
 247                 (void) pthread_mutex_lock(&ip_lock);
 248 
 249                 for (ipx = ip_xps; ipx != NULL; ipx = ipx->ipx_next) {
 250                         err = ip_fmdo_send(hdl, ipx->ipx_xprt, ep, nvl);
 251                         while (FMD_SEND_RETRY == err) {
 252                                 err = ip_fmdo_send(hdl, ipx->ipx_xprt, ep, nvl);
 253                         }
 254                 }
 255                 (void) pthread_mutex_unlock(&ip_lock);
 256         }
 257 }
 258 
 259 /*
 260  * Receive a chunk of data of the specified size from our remote peer.  The
 261  * data is received into ipx_rcvbuf, and then a pointer to the buffer is
 262  * returned.  NOTE: The data is only valid until the next call to ip_xprt_recv.
 263  * If the connection breaks or ip_quit is set during receive, NULL is returned.
 264  */
 265 static void *
 266 ip_xprt_recv(ip_xprt_t *ipx, size_t size)
 267 {
 268         char *buf = ipx->ipx_rcvbuf.ipb_buf;
 269         ssize_t n, r = size;
 270 
 271         if (ipx->ipx_rcvbuf.ipb_size < size) {
 272                 fmd_hdl_free(ip_hdl, ipx->ipx_rcvbuf.ipb_buf,
 273                     ipx->ipx_rcvbuf.ipb_size);
 274                 ipx->ipx_rcvbuf.ipb_size = P2ROUNDUP(size, 16);
 275                 ipx->ipx_rcvbuf.ipb_buf = buf = fmd_hdl_alloc(ip_hdl,
 276                     ipx->ipx_rcvbuf.ipb_size, FMD_SLEEP);
 277         }
 278 
 279         while (!ip_quit && r != 0) {
 280                 if ((n = recv(ipx->ipx_fd, buf, r, MSG_WAITALL)) == 0) {
 281                         ipx->ipx_done++;
 282                         return (NULL);
 283                 }
 284 
 285                 if (n < 0) {
 286                         if (errno != EINTR && errno != EWOULDBLOCK) {
 287                                 ip_debug(IP_DEBUG_FINE,
 288                                     "failed to recv on ipx %s", IPX_ID(ipx));
 289                         }
 290                         continue;
 291                 }
 292                 /* Reset retry counter after a successful connection */
 293                 if (ipx->ipx_cinfo) {
 294                         ipx->ipx_cinfo->ipc_retry = ip_retry;
 295                 }
 296 
 297                 buf += n;
 298                 r -= n;
 299         }
 300 
 301         return (r ? NULL: ipx->ipx_rcvbuf.ipb_buf);
 302 }
 303 
 304 /*
 305  * Sets the address/port of the remote connection in the connection info struct
 306  * This is called after a TCP session has been set up with a known remote
 307  * address (sap)
 308  */
 309 static void
 310 ip_xprt_set_addr(ip_xprt_t *ipx, const struct sockaddr *sap)
 311 {
 312         const struct sockaddr_in6 *sin6 = (const void *)sap;
 313         const struct sockaddr_in *sin = (const void *)sap;
 314 
 315         char buf[INET6_ADDRSTRLEN + 16];
 316         struct in_addr v4addr;
 317         in_port_t port;
 318         int n;
 319 
 320         ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_set_addr");
 321 
 322         if (sap->sa_family == AF_INET6 &&
 323             IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 324                 IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr, &v4addr);
 325                 (void) inet_ntop(AF_INET, &v4addr, buf, sizeof (buf));
 326                 port = ntohs(sin6->sin6_port);
 327         } else if (sap->sa_family == AF_INET6) {
 328                 (void) inet_ntop(AF_INET6, &sin6->sin6_addr, buf, sizeof (buf));
 329                 port = ntohs(sin6->sin6_port);
 330         } else {
 331                 (void) inet_ntop(AF_INET, &sin->sin_addr, buf, sizeof (buf));
 332                 port = ntohs(sin->sin_port);
 333         }
 334 
 335         n = strlen(buf);
 336         (void) snprintf(buf + n, sizeof (buf) - n, ":%u", port);
 337 
 338         if (ipx->ipx_addr)
 339                 fmd_hdl_strfree(ip_hdl, ipx->ipx_addr);
 340         ipx->ipx_addr = fmd_hdl_strdup(ip_hdl, buf, FMD_SLEEP);
 341         ip_debug(IP_DEBUG_FINE, "connection addr is %s on %p",
 342             ipx->ipx_addr, (void *)ipx);
 343 }
 344 
 345 static nvlist_t *
 346 ip_xprt_auth(ip_xprt_t *ipx)
 347 {
 348         nvlist_t *nvl;
 349         int err;
 350 
 351         ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_auth");
 352 
 353         if (ip_auth != NULL)
 354                 err = nvlist_dup(ip_auth, &nvl, 0);
 355         else
 356                 err = nvlist_alloc(&nvl, 0, 0);
 357 
 358         if (err != 0) {
 359                 fmd_hdl_abort(ip_hdl, "failed to create nvlist for "
 360                     "authority: %s\n", strerror(err));
 361         }
 362 
 363         if (ip_auth != NULL)
 364                 return (nvl);
 365 
 366         ip_debug(IP_DEBUG_FINE, "ip_authority %s=%s\n",
 367             FM_FMRI_AUTH_SERVER, ipx->ipx_addr);
 368 
 369         (void) nvlist_add_uint8(nvl, FM_VERSION, FM_FMRI_AUTH_VERSION);
 370         (void) nvlist_add_string(nvl, FM_FMRI_AUTH_SERVER, ipx->ipx_addr);
 371 
 372         return (nvl);
 373 }
 374 
 375 static void
 376 ip_xprt_accept(ip_xprt_t *ipx)
 377 {
 378         struct sockaddr_storage sa;
 379         socklen_t salen = sizeof (sa);
 380         fmd_xprt_t *xp;
 381         int fd;
 382 
 383         ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_accept");
 384 
 385         if ((fd = accept(ipx->ipx_fd, (struct sockaddr *)&sa, &salen)) == -1) {
 386                 fmd_hdl_error(ip_hdl, "failed to accept connection");
 387                 ip_stat.ips_accfail.fmds_value.ui64++;
 388                 return;
 389         }
 390         ip_debug(IP_DEBUG_FINE, "Accepted socket on fd %d", fd);
 391 
 392         ip_xprt_set_addr(ipx, (struct sockaddr *)&sa);
 393         xp = fmd_xprt_open(ip_hdl, ipx->ipx_flags,
 394             ip_xprt_auth(ipx), NULL);
 395         ip_xprt_create(xp, fd, ipx->ipx_flags, ipx->ipx_cinfo, ipx->ipx_addr);
 396 }
 397 
 398 static void
 399 ip_xprt_recv_event(ip_xprt_t *ipx)
 400 {
 401         ip_hdr_t *iph;
 402         nvlist_t *nvl;
 403         size_t size;
 404         void *buf;
 405         int err;
 406 
 407         if ((iph = ip_xprt_recv(ipx, sizeof (ip_hdr_t))) == NULL)
 408                 return; /* connection broken */
 409 
 410         if (bcmp(iph->iph_magic, IP_MAGIC, IP_MAGLEN) != 0) {
 411                 fmd_hdl_error(ip_hdl,
 412                     "invalid hdr magic %x.%x.%x.%x from transport %s\n",
 413                     iph->iph_magic[0], iph->iph_magic[1], iph->iph_magic[2],
 414                     iph->iph_magic[3], IPX_ID(ipx));
 415                 ip_stat.ips_badmagic.fmds_value.ui64++;
 416                 return;
 417         }
 418 
 419         size = ntohl(iph->iph_size);
 420 
 421         if ((buf = ip_xprt_recv(ipx, size)) == NULL)
 422                 return; /* connection broken */
 423 
 424         if ((err = nvlist_unpack(buf, size, &nvl, 0)) != 0) {
 425                 fmd_hdl_error(ip_hdl, "failed to unpack event from "
 426                     "transport %s: %s\n",
 427                     IPX_ID(ipx), strerror(err));
 428                 ip_stat.ips_unpackfail.fmds_value.ui64++;
 429         } else {
 430                 if (ip_domain_name)
 431                         fmd_xprt_add_domain(ip_hdl, nvl, ip_domain_name);
 432                 fmd_xprt_post(ip_hdl, ipx->ipx_xprt, nvl, 0);
 433         }
 434 
 435         if (fmd_xprt_error(ip_hdl, ipx->ipx_xprt)) {
 436                 fmd_hdl_error(ip_hdl, "protocol error on transport %p",
 437                     (void *)ipx->ipx_xprt);
 438                 ipx->ipx_done++;
 439         }
 440         ip_debug(IP_DEBUG_FINEST, "Recv event %d bytes from %s",
 441             size, IPX_ID(ipx));
 442 }
 443 
 444 static void
 445 ip_xprt_thread(void *arg)
 446 {
 447         ip_xprt_t *ipx = arg;
 448         struct sockaddr_storage sa;
 449         socklen_t salen = sizeof (sa);
 450         struct pollfd pfd;
 451 
 452         ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_thread");
 453 
 454         while (!ip_quit && !ipx->ipx_done) {
 455                 if (ipx->ipx_xprt != NULL || (ipx->ipx_flags & FMD_XPRT_ACCEPT))
 456                         pfd.events = POLLIN;
 457                 else
 458                         pfd.events = POLLOUT;
 459 
 460                 pfd.fd = ipx->ipx_fd;
 461                 pfd.revents = 0;
 462 
 463                 if (poll(&pfd, 1, -1) <= 0)
 464                         continue; /* loop around and check ip_quit */
 465 
 466                 if (pfd.revents & (POLLHUP | POLLERR)) {
 467                         ip_debug(IP_DEBUG_FINE, "hangup fd %d\n", ipx->ipx_fd);
 468                         break;
 469                 }
 470 
 471                 if (pfd.revents & POLLOUT) {
 472                         /*
 473                          * Once we're connected, there's no reason to have our
 474                          * calls to recv() and send() be non-blocking since we
 475                          * we have separate threads for each: clear O_NONBLOCK.
 476                          */
 477                         (void) fcntl(ipx->ipx_fd, F_SETFL,
 478                             fcntl(ipx->ipx_fd, F_GETFL, 0) & ~O_NONBLOCK);
 479 
 480                         if (getpeername(ipx->ipx_fd, (struct sockaddr *)&sa,
 481                             &salen) != 0) {
 482                                 ip_debug(IP_DEBUG_FINE,
 483                                     "Not connected, no remote name for fd %d. "
 484                                     " Will retry.",
 485                                     ipx->ipx_fd);
 486                                 bzero(&sa, sizeof (sa));
 487                                 break;
 488                         }
 489                         ip_xprt_set_addr(ipx, (struct sockaddr *)&sa);
 490                         ipx->ipx_xprt = fmd_xprt_open(ip_hdl, ipx->ipx_flags,
 491                             ip_xprt_auth(ipx), ipx);
 492 
 493                         ip_debug(IP_DEBUG_FINE, "connect fd %d ipx %p",
 494                             ipx->ipx_fd, (void *)ipx);
 495                         continue;
 496                 }
 497 
 498                 if (pfd.revents & POLLIN) {
 499                         if (ipx->ipx_xprt == NULL)
 500                                 ip_xprt_accept(ipx);
 501                         else
 502                                 ip_xprt_recv_event(ipx);
 503                 }
 504         }
 505 
 506         ipx->ipx_cinfo->ipc_timer = fmd_timer_install(ip_hdl, ipx, NULL, 0);
 507         ip_debug(IP_DEBUG_FINE, "close fd %d (timer %d)", ipx->ipx_fd,
 508             (int)ipx->ipx_cinfo->ipc_timer);
 509 }
 510 
 511 static void
 512 ip_xprt_create(fmd_xprt_t *xp, int fd, int flags, ip_cinfo_t *cinfo, char *addr)
 513 {
 514         ip_xprt_t *ipx = fmd_hdl_zalloc(ip_hdl, sizeof (ip_xprt_t), FMD_SLEEP);
 515 
 516         ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_create %p", (void *)ipx);
 517 
 518         ipx->ipx_xprt = xp;
 519         ipx->ipx_flags = flags;
 520         ipx->ipx_fd = fd;
 521         ipx->ipx_tid = fmd_thr_create(ip_hdl, ip_xprt_thread, ipx);
 522         ipx->ipx_cinfo = cinfo;
 523         ipx->ipx_addr = fmd_hdl_strdup(ip_hdl, addr, FMD_SLEEP);
 524 
 525         if (ipx->ipx_xprt != NULL)
 526                 fmd_xprt_setspecific(ip_hdl, ipx->ipx_xprt, ipx);
 527 
 528         (void) pthread_mutex_lock(&ip_lock);
 529 
 530         ipx->ipx_next = ip_xps;
 531         ip_xps = ipx;
 532 
 533         (void) pthread_mutex_unlock(&ip_lock);
 534 }
 535 
 536 static void
 537 ip_xprt_destroy(ip_xprt_t *ipx)
 538 {
 539         ip_xprt_t *ipp, **ppx = &ip_xps;
 540 
 541         ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_destory %s %p",
 542             IPX_ID(ipx), (void *)ipx);
 543 
 544         (void) pthread_mutex_lock(&ip_lock);
 545 
 546         for (ipp = *ppx; ipp != NULL; ipp = ipp->ipx_next) {
 547                 if (ipp != ipx)
 548                         ppx = &ipp->ipx_next;
 549                 else
 550                         break;
 551         }
 552 
 553         if (ipp != ipx) {
 554                 (void) pthread_mutex_unlock(&ip_lock);
 555                 fmd_hdl_abort(ip_hdl, "ipx %p not on xps list\n", (void *)ipx);
 556         }
 557 
 558         *ppx = ipx->ipx_next;
 559         ipx->ipx_next = NULL;
 560 
 561         (void) pthread_mutex_unlock(&ip_lock);
 562 
 563         if (ipx->ipx_spnd_timer)
 564                 fmd_timer_remove(ip_hdl, ipx->ipx_spnd_timer);
 565 
 566         fmd_thr_signal(ip_hdl, ipx->ipx_tid);
 567         fmd_thr_destroy(ip_hdl, ipx->ipx_tid);
 568 
 569         if (ipx->ipx_xprt != NULL)
 570                 fmd_xprt_close(ip_hdl, ipx->ipx_xprt);
 571 
 572         fmd_hdl_free(ip_hdl, ipx->ipx_sndbuf.ipb_buf, ipx->ipx_sndbuf.ipb_size);
 573         fmd_hdl_free(ip_hdl, ipx->ipx_rcvbuf.ipb_buf, ipx->ipx_rcvbuf.ipb_size);
 574 
 575         (void) close(ipx->ipx_fd);
 576         if (ipx->ipx_addr) {
 577                 fmd_hdl_strfree(ip_hdl, ipx->ipx_addr);
 578                 ipx->ipx_addr = NULL;
 579         }
 580         fmd_hdl_free(ip_hdl, ipx, sizeof (ip_xprt_t));
 581 }
 582 
 583 /*
 584  * Loop through the addresses in the connection info structure that were
 585  * created by getaddrinfo() in ip_setup_addr during initialization (_fmd_init)
 586  * and for each one attempt to create a socket and initialize it.  If we are
 587  * successful, return zero.  If we fail, we check ip_retry: if it is non-zero
 588  * we return the last errno and let our caller retry ip_xprt_setup() later.  If
 589  * ip_retry reaches zero, we call fmd_hdl_abort() with an appropriate message.
 590  */
 591 static int
 592 ip_xprt_setup(fmd_hdl_t *hdl, ip_cinfo_t *cinfo)
 593 {
 594         int err, fd, oflags, xflags, optval = 1;
 595         struct addrinfo *aip;
 596         const char *s1, *s2;
 597         struct addrinfo *ail = cinfo->ipc_addr;
 598 
 599         ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_setup %s\n",
 600             cinfo->ipc_name == NULL ? "localhost" : cinfo->ipc_name);
 601 
 602         /*
 603          * Set up flags as specified in the .conf file. Note that these are
 604          * mostly only used for testing purposes, allowing the transport to
 605          * be set up in various modes.
 606          */
 607         xflags = (ip_rdonly == FMD_B_TRUE) ? FMD_XPRT_RDONLY : FMD_XPRT_RDWR;
 608         if (cinfo->ipc_accept)
 609                 xflags |= FMD_XPRT_ACCEPT;
 610         if (ip_external == FMD_B_TRUE)
 611                 xflags |= FMD_XPRT_EXTERNAL;
 612         if (ip_no_remote_repair == FMD_B_TRUE)
 613                 xflags |= FMD_XPRT_NO_REMOTE_REPAIR;
 614         if (ip_hconly == FMD_B_TRUE)
 615                 xflags |= FMD_XPRT_HCONLY;
 616         if (ip_hc_present_only == FMD_B_TRUE)
 617                 xflags |= FMD_XPRT_HC_PRESENT_ONLY;
 618 
 619         for (aip = ail; aip != NULL; aip = aip->ai_next) {
 620                 if (aip->ai_family != AF_INET && aip->ai_family != AF_INET6)
 621                         continue; /* ignore anything that isn't IPv4 or IPv6 */
 622 
 623                 if ((fd = socket(aip->ai_family,
 624                     aip->ai_socktype, aip->ai_protocol)) == -1) {
 625                         err = errno;
 626                         continue;
 627                 }
 628 
 629                 oflags = fcntl(fd, F_GETFL, 0);
 630                 (void) fcntl(fd, F_SETFL, oflags | O_NONBLOCK);
 631 
 632                 if (xflags & FMD_XPRT_ACCEPT) {
 633                         err = setsockopt(fd, SOL_SOCKET,
 634                             SO_REUSEADDR, &optval, sizeof (optval)) != 0 ||
 635                             bind(fd, aip->ai_addr, aip->ai_addrlen) != 0 ||
 636                             listen(fd, ip_qlen) != 0;
 637                 } else {
 638                         err = connect(fd, aip->ai_addr, aip->ai_addrlen);
 639                         if (err)
 640                                 err = errno;
 641                         if (err == EINPROGRESS)
 642                                 err = 0;
 643                 }
 644 
 645                 if (err == 0) {
 646                         ip_xprt_create(NULL, fd, xflags, cinfo, NULL);
 647                         ip_debug(IP_DEBUG_FINER, "Exit ip_xprt_setup");
 648                         return (0);
 649                 }
 650 
 651                 ip_debug(IP_DEBUG_FINE, "Error=%d errno=%d", err, errno);
 652 
 653                 err = errno;
 654                 (void) close(fd);
 655         }
 656 
 657         if (cinfo->ipc_name != NULL) {
 658                 s1 = "failed to connect to";
 659                 s2 = cinfo->ipc_name;
 660         } else {
 661                 s1 = "failed to listen on";
 662                 s2 = ip_port;
 663         }
 664 
 665         if (err == EACCES || cinfo->ipc_retry-- == 0)
 666                 fmd_hdl_abort(hdl, "%s %s: %s\n", s1, s2, strerror(err));
 667 
 668         ip_debug(IP_DEBUG_FINE, "%s %s: %s (will retry)\n",
 669             s1, s2, strerror(err));
 670         ip_debug(IP_DEBUG_FINER, "Exit ip_xprt_setup");
 671         return (err);
 672 }
 673 
 674 /*
 675  * Free address based resources
 676  */
 677 static void
 678 ip_addr_cleanup()
 679 {
 680         ip_cinfo_t *conn;
 681 
 682         (void) pthread_mutex_lock(&ip_conns_lock);
 683         conn = ip_conns;
 684         while (conn != NULL) {
 685                 ip_conns = conn->ipc_next;
 686                 if (conn->ipc_addr != NULL)
 687                         freeaddrinfo(conn->ipc_addr);
 688                 conn->ipc_addr = NULL;
 689                 if (conn->ipc_timer)
 690                         fmd_timer_remove(ip_hdl, conn->ipc_timer);
 691                 fmd_hdl_strfree(ip_hdl, conn->ipc_name);
 692                 fmd_hdl_free(ip_hdl, conn, sizeof (ip_cinfo_t));
 693                 conn = ip_conns;
 694         }
 695         (void) pthread_mutex_unlock(&ip_conns_lock);
 696 
 697         fmd_prop_free_string(ip_hdl, ip_port);
 698 }
 699 
 700 static boolean_t
 701 ip_argis_cinfo(void *arg)
 702 {
 703         boolean_t exists = B_FALSE;
 704         ip_cinfo_t *conn;
 705 
 706         (void) pthread_mutex_lock(&ip_conns_lock);
 707         for (conn = ip_conns; conn != NULL; conn = conn->ipc_next) {
 708                 if (conn == arg) {
 709                         exists = B_TRUE;
 710                         break;
 711                 }
 712         }
 713         (void) pthread_mutex_unlock(&ip_conns_lock);
 714 
 715         return (exists);
 716 }
 717 
 718 
 719 static ip_cinfo_t *
 720 ip_create_cinfo(char *server, boolean_t accept)
 721 {
 722         int err;
 723         struct addrinfo aih;
 724         ip_cinfo_t *cinfo = fmd_hdl_zalloc(
 725             ip_hdl, sizeof (ip_cinfo_t), FMD_NOSLEEP);
 726 
 727         if (cinfo == NULL)
 728                 return (NULL);
 729 
 730         cinfo->ipc_accept = accept;
 731         cinfo->ipc_retry = ip_retry;
 732         if (server != NULL) {
 733                 cinfo->ipc_name = fmd_hdl_strdup(ip_hdl, server, FMD_NOSLEEP);
 734                 if (cinfo->ipc_name == NULL) {
 735                         fmd_hdl_free(ip_hdl, cinfo, sizeof (ip_cinfo_t));
 736                         return (NULL);
 737                 }
 738         }
 739 
 740         bzero(&aih, sizeof (aih));
 741         aih.ai_flags = AI_ADDRCONFIG;
 742         aih.ai_family = AF_UNSPEC;
 743         aih.ai_socktype = SOCK_STREAM;
 744         if (server != NULL) {
 745                 ip_debug(IP_DEBUG_FINE, "resolving %s:%s\n", server, ip_port);
 746         } else {
 747                 aih.ai_flags |= AI_PASSIVE;
 748                 cinfo->ipc_name = fmd_hdl_strdup(
 749                     ip_hdl, "localhost", FMD_NOSLEEP);
 750                 if (cinfo->ipc_name == NULL) {
 751                         fmd_hdl_free(ip_hdl, cinfo, sizeof (ip_cinfo_t));
 752                         return (NULL);
 753                 }
 754         }
 755 
 756         err = getaddrinfo(server, ip_port, &aih, &cinfo->ipc_addr);
 757         if (err != 0) {
 758                 fmd_hdl_error(ip_hdl, "failed to resolve host %s port %s: %s\n",
 759                     cinfo->ipc_name, ip_port, gai_strerror(err));
 760                 cinfo->ipc_addr = NULL;
 761                 fmd_hdl_strfree(ip_hdl, cinfo->ipc_name);
 762                 fmd_hdl_free(ip_hdl, cinfo, sizeof (ip_cinfo_t));
 763                 cinfo = NULL;
 764         }
 765         return (cinfo);
 766 }
 767 
 768 /*
 769  * Setup a single ip address for ip connection.
 770  * If unable to setup any of the addresses then all addresses will be cleaned up
 771  * and non-zero will be returned.
 772  */
 773 static int
 774 ip_setup_addr(char *server, boolean_t accept)
 775 {
 776         int err = 0;
 777         ip_cinfo_t *cinfo = ip_create_cinfo(server, accept);
 778 
 779         if (cinfo == NULL) {
 780                 ip_addr_cleanup();
 781                 err++;
 782         } else {
 783                 (void) pthread_mutex_lock(&ip_conns_lock);
 784                 cinfo->ipc_next = ip_conns;
 785                 ip_conns = cinfo;
 786                 (void) pthread_mutex_unlock(&ip_conns_lock);
 787         }
 788         return (err);
 789 }
 790 
 791 /*
 792  * Setup a ip addresses for an ip connection.  The address can be a comma
 793  * separated list of addresses as well.
 794  * If unable to setup any of the addresses then all addresses will be cleaned up
 795  * and non-zero will be returned.
 796  */
 797 static int
 798 ip_setup_addrs(char *server, boolean_t accept)
 799 {
 800         int err = 0;
 801         char *addr = server;
 802         char *p;
 803 
 804         for (p = server; *p != '\0'; p++) {
 805                 if (*p == ',') {
 806                         *p = '\0';
 807                         err = ip_setup_addr(addr, accept);
 808                         *p = ',';
 809                         if (err)
 810                                 return (err);
 811                         addr = ++p;
 812                         if (*addr == '\0')
 813                                 break;
 814                 }
 815         }
 816         if (*addr != '\0') {
 817                 err = ip_setup_addr(addr, accept);
 818         }
 819         return (err);
 820 }
 821 
 822 /*
 823  * Starts all connections for each configured network address.  If there is an
 824  * error starting a connection a timer will be started for a retry.
 825  */
 826 static void
 827 ip_start_connections()
 828 {
 829         ip_cinfo_t *conn;
 830 
 831         (void) pthread_mutex_lock(&ip_conns_lock);
 832         for (conn = ip_conns; conn != NULL; conn = conn->ipc_next) {
 833                 if (ip_xprt_setup(ip_hdl, conn) != 0) {
 834                         conn->ipc_timer = fmd_timer_install(ip_hdl, conn, NULL,
 835                             ip_sleep);
 836                 }
 837         }
 838         (void) pthread_mutex_unlock(&ip_conns_lock);
 839 }
 840 
 841 /*
 842  * Timeout handler for the transport module.  We use these types of timeouts:
 843  *
 844  * (a) arg is ip_cinfo_t: attempt ip_xprt_setup(), re-install timeout to retry
 845  * (b) arg is ip_xprt_t, FMD_XPRT_SUSPENDED: call fmd_xprt_resume() on arg
 846  * (c) arg is ip_xprt_t, !FMD_XPRT_SUSPENDED: call ip_xprt_destroy() on arg
 847  * (d) arg is NULL, ignore as this shouldn't happen
 848  *
 849  * Case (c) is required as we need to cause the module's main thread, which
 850  * runs this timeout handler, to join with the transport's auxiliary thread.
 851  * If the connection is a client then a timer will be installed to retry
 852  * connecting to the server.
 853  */
 854 static void
 855 ip_timeout(fmd_hdl_t *hdl, id_t id, void *arg) {
 856         int install_timer;
 857         ip_cinfo_t *cinfo;
 858         ip_xprt_t *ipx;
 859 
 860         if (arg == NULL) {
 861                 fmd_hdl_error(hdl, "ip_timeout failed because hg arg is NULL");
 862         } else if (ip_argis_cinfo(arg)) {
 863                 ip_debug(IP_DEBUG_FINER,
 864                         "Enter ip_timeout (a) install new timer");
 865                 cinfo = arg;
 866                 if ((ip_xprt_setup(hdl, arg) != 0) && !ip_quit)
 867                         cinfo->ipc_timer = fmd_timer_install(
 868                                 hdl, cinfo, NULL, ip_sleep);
 869                 else
 870                         cinfo->ipc_timer = NULL;
 871         } else {
 872                 ipx = arg;
 873                 if (ipx->ipx_flags & FMD_XPRT_SUSPENDED) {
 874                         ipx->ipx_spnd_timer = NULL;
 875                         ip_debug(IP_DEBUG_FINE, "timer %d waking ipx %p",
 876                                 (int)id, arg);
 877                         ipx->ipx_flags &= ~FMD_XPRT_SUSPENDED;
 878                         fmd_xprt_resume(hdl, ipx->ipx_xprt);
 879                 } else {
 880                         ip_debug(IP_DEBUG_FINE, "timer %d closing ipx %p",
 881                                 (int)id, arg);
 882                         cinfo = ipx->ipx_cinfo;
 883                         install_timer = (ipx->ipx_flags & FMD_XPRT_ACCEPT) !=
 884                                 FMD_XPRT_ACCEPT;
 885                         ip_xprt_destroy(ipx);
 886                         if (install_timer && !ip_quit)
 887                                 cinfo->ipc_timer = fmd_timer_install(
 888                                         hdl, cinfo, NULL, ip_sleep);
 889                         else
 890                                 cinfo->ipc_timer = NULL;
 891                 }
 892         }
 893 }
 894 
 895 static const fmd_prop_t fmd_props[] = {
 896         { "ip_authority", FMD_TYPE_STRING, NULL },
 897         { "ip_bufsize", FMD_TYPE_SIZE, "4k" },
 898         { "ip_burp", FMD_TYPE_TIME, "0" },
 899         { "ip_enable", FMD_TYPE_BOOL, "false" },
 900         { "ip_mtbf", FMD_TYPE_INT32, "0" },
 901         { "ip_external", FMD_TYPE_BOOL, "true" },
 902         { "ip_no_remote_repair", FMD_TYPE_BOOL, "true" },
 903         { "ip_hconly", FMD_TYPE_BOOL, "false" },
 904         { "ip_rdonly", FMD_TYPE_BOOL, "false" },
 905         { "ip_hc_present_only", FMD_TYPE_BOOL, "false" },
 906         { "ip_domain_name", FMD_TYPE_STRING, NULL },
 907         { "ip_port", FMD_TYPE_STRING, "664" },
 908         { "ip_qlen", FMD_TYPE_INT32, "32" },
 909         { "ip_retry", FMD_TYPE_INT32, "-1" },       /* -1=forever */
 910         { "ip_server", FMD_TYPE_STRING, NULL },     /* server name */
 911         { "ip_sleep", FMD_TYPE_TIME, "10s" },
 912         { "ip_translate", FMD_TYPE_BOOL, "false" },
 913         { "ip_bind_addr", FMD_TYPE_STRING, NULL },  /* network interface addr */
 914         { "ip_debug_level", FMD_TYPE_INT32, "1" },  /* debug levels 0-3 */
 915         { NULL, 0, NULL }
 916 };
 917 
 918 static const fmd_hdl_ops_t fmd_ops = {
 919         ip_fmdo_recv,           /* fmdo_recv */
 920         ip_timeout,             /* fmdo_timeout */
 921         NULL,                   /* fmdo_close */
 922         NULL,                   /* fmdo_stats */
 923         NULL,                   /* fmdo_gc */
 924         ip_fmdo_send,           /* fmdo_send */
 925 };
 926 
 927 static const fmd_hdl_info_t fmd_info = {
 928         "IP Transport Agent", "1.0", &fmd_ops, fmd_props
 929 };
 930 
 931 /*
 932  * Initialize the ip-transport module as either a server or a client.  Note
 933  * that the ip-transport module is not enabled by default under Solaris:
 934  * at present we require a developer or tool to "setprop ip_enable true".
 935  * If ip-transport is needed in the future out-of-the-box on one or more Sun
 936  * platforms, the code to check 'ip_enable' should be replaced with:
 937  *
 938  * (a) configuring ip-transport to operate in client mode by default,
 939  * (b) a platform-specific configuration mechanism, or
 940  * (c) a means to assure security and prevent denial-of-service attacks.
 941  *
 942  * Note that (c) is only an issue when the transport module operates
 943  * in server mode (i.e. with the ip_server property set to NULL) on a
 944  * generic Solaris system which may be exposed directly to the Internet.
 945  * The property ip_bind_addr can be used to define a private network interface
 946  * to use so that the service is not exposed to the Internet.
 947  */
 948 void
 949 _fmd_init(fmd_hdl_t *hdl)
 950 {
 951         char *addr, *auth, *p, *q, *r, *s;
 952         int err;
 953 
 954         if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
 955                 return; /* failed to register handle */
 956 
 957         if (fmd_prop_get_int32(hdl, "ip_enable") == FMD_B_FALSE) {
 958                 fmd_hdl_unregister(hdl);
 959                 return;
 960         }
 961 
 962         (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
 963             sizeof (ip_stat) / sizeof (fmd_stat_t), (fmd_stat_t *)&ip_stat);
 964 
 965         ip_hdl = hdl;
 966         (void) pthread_mutex_init(&ip_lock, NULL);
 967 
 968         ip_burp = fmd_prop_get_int64(hdl, "ip_burp");
 969         ip_mtbf = fmd_prop_get_int32(hdl, "ip_mtbf");
 970         ip_external = fmd_prop_get_int32(hdl, "ip_external");
 971         ip_no_remote_repair = fmd_prop_get_int32(hdl, "ip_no_remote_repair");
 972         ip_hconly = fmd_prop_get_int32(hdl, "ip_hconly");
 973         ip_rdonly = fmd_prop_get_int32(hdl, "ip_rdonly");
 974         ip_hc_present_only = fmd_prop_get_int32(hdl, "ip_hc_present_only");
 975         ip_domain_name = fmd_prop_get_string(hdl, "ip_domain_name");
 976         ip_qlen = fmd_prop_get_int32(hdl, "ip_qlen");
 977         ip_retry = fmd_prop_get_int32(hdl, "ip_retry");
 978         ip_sleep = fmd_prop_get_int64(hdl, "ip_sleep");
 979         ip_translate = fmd_prop_get_int32(hdl, "ip_translate");
 980 
 981         ip_size = (size_t)fmd_prop_get_int64(hdl, "ip_bufsize");
 982         ip_size = MAX(ip_size, sizeof (ip_hdr_t));
 983         ip_port = fmd_prop_get_string(hdl, "ip_port");
 984         ip_debug_level = fmd_prop_get_int32(hdl, "ip_debug_level");
 985 
 986         ip_conns = NULL;
 987         addr = fmd_prop_get_string(hdl, "ip_bind_addr");
 988         if (addr != NULL) {
 989                 err = ip_setup_addrs(addr, B_TRUE);
 990                 if (err) {
 991                         fmd_hdl_abort(hdl, "Unable to setup ip_bind_addr %s",
 992                             addr);
 993                         return;
 994                 }
 995                 fmd_prop_free_string(hdl, addr);
 996         }
 997         addr = fmd_prop_get_string(hdl, "ip_server");
 998         if (addr != NULL) {
 999                 err = ip_setup_addrs(addr, B_FALSE);
1000                 if (err) {
1001                         fmd_hdl_abort(hdl, "Unable to setup ip_server %s",
1002                             addr);
1003                         return;
1004                 }
1005                 fmd_prop_free_string(hdl, addr);
1006         }
1007 
1008         /*
1009          * If no specific connecitons configured then set up general server
1010          * listening on all network ports.
1011          */
1012         if (ip_conns == NULL) {
1013                 if (ip_setup_addr(NULL, B_TRUE) != 0) {
1014                         fmd_hdl_abort(hdl, "Unable to setup server.");
1015                         return;
1016                 }
1017         }
1018 
1019         /*
1020          * If ip_authority is set, tokenize this string and turn it into an
1021          * FMA authority represented as a name-value pair list.  We will use
1022          * this authority for all transports created by this module.  If
1023          * ip_authority isn't set, we'll compute authorities on the fly.
1024          */
1025         if ((auth = fmd_prop_get_string(hdl, "ip_authority")) != NULL) {
1026                 (void) nvlist_alloc(&ip_auth, 0, 0);
1027                 (void) nvlist_add_uint8(ip_auth,
1028                     FM_VERSION, FM_FMRI_AUTH_VERSION);
1029 
1030                 s = strdupa(auth);
1031                 fmd_prop_free_string(hdl, auth);
1032 
1033                 for (p = strtok_r(s, ",", &q); p != NULL;
1034                     p = strtok_r(NULL, ",", &q)) {
1035 
1036                         if ((r = strchr(p, '=')) == NULL) {
1037                                 ip_addr_cleanup();
1038                                 fmd_hdl_abort(hdl, "ip_authority element <%s> "
1039                                     "must be in <name>=<value> form\n", p);
1040                         }
1041 
1042                         *r = '\0';
1043                         (void) nvlist_add_string(ip_auth, p, r + 1);
1044                         *r = '=';
1045                 }
1046         }
1047 
1048         ip_start_connections();
1049 }
1050 
1051 void
1052 _fmd_fini(fmd_hdl_t *hdl)
1053 {
1054         ip_quit++; /* set quit flag before signalling auxiliary threads */
1055 
1056         while (ip_xps != NULL)
1057                 ip_xprt_destroy(ip_xps);
1058 
1059         nvlist_free(ip_auth);
1060 
1061         ip_addr_cleanup();
1062 
1063         if (ip_domain_name != NULL)
1064                 fmd_prop_free_string(ip_hdl, ip_domain_name);
1065 
1066         fmd_hdl_unregister(hdl);
1067 }