1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * Portions Copyright 2008 Denis Cheng 26 */ 27 28 #include "config.h" 29 #include "filebench.h" 30 #include "flowop.h" 31 #include "threadflow.h" /* For aiolist definition */ 32 33 #ifndef HAVE_OFF64_T 34 /* 35 * We are probably on linux. 36 * According to http://www.suse.de/~aj/linux_lfs.html, defining the 37 * above, automatically changes type of off_t to off64_t. so let 38 * us use only off_t as off64_t is not defined 39 */ 40 #define off64_t off_t 41 #endif /* HAVE_OFF64_T */ 42 43 #include <fcntl.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <unistd.h> 47 #include <libgen.h> 48 #include <sys/mman.h> 49 #include <sys/stat.h> 50 #include <sys/types.h> 51 #include <sys/param.h> 52 #include <sys/resource.h> 53 54 #include "filebench.h" 55 #include "fsplug.h" 56 57 #ifdef HAVE_AIO 58 #include <aio.h> 59 #endif /* HAVE_AIO */ 60 61 #ifdef HAVE_LIBAIO_H 62 #include <libaio.h> 63 #endif /* HAVE_LIBAIO_H */ 64 65 #ifndef HAVE_AIOCB64_T 66 #define aiocb64 aiocb 67 #endif /* HAVE_AIOCB64_T */ 68 69 /* 70 * These routines implement local file access. They are placed into a 71 * vector of functions that are called by all I/O operations in fileset.c 72 * and flowop_library.c. This represents the default file system plug-in, 73 * and may be replaced by vectors for other file system plug-ins. 74 */ 75 76 static int fb_lfs_freemem(fb_fdesc_t *fd, off64_t size); 77 static int fb_lfs_open(fb_fdesc_t *, char *, int, int); 78 static int fb_lfs_pread(fb_fdesc_t *, caddr_t, fbint_t, off64_t); 79 static int fb_lfs_read(fb_fdesc_t *, caddr_t, fbint_t); 80 static int fb_lfs_pwrite(fb_fdesc_t *, caddr_t, fbint_t, off64_t); 81 static int fb_lfs_write(fb_fdesc_t *, caddr_t, fbint_t); 82 static int fb_lfs_lseek(fb_fdesc_t *, off64_t, int); 83 static int fb_lfs_truncate(fb_fdesc_t *, off64_t); 84 static int fb_lfs_rename(const char *, const char *); 85 static int fb_lfs_close(fb_fdesc_t *); 86 static int fb_lfs_link(const char *, const char *); 87 static int fb_lfs_symlink(const char *, const char *); 88 static int fb_lfs_unlink(char *); 89 static ssize_t fb_lfs_readlink(const char *, char *, size_t); 90 static int fb_lfs_mkdir(char *, int); 91 static int fb_lfs_rmdir(char *); 92 static DIR *fb_lfs_opendir(char *); 93 static struct dirent *fb_lfs_readdir(DIR *); 94 static int fb_lfs_closedir(DIR *); 95 static int fb_lfs_fsync(fb_fdesc_t *); 96 static int fb_lfs_stat(char *, struct stat64 *); 97 static int fb_lfs_fstat(fb_fdesc_t *, struct stat64 *); 98 static int fb_lfs_access(const char *, int); 99 static void fb_lfs_recur_rm(char *); 100 101 static fsplug_func_t fb_lfs_funcs = 102 { 103 "locfs", 104 fb_lfs_freemem, /* flush page cache */ 105 fb_lfs_open, /* open */ 106 fb_lfs_pread, /* pread */ 107 fb_lfs_read, /* read */ 108 fb_lfs_pwrite, /* pwrite */ 109 fb_lfs_write, /* write */ 110 fb_lfs_lseek, /* lseek */ 111 fb_lfs_truncate, /* ftruncate */ 112 fb_lfs_rename, /* rename */ 113 fb_lfs_close, /* close */ 114 fb_lfs_link, /* link */ 115 fb_lfs_symlink, /* symlink */ 116 fb_lfs_unlink, /* unlink */ 117 fb_lfs_readlink, /* readlink */ 118 fb_lfs_mkdir, /* mkdir */ 119 fb_lfs_rmdir, /* rmdir */ 120 fb_lfs_opendir, /* opendir */ 121 fb_lfs_readdir, /* readdir */ 122 fb_lfs_closedir, /* closedir */ 123 fb_lfs_fsync, /* fsync */ 124 fb_lfs_stat, /* stat */ 125 fb_lfs_fstat, /* fstat */ 126 fb_lfs_access, /* access */ 127 fb_lfs_recur_rm /* recursive rm */ 128 }; 129 130 #ifdef HAVE_AIO 131 /* 132 * Local file system asynchronous IO flowops are in this module, as 133 * they have a number of local file system specific features. 134 */ 135 static int fb_lfsflow_aiowrite(threadflow_t *threadflow, flowop_t *flowop); 136 static int fb_lfsflow_aiowait(threadflow_t *threadflow, flowop_t *flowop); 137 138 static flowop_proto_t fb_lfsflow_funcs[] = { 139 FLOW_TYPE_AIO, FLOW_ATTR_WRITE, "aiowrite", flowop_init_generic, 140 fb_lfsflow_aiowrite, flowop_destruct_generic, 141 FLOW_TYPE_AIO, 0, "aiowait", flowop_init_generic, 142 fb_lfsflow_aiowait, flowop_destruct_generic 143 }; 144 145 #endif /* HAVE_AIO */ 146 147 /* 148 * Initialize this processes I/O functions vector to point to 149 * the vector of local file system I/O functions 150 */ 151 void 152 fb_lfs_funcvecinit(void) 153 { 154 fs_functions_vec = &fb_lfs_funcs; 155 } 156 157 /* 158 * Initialize those flowops whose implementation is file system 159 * specific. 160 */ 161 void 162 fb_lfs_flowinit(void) 163 { 164 int nops; 165 166 /* 167 * re-initialize the I/O functions vector while we are at 168 * it as it may have been redefined since the process was 169 * created, at least if this is the master processes 170 */ 171 fb_lfs_funcvecinit(); 172 173 #ifdef HAVE_AIO 174 nops = sizeof (fb_lfsflow_funcs) / sizeof (flowop_proto_t); 175 flowop_flow_init(fb_lfsflow_funcs, nops); 176 #endif /* HAVE_AIO */ 177 } 178 179 /* 180 * Frees up memory mapped file region of supplied size. The 181 * file descriptor "fd" indicates which memory mapped file. 182 * If successful, returns 0. Otherwise returns -1 if "size" 183 * is zero, or -1 times the number of times msync() failed. 184 */ 185 static int 186 fb_lfs_freemem(fb_fdesc_t *fd, off64_t size) 187 { 188 off64_t left; 189 int ret = 0; 190 191 for (left = size; left > 0; left -= MMAP_SIZE) { 192 off64_t thismapsize; 193 caddr_t addr; 194 195 thismapsize = MIN(MMAP_SIZE, left); 196 addr = mmap64(0, thismapsize, PROT_READ|PROT_WRITE, 197 MAP_SHARED, fd->fd_num, size - left); 198 ret += msync(addr, thismapsize, MS_INVALIDATE); 199 (void) munmap(addr, thismapsize); 200 } 201 return (ret); 202 } 203 204 /* 205 * Does a posix pread. Returns what the pread() returns. 206 */ 207 static int 208 fb_lfs_pread(fb_fdesc_t *fd, caddr_t iobuf, fbint_t iosize, off64_t fileoffset) 209 { 210 return (pread64(fd->fd_num, iobuf, iosize, fileoffset)); 211 } 212 213 /* 214 * Does a posix read. Returns what the read() returns. 215 */ 216 static int 217 fb_lfs_read(fb_fdesc_t *fd, caddr_t iobuf, fbint_t iosize) 218 { 219 return (read(fd->fd_num, iobuf, iosize)); 220 } 221 222 #ifdef HAVE_AIO 223 224 /* 225 * Asynchronous write section. An Asynchronous IO element 226 * (aiolist_t) is used to associate the asynchronous write request with 227 * its subsequent completion. This element includes a aiocb64 struct 228 * that is used by posix aio_xxx calls to track the asynchronous writes. 229 * The flowops aiowrite and aiowait result in calls to these posix 230 * aio_xxx system routines to do the actual asynchronous write IO 231 * operations. 232 */ 233 234 235 /* 236 * Allocates an asynchronous I/O list (aio, of type 237 * aiolist_t) element. Adds it to the flowop thread's 238 * threadflow aio list. Returns a pointer to the element. 239 */ 240 static aiolist_t * 241 aio_allocate(flowop_t *flowop) 242 { 243 aiolist_t *aiolist; 244 245 if ((aiolist = malloc(sizeof (aiolist_t))) == NULL) { 246 filebench_log(LOG_ERROR, "malloc aiolist failed"); 247 filebench_shutdown(1); 248 } 249 250 /* Add to list */ 251 if (flowop->fo_thread->tf_aiolist == NULL) { 252 flowop->fo_thread->tf_aiolist = aiolist; 253 aiolist->al_next = NULL; 254 } else { 255 aiolist->al_next = flowop->fo_thread->tf_aiolist; 256 flowop->fo_thread->tf_aiolist = aiolist; 257 } 258 return (aiolist); 259 } 260 261 /* 262 * Searches for the aiolist element that has a matching 263 * completion block, aiocb. If none found returns FILEBENCH_ERROR. If 264 * found, removes the aiolist element from flowop thread's 265 * list and returns FILEBENCH_OK. 266 */ 267 static int 268 aio_deallocate(flowop_t *flowop, struct aiocb64 *aiocb) 269 { 270 aiolist_t *aiolist = flowop->fo_thread->tf_aiolist; 271 aiolist_t *previous = NULL; 272 aiolist_t *match = NULL; 273 274 if (aiocb == NULL) { 275 filebench_log(LOG_ERROR, "null aiocb deallocate"); 276 return (FILEBENCH_OK); 277 } 278 279 while (aiolist) { 280 if (aiocb == &(aiolist->al_aiocb)) { 281 match = aiolist; 282 break; 283 } 284 previous = aiolist; 285 aiolist = aiolist->al_next; 286 } 287 288 if (match == NULL) 289 return (FILEBENCH_ERROR); 290 291 /* Remove from the list */ 292 if (previous) 293 previous->al_next = match->al_next; 294 else 295 flowop->fo_thread->tf_aiolist = match->al_next; 296 297 return (FILEBENCH_OK); 298 } 299 300 /* 301 * Emulate posix aiowrite(). Determines which file to use, 302 * either one file of a fileset, or the file associated 303 * with a fileobj, allocates and fills an aiolist_t element 304 * for the write, and issues the asynchronous write. This 305 * operation is only valid for random IO, and returns an 306 * error if the flowop is set for sequential IO. Returns 307 * FILEBENCH_OK on success, FILEBENCH_NORSC if iosetup can't 308 * obtain a file to open, and FILEBENCH_ERROR on any 309 * encountered error. 310 */ 311 static int 312 fb_lfsflow_aiowrite(threadflow_t *threadflow, flowop_t *flowop) 313 { 314 caddr_t iobuf; 315 fbint_t wss; 316 fbint_t iosize; 317 fb_fdesc_t *fdesc; 318 int ret; 319 320 iosize = avd_get_int(flowop->fo_iosize); 321 322 if ((ret = flowoplib_iosetup(threadflow, flowop, &wss, &iobuf, 323 &fdesc, iosize)) != FILEBENCH_OK) 324 return (ret); 325 326 if (avd_get_bool(flowop->fo_random)) { 327 uint64_t fileoffset; 328 struct aiocb64 *aiocb; 329 aiolist_t *aiolist; 330 331 if (filebench_randomno64(&fileoffset, 332 wss, iosize, NULL) == -1) { 333 filebench_log(LOG_ERROR, 334 "file size smaller than IO size for thread %s", 335 flowop->fo_name); 336 return (FILEBENCH_ERROR); 337 } 338 339 aiolist = aio_allocate(flowop); 340 aiolist->al_type = AL_WRITE; 341 aiocb = &aiolist->al_aiocb; 342 343 aiocb->aio_fildes = fdesc->fd_num; 344 aiocb->aio_buf = iobuf; 345 aiocb->aio_nbytes = (size_t)iosize; 346 aiocb->aio_offset = (off64_t)fileoffset; 347 aiocb->aio_reqprio = 0; 348 349 filebench_log(LOG_DEBUG_IMPL, 350 "aio fd=%d, bytes=%llu, offset=%llu", 351 fdesc->fd_num, (u_longlong_t)iosize, 352 (u_longlong_t)fileoffset); 353 354 flowop_beginop(threadflow, flowop); 355 if (aio_write64(aiocb) < 0) { 356 filebench_log(LOG_ERROR, "aiowrite failed: %s", 357 strerror(errno)); 358 filebench_shutdown(1); 359 } 360 flowop_endop(threadflow, flowop, iosize); 361 } else { 362 return (FILEBENCH_ERROR); 363 } 364 365 return (FILEBENCH_OK); 366 } 367 368 369 370 #define MAXREAP 4096 371 372 /* 373 * Emulate posix aiowait(). Waits for the completion of half the 374 * outstanding asynchronous IOs, or a single IO, which ever is 375 * larger. The routine will return after a sufficient number of 376 * completed calls issued by any thread in the procflow have 377 * completed, or a 1 second timout elapses. All completed 378 * IO operations are deleted from the thread's aiolist. 379 */ 380 static int 381 fb_lfsflow_aiowait(threadflow_t *threadflow, flowop_t *flowop) 382 { 383 struct aiocb64 **worklist; 384 aiolist_t *aio = flowop->fo_thread->tf_aiolist; 385 int uncompleted = 0; 386 387 worklist = calloc(MAXREAP, sizeof (struct aiocb64 *)); 388 389 /* Count the list of pending aios */ 390 while (aio) { 391 uncompleted++; 392 aio = aio->al_next; 393 } 394 395 do { 396 uint_t ncompleted = 0; 397 uint_t todo; 398 struct timespec timeout; 399 int inprogress; 400 int i; 401 402 /* Wait for half of the outstanding requests */ 403 timeout.tv_sec = 1; 404 timeout.tv_nsec = 0; 405 406 if (uncompleted > MAXREAP) 407 todo = MAXREAP; 408 else 409 todo = uncompleted / 2; 410 411 if (todo == 0) 412 todo = 1; 413 414 flowop_beginop(threadflow, flowop); 415 416 #if (defined(HAVE_AIOWAITN) && defined(USE_PROCESS_MODEL)) 417 if (((aio_waitn64((struct aiocb64 **)worklist, 418 MAXREAP, &todo, &timeout)) == -1) && 419 errno && (errno != ETIME)) { 420 filebench_log(LOG_ERROR, 421 "aiowait failed: %s, outstanding = %d, " 422 "ncompleted = %d ", 423 strerror(errno), uncompleted, todo); 424 } 425 426 ncompleted = todo; 427 /* Take the completed I/Os from the list */ 428 inprogress = 0; 429 for (i = 0; i < ncompleted; i++) { 430 if ((aio_return64(worklist[i]) == -1) && 431 (errno == EINPROGRESS)) { 432 inprogress++; 433 continue; 434 } 435 if (aio_deallocate(flowop, worklist[i]) 436 == FILEBENCH_ERROR) { 437 filebench_log(LOG_ERROR, "Could not remove " 438 "aio from list "); 439 flowop_endop(threadflow, flowop, 0); 440 return (FILEBENCH_ERROR); 441 } 442 } 443 444 uncompleted -= ncompleted; 445 uncompleted += inprogress; 446 447 #else 448 449 for (ncompleted = 0, inprogress = 0, 450 aio = flowop->fo_thread->tf_aiolist; 451 ncompleted < todo, aio != NULL; aio = aio->al_next) { 452 int result = aio_error64(&aio->al_aiocb); 453 454 if (result == EINPROGRESS) { 455 inprogress++; 456 continue; 457 } 458 459 if ((aio_return64(&aio->al_aiocb) == -1) || result) { 460 filebench_log(LOG_ERROR, "aio failed: %s", 461 strerror(result)); 462 continue; 463 } 464 465 ncompleted++; 466 467 if (aio_deallocate(flowop, &aio->al_aiocb) < 0) { 468 filebench_log(LOG_ERROR, "Could not remove " 469 "aio from list "); 470 flowop_endop(threadflow, flowop, 0); 471 return (FILEBENCH_ERROR); 472 } 473 } 474 475 uncompleted -= ncompleted; 476 477 #endif 478 filebench_log(LOG_DEBUG_SCRIPT, 479 "aio2 completed %d ios, uncompleted = %d, inprogress = %d", 480 ncompleted, uncompleted, inprogress); 481 482 } while (uncompleted > MAXREAP); 483 484 flowop_endop(threadflow, flowop, 0); 485 486 free(worklist); 487 488 return (FILEBENCH_OK); 489 } 490 491 #endif /* HAVE_AIO */ 492 493 /* 494 * Does an open64 of a file. Inserts the file descriptor number returned 495 * by open() into the supplied filebench fd. Returns FILEBENCH_OK on 496 * successs, and FILEBENCH_ERROR on failure. 497 */ 498 499 static int 500 fb_lfs_open(fb_fdesc_t *fd, char *path, int flags, int perms) 501 { 502 if ((fd->fd_num = open64(path, flags, perms)) < 0) 503 return (FILEBENCH_ERROR); 504 else 505 return (FILEBENCH_OK); 506 } 507 508 /* 509 * Does an unlink (delete) of a file. 510 */ 511 static int 512 fb_lfs_unlink(char *path) 513 { 514 return (unlink(path)); 515 } 516 517 /* 518 * Does a readlink of a symbolic link. 519 */ 520 static ssize_t 521 fb_lfs_readlink(const char *path, char *buf, size_t buf_size) 522 { 523 return (readlink(path, buf, buf_size)); 524 } 525 526 /* 527 * Does fsync of a file. Returns with fsync return info. 528 */ 529 static int 530 fb_lfs_fsync(fb_fdesc_t *fd) 531 { 532 return (fsync(fd->fd_num)); 533 } 534 535 /* 536 * Do a posix lseek of a file. Return what lseek() returns. 537 */ 538 static int 539 fb_lfs_lseek(fb_fdesc_t *fd, off64_t offset, int whence) 540 { 541 return (lseek64(fd->fd_num, offset, whence)); 542 } 543 544 /* 545 * Do a posix rename of a file. Return what rename() returns. 546 */ 547 static int 548 fb_lfs_rename(const char *old, const char *new) 549 { 550 return (rename(old, new)); 551 } 552 553 554 /* 555 * Do a posix close of a file. Return what close() returns. 556 */ 557 static int 558 fb_lfs_close(fb_fdesc_t *fd) 559 { 560 return (close(fd->fd_num)); 561 } 562 563 /* 564 * Use mkdir to create a directory. 565 */ 566 static int 567 fb_lfs_mkdir(char *path, int perm) 568 { 569 return (mkdir(path, perm)); 570 } 571 572 /* 573 * Use rmdir to delete a directory. Returns what rmdir() returns. 574 */ 575 static int 576 fb_lfs_rmdir(char *path) 577 { 578 return (rmdir(path)); 579 } 580 581 /* 582 * does a recursive rm to remove an entire directory tree (i.e. a fileset). 583 * Supplied with the path to the root of the tree. 584 */ 585 static void 586 fb_lfs_recur_rm(char *path) 587 { 588 char cmd[MAXPATHLEN]; 589 590 (void) snprintf(cmd, sizeof (cmd), "rm -rf %s", path); 591 (void) system(cmd); 592 } 593 594 /* 595 * Does a posix opendir(), Returns a directory handle on success, 596 * NULL on failure. 597 */ 598 static DIR * 599 fb_lfs_opendir(char *path) 600 { 601 return (opendir(path)); 602 } 603 604 /* 605 * Does a readdir() call. Returns a pointer to a table of directory 606 * information on success, NULL on failure. 607 */ 608 static struct dirent * 609 fb_lfs_readdir(DIR *dirp) 610 { 611 return (readdir(dirp)); 612 } 613 614 /* 615 * Does a closedir() call. 616 */ 617 static int 618 fb_lfs_closedir(DIR *dirp) 619 { 620 return (closedir(dirp)); 621 } 622 623 /* 624 * Does an fstat of a file. 625 */ 626 static int 627 fb_lfs_fstat(fb_fdesc_t *fd, struct stat64 *statbufp) 628 { 629 return (fstat64(fd->fd_num, statbufp)); 630 } 631 632 /* 633 * Does a stat of a file. 634 */ 635 static int 636 fb_lfs_stat(char *path, struct stat64 *statbufp) 637 { 638 return (stat64(path, statbufp)); 639 } 640 641 /* 642 * Do a pwrite64 to a file. 643 */ 644 static int 645 fb_lfs_pwrite(fb_fdesc_t *fd, caddr_t iobuf, fbint_t iosize, off64_t offset) 646 { 647 return (pwrite64(fd->fd_num, iobuf, iosize, offset)); 648 } 649 650 /* 651 * Do a write to a file. 652 */ 653 static int 654 fb_lfs_write(fb_fdesc_t *fd, caddr_t iobuf, fbint_t iosize) 655 { 656 return (write(fd->fd_num, iobuf, iosize)); 657 } 658 659 /* 660 * Does a truncate operation and returns the result 661 */ 662 static int 663 fb_lfs_truncate(fb_fdesc_t *fd, off64_t fse_size) 664 { 665 #ifdef HAVE_FTRUNCATE64 666 return (ftruncate64(fd->fd_num, fse_size)); 667 #else 668 return (ftruncate(fd->fd_num, (off_t)fse_size)); 669 #endif 670 } 671 672 /* 673 * Does a link operation and returns the result 674 */ 675 static int 676 fb_lfs_link(const char *existing, const char *new) 677 { 678 return (link(existing, new)); 679 } 680 681 /* 682 * Does a symlink operation and returns the result 683 */ 684 static int 685 fb_lfs_symlink(const char *existing, const char *new) 686 { 687 return (symlink(existing, new)); 688 } 689 690 /* 691 * Does an access() check on a file. 692 */ 693 static int 694 fb_lfs_access(const char *path, int amode) 695 { 696 return (access(path, amode)); 697 }