github.com/afumu/libc@v0.0.6/musl/src/aio/aio.c

#include <aio.h>
#include <pthread.h>
#include <semaphore.h>
#include <limits.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/auxv.h>
#include "syscall.h"
#include "atomic.h"
#include "pthread_impl.h"

/* The following is a threads-based implementation of AIO with minimal
 * dependence on implementation details. Most synchronization is
 * performed with pthread primitives, but atomics and futex operations
 * are used for notification in a couple places where the pthread
 * primitives would be inefficient or impractical.
 *
 * For each fd with outstanding aio operations, an aio_queue structure
 * is maintained. These are reference-counted and destroyed by the last
 * aio worker thread to exit. Accessing any member of the aio_queue
 * structure requires a lock on the aio_queue. Adding and removing aio
 * queues themselves requires a write lock on the global map object,
 * a 4-level table mapping file descriptor numbers to aio queues. A
 * read lock on the map is used to obtain locks on existing queues by
 * excluding destruction of the queue by a different thread while it is
 * being locked.
 *
 * Each aio queue has a list of active threads/operations. Presently there
 * is a one to one relationship between threads and operations. The only
 * members of the aio_thread structure which are accessed by other threads
 * are the linked list pointers, op (which is immutable), running (which
 * is updated atomically), and err (which is synchronized via running),
 * so no locking is necessary. Most of the other members are used
 * for sharing data between the main flow of execution and the
 * cancellation cleanup handler.
 *
 * Taking any aio locks requires having all signals blocked. This is
 * necessary because aio_cancel is needed by close, and close is required
 * to be async-signal safe. All aio worker threads run with all signals
 * blocked permanently.
 */

struct aio_thread {
	pthread_t td;
	struct aiocb *cb;
	struct aio_thread *next, *prev;
	struct aio_queue *q;
	volatile int running;
	int err, op;
	ssize_t ret;
};

struct aio_queue {
	int fd, seekable, append, ref, init;
	pthread_mutex_t lock;
	pthread_cond_t cond;
	struct aio_thread *head;
};

struct aio_args {
	struct aiocb *cb;
	struct aio_queue *q;
	int op;
	sem_t sem;
};

static pthread_rwlock_t maplock = PTHREAD_RWLOCK_INITIALIZER;
static struct aio_queue *****map;
static volatile int aio_fd_cnt;
volatile int __aio_fut;

static struct aio_queue *__aio_get_queue(int fd, int need)
{
	if (fd < 0) {
		errno = EBADF;
		return 0;
	}
	int a=fd>>24;
	unsigned char b=fd>>16, c=fd>>8, d=fd;
	struct aio_queue *q = 0;
	pthread_rwlock_rdlock(&maplock);
	if ((!map || !map[a] || !map[a][b] || !map[a][b][c] || !(q=map[a][b][c][d])) && need) {
		pthread_rwlock_unlock(&maplock);
		if (fcntl(fd, F_GETFD) < 0) return 0;
		pthread_rwlock_wrlock(&maplock);
		if (!map) map = calloc(sizeof *map, (-1U/2+1)>>24);
		if (!map) goto out;
		if (!map[a]) map[a] = calloc(sizeof **map, 256);
		if (!map[a]) goto out;
		if (!map[a][b]) map[a][b] = calloc(sizeof ***map, 256);
		if (!map[a][b]) goto out;
		if (!map[a][b][c]) map[a][b][c] = calloc(sizeof ****map, 256);
		if (!map[a][b][c]) goto out;
		if (!(q = map[a][b][c][d])) {
			map[a][b][c][d] = q = calloc(sizeof *****map, 1);
			if (q) {
				q->fd = fd;
				pthread_mutex_init(&q->lock, 0);
				pthread_cond_init(&q->cond, 0);
				a_inc(&aio_fd_cnt);
			}
		}
	}
	if (q) pthread_mutex_lock(&q->lock);
out:
	pthread_rwlock_unlock(&maplock);
	return q;
}

static void __aio_unref_queue(struct aio_queue *q)
{
	if (q->ref > 1) {
		q->ref--;
		pthread_mutex_unlock(&q->lock);
		return;
	}

	/* This is potentially the last reference, but a new reference
	 * may arrive since we cannot free the queue object without first
	 * taking the maplock, which requires releasing the queue lock. */
	pthread_mutex_unlock(&q->lock);
	pthread_rwlock_wrlock(&maplock);
	pthread_mutex_lock(&q->lock);
	if (q->ref == 1) {
		int fd=q->fd;
		int a=fd>>24;
		unsigned char b=fd>>16, c=fd>>8, d=fd;
		map[a][b][c][d] = 0;
		a_dec(&aio_fd_cnt);
		pthread_rwlock_unlock(&maplock);
		pthread_mutex_unlock(&q->lock);
		free(q);
	} else {
		q->ref--;
		pthread_rwlock_unlock(&maplock);
		pthread_mutex_unlock(&q->lock);
	}
}

static void cleanup(void *ctx)
{
	struct aio_thread *at = ctx;
	struct aio_queue *q = at->q;
	struct aiocb *cb = at->cb;
	struct sigevent sev = cb->aio_sigevent;

	/* There are four potential types of waiters we could need to wake:
	 *   1. Callers of aio_cancel/close.
	 *   2. Callers of aio_suspend with a single aiocb.
	 *   3. Callers of aio_suspend with a list.
	 *   4. AIO worker threads waiting for sequenced operations.
	 * Types 1-3 are notified via atomics/futexes, mainly for AS-safety
	 * considerations. Type 4 is notified later via a cond var. */

	cb->__ret = at->ret;
	if (a_swap(&at->running, 0) < 0)
		__wake(&at->running, -1, 1);
	if (a_swap(&cb->__err, at->err) != EINPROGRESS)
		__wake(&cb->__err, -1, 1);
	if (a_swap(&__aio_fut, 0))
		__wake(&__aio_fut, -1, 1);

	pthread_mutex_lock(&q->lock);

	if (at->next) at->next->prev = at->prev;
	if (at->prev) at->prev->next = at->next;
	else q->head = at->next;

	/* Signal aio worker threads waiting for sequenced operations. */
	pthread_cond_broadcast(&q->cond);

	__aio_unref_queue(q);

	if (sev.sigev_notify == SIGEV_SIGNAL) {
		siginfo_t si = {
			.si_signo = sev.sigev_signo,
			.si_value = sev.sigev_value,
			.si_code = SI_ASYNCIO,
			.si_pid = getpid(),
			.si_uid = getuid()
		};
		__syscall(SYS_rt_sigqueueinfo, si.si_pid, si.si_signo, &si);
	}
	if (sev.sigev_notify == SIGEV_THREAD) {
		a_store(&__pthread_self()->cancel, 0);
		sev.sigev_notify_function(sev.sigev_value);
	}
}

static void *io_thread_func(void *ctx)
{
	struct aio_thread at, *p;

	struct aio_args *args = ctx;
	struct aiocb *cb = args->cb;
	int fd = cb->aio_fildes;
	int op = args->op;
	void *buf = (void *)cb->aio_buf;
	size_t len = cb->aio_nbytes;
	off_t off = cb->aio_offset;

	struct aio_queue *q = args->q;
	ssize_t ret;

	pthread_mutex_lock(&q->lock);
	sem_post(&args->sem);

	at.op = op;
	at.running = 1;
	at.ret = -1;
	at.err = ECANCELED;
	at.q = q;
	at.td = __pthread_self();
	at.cb = cb;
	at.prev = 0;
	if ((at.next = q->head)) at.next->prev = &at;
	q->head = &at;

	if (!q->init) {
		int seekable = lseek(fd, 0, SEEK_CUR) >= 0;
		q->seekable = seekable;
		q->append = !seekable || (fcntl(fd, F_GETFL) & O_APPEND);
		q->init = 1;
	}

	pthread_cleanup_push(cleanup, &at);

	/* Wait for sequenced operations. */
	if (op!=LIO_READ && (op!=LIO_WRITE || q->append)) {
		for (;;) {
			for (p=at.next; p && p->op!=LIO_WRITE; p=p->next);
			if (!p) break;
			pthread_cond_wait(&q->cond, &q->lock);
		}
	}

	pthread_mutex_unlock(&q->lock);

	switch (op) {
	case LIO_WRITE:
		ret = q->append ? write(fd, buf, len) : pwrite(fd, buf, len, off);
		break;
	case LIO_READ:
		ret = !q->seekable ? read(fd, buf, len) : pread(fd, buf, len, off);
		break;
	case O_SYNC:
		ret = fsync(fd);
		break;
	case O_DSYNC:
		ret = fdatasync(fd);
		break;
	}
	at.ret = ret;
	at.err = ret<0 ? errno : 0;

	pthread_cleanup_pop(1);

	return 0;
}

static size_t io_thread_stack_size = MINSIGSTKSZ+2048;
static pthread_once_t init_stack_size_once;

static void init_stack_size()
{
	unsigned long val = __getauxval(AT_MINSIGSTKSZ);
	if (val > MINSIGSTKSZ) io_thread_stack_size = val + 512;
}

static int submit(struct aiocb *cb, int op)
{
	int ret = 0;
	pthread_attr_t a;
	sigset_t allmask, origmask;
	pthread_t td;
	struct aio_queue *q = __aio_get_queue(cb->aio_fildes, 1);
	struct aio_args args = { .cb = cb, .op = op, .q = q };
	sem_init(&args.sem, 0, 0);

	if (!q) {
		if (errno != EBADF) errno = EAGAIN;
		cb->__ret = -1;
		cb->__err = errno;
		return -1;
	}
	q->ref++;
	pthread_mutex_unlock(&q->lock);

	if (cb->aio_sigevent.sigev_notify == SIGEV_THREAD) {
		if (cb->aio_sigevent.sigev_notify_attributes)
			a = *cb->aio_sigevent.sigev_notify_attributes;
		else
			pthread_attr_init(&a);
	} else {
		pthread_once(&init_stack_size_once, init_stack_size);
		pthread_attr_init(&a);
		pthread_attr_setstacksize(&a, io_thread_stack_size);
		pthread_attr_setguardsize(&a, 0);
	}
	pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED);
	sigfillset(&allmask);
	pthread_sigmask(SIG_BLOCK, &allmask, &origmask);
	cb->__err = EINPROGRESS;
	if (pthread_create(&td, &a, io_thread_func, &args)) {
		pthread_mutex_lock(&q->lock);
		__aio_unref_queue(q);
		cb->__err = errno = EAGAIN;
		cb->__ret = ret = -1;
	}
	pthread_sigmask(SIG_SETMASK, &origmask, 0);

	if (!ret) {
		while (sem_wait(&args.sem));
	}

	return ret;
}

int aio_read(struct aiocb *cb)
{
	return submit(cb, LIO_READ);
}

int aio_write(struct aiocb *cb)
{
	return submit(cb, LIO_WRITE);
}

int aio_fsync(int op, struct aiocb *cb)
{
	if (op != O_SYNC && op != O_DSYNC) {
		errno = EINVAL;
		return -1;
	}
	return submit(cb, op);
}

ssize_t aio_return(struct aiocb *cb)
{
	return cb->__ret;
}

int aio_error(const struct aiocb *cb)
{
	a_barrier();
	return cb->__err & 0x7fffffff;
}

int aio_cancel(int fd, struct aiocb *cb)
{
	sigset_t allmask, origmask;
	int ret = AIO_ALLDONE;
	struct aio_thread *p;
	struct aio_queue *q;

	/* Unspecified behavior case. Report an error. */
	if (cb && fd != cb->aio_fildes) {
		errno = EINVAL;
		return -1;
	}

	sigfillset(&allmask);
	pthread_sigmask(SIG_BLOCK, &allmask, &origmask);

	errno = ENOENT;
	if (!(q = __aio_get_queue(fd, 0))) {
		if (errno == EBADF) ret = -1;
		goto done;
	}

	for (p = q->head; p; p = p->next) {
		if (cb && cb != p->cb) continue;
		/* Transition target from running to running-with-waiters */
		if (a_cas(&p->running, 1, -1)) {
			pthread_cancel(p->td);
			__wait(&p->running, 0, -1, 1);
			if (p->err == ECANCELED) ret = AIO_CANCELED;
		}
	}

	pthread_mutex_unlock(&q->lock);
done:
	pthread_sigmask(SIG_SETMASK, &origmask, 0);
	return ret;
}

int __aio_close(int fd)
{
	a_barrier();
	if (aio_fd_cnt) aio_cancel(fd, 0);
	return fd;
}

weak_alias(aio_cancel, aio_cancel64);
weak_alias(aio_error, aio_error64);
weak_alias(aio_fsync, aio_fsync64);
weak_alias(aio_read, aio_read64);
weak_alias(aio_write, aio_write64);
weak_alias(aio_return, aio_return64);
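
Usage note (not part of the file above): the exported functions here implement the standard POSIX AIO interface declared in <aio.h>, so a program drives them entirely through a struct aiocb. The following is a minimal caller-side sketch under that assumption: it submits one aio_write with no completion notification, polls aio_error() until the worker thread's cleanup handler publishes a final error code, and then collects the byte count with aio_return(). The file path, buffer contents, and poll interval are arbitrary illustration choices, not anything defined in aio.c.

#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Open an ordinary file to write to; the path is just an example. */
	int fd = open("/tmp/aio-demo.txt", O_CREAT | O_WRONLY | O_TRUNC, 0600);
	if (fd < 0) { perror("open"); return 1; }

	static const char msg[] = "hello from POSIX AIO\n";

	/* Describe the request: fd, buffer, length, offset, no notification. */
	struct aiocb cb = { 0 };
	cb.aio_fildes = fd;
	cb.aio_buf = (void *)msg;
	cb.aio_nbytes = sizeof msg - 1;
	cb.aio_offset = 0;
	cb.aio_sigevent.sigev_notify = SIGEV_NONE;

	/* submit() spawns a detached worker thread for this control block. */
	if (aio_write(&cb)) { perror("aio_write"); return 1; }

	/* Poll until the operation leaves the EINPROGRESS state. */
	while (aio_error(&cb) == EINPROGRESS)
		usleep(1000);

	int err = aio_error(&cb);
	ssize_t n = aio_return(&cb);
	if (err) fprintf(stderr, "aio_write failed: %s\n", strerror(err));
	else printf("wrote %zd bytes asynchronously\n", n);

	close(fd);
	return err ? 1 : 0;
}

On musl the AIO functions live in libc itself; on older glibc the same program would typically need to be linked with -lrt. A production caller would normally prefer aio_suspend() or a SIGEV_THREAD/SIGEV_SIGNAL notification over the busy polling shown here.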