github.com/moontrade/nogc@v0.1.7/sync/RWSpinLock.h

/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * N.B. You most likely do _not_ want to use RWSpinLock or any other
 * kind of spinlock. Use SharedMutex instead.
 *
 * In short, spinlocks in preemptive multi-tasking operating systems
 * have serious problems and fast mutexes like SharedMutex are almost
 * certainly the better choice, because letting the OS scheduler put a
 * thread to sleep is better for system responsiveness and throughput
 * than wasting a timeslice repeatedly querying a lock held by a
 * thread that's blocked, and you can't prevent userspace programs
 * from blocking.
 *
 * Spinlocks in an operating system kernel make much more sense than
 * they do in userspace.
 *
 * -------------------------------------------------------------------
 *
 * Two Read-Write spin lock implementations.
 *
 * Ref: http://locklessinc.com/articles/locks
 *
 * Both locks here are faster than pthread_rwlock and have very low
 * overhead (usually 20-30ns). They don't use any system mutexes and
 * are very compact (4/8 bytes), so are suitable for per-instance
 * based locking, particularly when contention is not expected.
 *
 * For a spinlock, RWSpinLock is a reasonable choice. (See the note
 * above for why a spin lock is frequently a bad idea in general.)
 * RWSpinLock has minimal overhead, and comparable contention
 * performance when the number of competing threads is less than or
 * equal to the number of logical CPUs. Even as the number of
 * threads gets larger, RWSpinLock can still be very competitive in
 * READ, although it is slower on WRITE, and also inherently unfair
 * to writers.
 *
 * RWTicketSpinLock shows more balanced READ/WRITE performance. If
 * your application really needs a lot more threads, and a
 * higher-priority writer, prefer one of the RWTicketSpinLock locks.
 *
 * Caveats:
 *
 *   RWTicketSpinLock locks can only be used with GCC on x86/x86-64
 *   based systems.
 *
 *   RWTicketSpinLock<32> only allows up to 2^8 - 1 concurrent
 *   readers and writers.
 *
 *   RWTicketSpinLock<64> only allows up to 2^16 - 1 concurrent
 *   readers and writers.
 *
 *   RWTicketSpinLock<..., true> (kFavorWriter = true, that is, strict
 *   writer priority) is NOT reentrant, even for lock_shared().
 *
 *   The lock will not grant any new shared (read) accesses while a thread
 *   attempting to acquire the lock in write mode is blocked. (That is,
 *   if the lock is held in shared mode by N threads, and a thread attempts
 *   to acquire it in write mode, no one else can acquire it in shared mode
 *   until these N threads release the lock and then the blocked thread
 *   acquires and releases the exclusive lock.)
 *   This also applies for
 *   attempts to reacquire the lock in shared mode by threads that already
 *   hold it in shared mode, making the lock non-reentrant.
 *
 *   RWSpinLock handles 2^30 - 1 concurrent readers.
 *
 * @author Xin Liu <xliux@fb.com>
 */

#pragma once

/*
========================================================================
Benchmark on (Intel(R) Xeon(R) CPU L5630 @ 2.13GHz) 8 cores(16 HTs)
========================================================================

------------------------------------------------------------------------------
1. Single thread benchmark (read/write lock + unlock overhead)
Benchmark                                      Iters   Total t   t/iter  iter/sec
-------------------------------------------------------------------------------
*      BM_RWSpinLockRead                      100000  1.786 ms  17.86 ns    53.4M
+30.5% BM_RWSpinLockWrite                     100000  2.331 ms  23.31 ns   40.91M
+85.7% BM_RWTicketSpinLock32Read              100000  3.317 ms  33.17 ns   28.75M
+96.0% BM_RWTicketSpinLock32Write             100000    3.5 ms     35 ns   27.25M
+85.6% BM_RWTicketSpinLock64Read              100000  3.315 ms  33.15 ns   28.77M
+96.0% BM_RWTicketSpinLock64Write             100000    3.5 ms     35 ns   27.25M
+85.7% BM_RWTicketSpinLock32FavorWriterRead   100000  3.317 ms  33.17 ns   28.75M
+29.7% BM_RWTicketSpinLock32FavorWriterWrite  100000  2.316 ms  23.16 ns   41.18M
+85.3% BM_RWTicketSpinLock64FavorWriterRead   100000  3.309 ms  33.09 ns   28.82M
+30.2% BM_RWTicketSpinLock64FavorWriterWrite  100000  2.325 ms  23.25 ns   41.02M
+ 175% BM_PThreadRWMutexRead                  100000  4.917 ms  49.17 ns    19.4M
+ 166% BM_PThreadRWMutexWrite                 100000  4.757 ms  47.57 ns   20.05M

------------------------------------------------------------------------------
2. Contention Benchmark      90% read  10% write
Benchmark                    hits       average    min       max        sigma
------------------------------------------------------------------------------
---------- 8  threads ------------
RWSpinLock       Write       142666     220ns      78ns      40.8us     269ns
RWSpinLock       Read        1282297    222ns      80ns      37.7us     248ns
RWTicketSpinLock Write       85692      209ns      71ns      17.9us     252ns
RWTicketSpinLock Read        769571     215ns      78ns      33.4us     251ns
pthread_rwlock_t Write       84248      2.48us     99ns      269us      8.19us
pthread_rwlock_t Read        761646     933ns      101ns     374us      3.25us

---------- 16 threads ------------
RWSpinLock       Write       124236     237ns      78ns      261us      801ns
RWSpinLock       Read        1115807    236ns      78ns      2.27ms     2.17us
RWTicketSpinLock Write       81781      231ns      71ns      31.4us     351ns
RWTicketSpinLock Read        734518     238ns      78ns      73.6us     379ns
pthread_rwlock_t Write       83363      7.12us     99ns      785us      28.1us
pthread_rwlock_t Read        754978     2.18us     101ns     1.02ms     14.3us

---------- 50 threads ------------
RWSpinLock       Write       131142     1.37us     82ns      7.53ms     68.2us
RWSpinLock       Read        1181240    262ns      78ns      6.62ms     12.7us
RWTicketSpinLock Write       83045      397ns      73ns      7.01ms     31.5us
RWTicketSpinLock Read        744133     386ns      78ns      11ms       31.4us
pthread_rwlock_t Write       80849      112us      103ns     4.52ms     263us
pthread_rwlock_t Read        728698     24us       101ns     7.28ms     194us

*/

#include "Portability.h"
#include "Asm.h"

#if defined(__GNUC__) && (defined(__i386) || FOLLY_X64 || defined(ARCH_K8))
#define RW_SPINLOCK_USE_X86_INTRINSIC_
#include <x86intrin.h>
#elif defined(_MSC_VER) && defined(FOLLY_X64)
#define RW_SPINLOCK_USE_X86_INTRINSIC_
#elif FOLLY_AARCH64
#define RW_SPINLOCK_USE_X86_INTRINSIC_
#else
#undef RW_SPINLOCK_USE_X86_INTRINSIC_
#endif

// iOS doesn't define _mm_cvtsi64_si128 and friends
#if (FOLLY_SSE >= 2) && !FOLLY_MOBILE && FOLLY_X64
#define RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
#else
#undef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
#endif

#include <algorithm>
#include <atomic>
#include <thread>

#include "Likely.h"

/*
 * A simple, small (4 bytes), but unfair rwlock. Use it when you want
 * a nice writer and don't expect a lot of write/read contention, or
 * when you need small rwlocks since you are creating a large number
 * of them.
 *
 * Note that the unfairness here is extreme: if the lock is
 * continually accessed for read, writers will never get a chance. If
 * the lock can be that highly contended this class is probably not an
 * ideal choice anyway.
 *
 * It currently implements most of the Lockable, SharedLockable and
 * UpgradeLockable concepts except the TimedLockable related locking/unlocking
 * interfaces. (A brief usage sketch follows the class definition below.)
 */
class RWSpinLock {
  enum : int32_t { READER = 4, UPGRADED = 2, WRITER = 1 };

 public:
  constexpr RWSpinLock() : bits_(0) {}

  RWSpinLock(RWSpinLock const&) = delete;
  RWSpinLock& operator=(RWSpinLock const&) = delete;

  // Lockable Concept
  void lock() {
    uint_fast32_t count = 0;
    while (!LIKELY(try_lock())) {
      if (++count > 1000) {
        std::this_thread::yield();
      }
    }
  }

  // Writer is responsible for clearing up both the UPGRADED and WRITER bits.
  void unlock() {
    static_assert(READER > WRITER + UPGRADED, "wrong bits!");
    bits_.fetch_and(~(WRITER | UPGRADED), std::memory_order_release);
  }

  // SharedLockable Concept
  void lock_shared() {
    uint_fast32_t count = 0;
    while (!LIKELY(try_lock_shared())) {
      if (++count > 1000) {
        std::this_thread::yield();
      }
    }
  }

  void unlock_shared() { bits_.fetch_add(-READER, std::memory_order_release); }

  // Downgrade the lock from writer status to reader status.
  void unlock_and_lock_shared() {
    bits_.fetch_add(READER, std::memory_order_acquire);
    unlock();
  }

  // UpgradeLockable Concept
  void lock_upgrade() {
    uint_fast32_t count = 0;
    while (!try_lock_upgrade()) {
      if (++count > 1000) {
        std::this_thread::yield();
      }
    }
  }

  void unlock_upgrade() {
    bits_.fetch_add(-UPGRADED, std::memory_order_acq_rel);
  }

  // unlock upgrade and try to acquire write lock
  void unlock_upgrade_and_lock() {
    int64_t count = 0;
    while (!try_unlock_upgrade_and_lock()) {
      if (++count > 1000) {
        std::this_thread::yield();
      }
    }
  }

  // unlock upgrade and read lock atomically
  void unlock_upgrade_and_lock_shared() {
    bits_.fetch_add(READER - UPGRADED, std::memory_order_acq_rel);
  }

  // write unlock and upgrade lock atomically
  void unlock_and_lock_upgrade() {
    // need to do it in two steps here -- as the UPGRADED bit might be OR-ed
    // at the same time when other threads are trying to do try_lock_upgrade().
    bits_.fetch_or(UPGRADED, std::memory_order_acquire);
    bits_.fetch_add(-WRITER, std::memory_order_release);
  }

  // Attempt to acquire writer permission. Return false if we didn't get it.
  bool try_lock() {
    int32_t expect = 0;
    return bits_.compare_exchange_strong(
        expect, WRITER, std::memory_order_acq_rel);
  }

  // Try to get reader permission on the lock. This can fail if we
  // find out someone is a writer or upgrader.
  // Setting the UPGRADED bit would allow a writer-to-be to indicate
  // its intention to write and block any new readers while waiting
  // for existing readers to finish and release their read locks. This
  // helps avoid starving writers (promoted from upgraders).
  bool try_lock_shared() {
    // fetch_add is considerably (100%) faster than compare_exchange,
    // so here we are optimizing for the common (lock success) case.
    int32_t value = bits_.fetch_add(READER, std::memory_order_acquire);
    if (UNLIKELY(value & (WRITER | UPGRADED))) {
      bits_.fetch_add(-READER, std::memory_order_release);
      return false;
    }
    return true;
  }

  // try to unlock upgrade and write lock atomically
  bool try_unlock_upgrade_and_lock() {
    int32_t expect = UPGRADED;
    return bits_.compare_exchange_strong(
        expect, WRITER, std::memory_order_acq_rel);
  }

  // try to acquire an upgradable lock.
  bool try_lock_upgrade() {
    int32_t value = bits_.fetch_or(UPGRADED, std::memory_order_acquire);

    // Note: when failed, we cannot flip the UPGRADED bit back,
    // as in this case there is either another upgrade lock or a write lock.
    // If it's a write lock, the bit will get cleared up when that lock's done
    // with unlock().
    return ((value & (UPGRADED | WRITER)) == 0);
  }

  // mainly for debugging purposes.
  int32_t bits() const { return bits_.load(std::memory_order_acquire); }

  class FOLLY_NODISCARD ReadHolder;
  class FOLLY_NODISCARD UpgradedHolder;
  class FOLLY_NODISCARD WriteHolder;

  class FOLLY_NODISCARD ReadHolder {
   public:
    explicit ReadHolder(RWSpinLock* lock) : lock_(lock) {
      if (lock_) {
        lock_->lock_shared();
      }
    }

    explicit ReadHolder(RWSpinLock& lock) : lock_(&lock) {
      lock_->lock_shared();
    }

    ReadHolder(ReadHolder&& other) noexcept : lock_(other.lock_) {
      other.lock_ = nullptr;
    }

    // down-grade
    explicit ReadHolder(UpgradedHolder&& upgraded) : lock_(upgraded.lock_) {
      upgraded.lock_ = nullptr;
      if (lock_) {
        lock_->unlock_upgrade_and_lock_shared();
      }
    }

    explicit ReadHolder(WriteHolder&& writer) : lock_(writer.lock_) {
      writer.lock_ = nullptr;
      if (lock_) {
        lock_->unlock_and_lock_shared();
      }
    }

    ReadHolder& operator=(ReadHolder&& other) {
      using std::swap;
      swap(lock_, other.lock_);
      return *this;
    }

    ReadHolder(const ReadHolder& other) = delete;
    ReadHolder& operator=(const ReadHolder& other) = delete;

    ~ReadHolder() {
      if (lock_) {
        lock_->unlock_shared();
      }
    }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) {
        return;
      }
      if (lock_) {
        lock_->unlock_shared();
      }
      lock_ = lock;
      if (lock_) {
        lock_->lock_shared();
      }
    }

    void swap(ReadHolder* other) { std::swap(lock_, other->lock_); }

   private:
    friend class UpgradedHolder;
    friend class WriteHolder;
    RWSpinLock* lock_;
  };

  class FOLLY_NODISCARD UpgradedHolder {
   public:
    explicit UpgradedHolder(RWSpinLock* lock) : lock_(lock) {
      if (lock_) {
        lock_->lock_upgrade();
      }
    }

    explicit UpgradedHolder(RWSpinLock& lock) : lock_(&lock) {
      lock_->lock_upgrade();
    }

    explicit UpgradedHolder(WriteHolder&& writer) {
      lock_ = writer.lock_;
      writer.lock_ = nullptr;
      if (lock_) {
        lock_->unlock_and_lock_upgrade();
      }
    }

    UpgradedHolder(UpgradedHolder&& other) noexcept : lock_(other.lock_) {
      other.lock_ = nullptr;
    }

    UpgradedHolder& operator=(UpgradedHolder&& other) {
      using std::swap;
      swap(lock_, other.lock_);
      return *this;
    }

    UpgradedHolder(const UpgradedHolder& other) = delete;
    UpgradedHolder& operator=(const UpgradedHolder& other) = delete;

    ~UpgradedHolder() {
      if (lock_) {
        lock_->unlock_upgrade();
      }
    }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) {
        return;
      }
      if (lock_) {
        lock_->unlock_upgrade();
      }
      lock_ = lock;
      if (lock_) {
        lock_->lock_upgrade();
      }
    }

    void swap(UpgradedHolder* other) {
      using std::swap;
      swap(lock_, other->lock_);
    }

   private:
    friend class WriteHolder;
    friend class ReadHolder;
    RWSpinLock* lock_;
  };

  class FOLLY_NODISCARD WriteHolder {
   public:
    explicit WriteHolder(RWSpinLock* lock) : lock_(lock) {
      if (lock_) {
        lock_->lock();
      }
    }

    explicit WriteHolder(RWSpinLock& lock) : lock_(&lock) { lock_->lock(); }

    // promoted from an upgrade lock holder
    explicit WriteHolder(UpgradedHolder&& upgraded) {
      lock_ = upgraded.lock_;
      upgraded.lock_ = nullptr;
      if (lock_) {
        lock_->unlock_upgrade_and_lock();
      }
    }

    WriteHolder(WriteHolder&& other) noexcept : lock_(other.lock_) {
      other.lock_ = nullptr;
    }

    WriteHolder& operator=(WriteHolder&& other) {
      using std::swap;
      swap(lock_, other.lock_);
      return *this;
    }

    WriteHolder(const WriteHolder& other) = delete;
    WriteHolder& operator=(const WriteHolder& other) = delete;

    ~WriteHolder() {
      if (lock_) {
        lock_->unlock();
      }
    }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) {
        return;
      }
      if (lock_) {
        lock_->unlock();
      }
      lock_ = lock;
      if (lock_) {
        lock_->lock();
      }
    }

    void swap(WriteHolder* other) {
      using std::swap;
      swap(lock_, other->lock_);
    }

   private:
    friend class ReadHolder;
    friend class UpgradedHolder;
    RWSpinLock* lock_;
  };

 private:
  std::atomic<int32_t> bits_;
};
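
/*
 * Usage sketch (illustrative only, not part of the original header). The
 * Counters struct and the read()/bumpIfNegative() functions below are
 * hypothetical; only the RWSpinLock, ReadHolder, UpgradedHolder, and
 * WriteHolder APIs are taken from this file. std::move assumes <utility>
 * is available.
 *
 *   struct Counters {
 *     RWSpinLock lock;
 *     int64_t value = 0;
 *   };
 *
 *   int64_t read(Counters& c) {
 *     RWSpinLock::ReadHolder guard(&c.lock);  // shared (read) access
 *     return c.value;
 *   }
 *
 *   void bumpIfNegative(Counters& c) {
 *     // Take the upgrade lock first: it excludes writers and other
 *     // upgraders but lets existing readers finish.
 *     RWSpinLock::UpgradedHolder upgraded(&c.lock);
 *     if (c.value < 0) {
 *       // Promote to exclusive access only when a write is actually needed.
 *       RWSpinLock::WriteHolder writer(std::move(upgraded));
 *       c.value = 0;
 *     }
 *   }
 */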

#ifdef RW_SPINLOCK_USE_X86_INTRINSIC_
// A more balanced Read-Write spin lock implemented based on GCC intrinsics.
// (A brief usage sketch follows the RWTicketSpinLock32/64 typedefs below.)

namespace detail {
template <size_t kBitWidth>
struct RWTicketIntTrait {
  static_assert(
      kBitWidth == 32 || kBitWidth == 64,
      "bit width has to be either 32 or 64");
};

template <>
struct RWTicketIntTrait<64> {
  typedef uint64_t FullInt;
  typedef uint32_t HalfInt;
  typedef uint16_t QuarterInt;

#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
  static __m128i make128(const uint16_t v[4]) {
    return _mm_set_epi16(
        0, 0, 0, 0, short(v[3]), short(v[2]), short(v[1]), short(v[0]));
  }
  static inline __m128i fromInteger(uint64_t from) {
    return _mm_cvtsi64_si128(int64_t(from));
  }
  static inline uint64_t toInteger(__m128i in) {
    return uint64_t(_mm_cvtsi128_si64(in));
  }
  static inline uint64_t addParallel(__m128i in, __m128i kDelta) {
    return toInteger(_mm_add_epi16(in, kDelta));
  }
#endif
};

template <>
struct RWTicketIntTrait<32> {
  typedef uint32_t FullInt;
  typedef uint16_t HalfInt;
  typedef uint8_t QuarterInt;

#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
  static __m128i make128(const uint8_t v[4]) {
    // clang-format off
    return _mm_set_epi8(
        0, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0,
        char(v[3]), char(v[2]), char(v[1]), char(v[0]));
    // clang-format on
  }
  static inline __m128i fromInteger(uint32_t from) {
    return _mm_cvtsi32_si128(int32_t(from));
  }
  static inline uint32_t toInteger(__m128i in) {
    return uint32_t(_mm_cvtsi128_si32(in));
  }
  static inline uint32_t addParallel(__m128i in, __m128i kDelta) {
    return toInteger(_mm_add_epi8(in, kDelta));
  }
#endif
};
} // namespace detail

template <size_t kBitWidth, bool kFavorWriter = true>
class RWTicketSpinLockT {
  typedef detail::RWTicketIntTrait<kBitWidth> IntTraitType;
  typedef typename detail::RWTicketIntTrait<kBitWidth>::FullInt FullInt;
  typedef typename detail::RWTicketIntTrait<kBitWidth>::HalfInt HalfInt;
  typedef typename detail::RWTicketIntTrait<kBitWidth>::QuarterInt QuarterInt;

  union RWTicket {
    constexpr RWTicket() : whole(0) {}
    FullInt whole;
    HalfInt readWrite;
    __extension__ struct {
      QuarterInt write;
      QuarterInt read;
      QuarterInt users;
    };
  } ticket;

 private: // Some x64-specific utilities for atomic access to ticket.
  template <class T>
  static T load_acquire(T* addr) {
    T t = *addr; // acquire barrier
    asm_volatile_memory();
    return t;
  }

  template <class T>
  static void store_release(T* addr, T v) {
    asm_volatile_memory();
    *addr = v; // release barrier
  }

 public:
  constexpr RWTicketSpinLockT() {}

  RWTicketSpinLockT(RWTicketSpinLockT const&) = delete;
  RWTicketSpinLockT& operator=(RWTicketSpinLockT const&) = delete;

  void lock() {
    if (kFavorWriter) {
      writeLockAggressive();
    } else {
      writeLockNice();
    }
  }

  /*
   * Both try_lock and try_lock_shared diverge in our implementation from the
   * lock algorithm described in the link above.
   *
   * In the read case, it is undesirable that the readers could wait
   * for another reader (before increasing ticket.read in the other
   * implementation). Our approach gives up on
   * first-come-first-serve, but our benchmarks showed improved
   * performance for both readers and writers under heavily contended
   * cases, particularly when the number of threads exceeds the number
   * of logical CPUs.
   *
   * We have writeLockAggressive() using the original implementation
   * for a writer, which gives some advantage to the writer over the
   * readers---for that path it is guaranteed that the writer will
   * acquire the lock after all the existing readers exit.
   */
  bool try_lock() {
    RWTicket t;
    FullInt old = t.whole = load_acquire(&ticket.whole);
    if (t.users != t.write) {
      return false;
    }
    ++t.users;
    return __sync_bool_compare_and_swap(&ticket.whole, old, t.whole);
  }

  /*
   * Call this if you want to prioritize the writer to avoid starvation.
   * Unlike writeLockNice, immediately acquires the write lock when
   * the existing readers (arriving before the writer) finish their
   * turns.
   */
  void writeLockAggressive() {
    // std::this_thread::yield() is needed here to avoid a pathology if the
    // number of threads attempting concurrent writes is >= the number of real
    // cores allocated to this process. This is less likely than the
    // corresponding situation in lock_shared(), but we still want to
    // avoid it.
    uint_fast32_t count = 0;
    QuarterInt val = __sync_fetch_and_add(&ticket.users, 1);
    while (val != load_acquire(&ticket.write)) {
      asm_volatile_pause();
      if (UNLIKELY(++count > 1000)) {
        std::this_thread::yield();
      }
    }
  }

  // Call this when the writer should be nicer to the readers.
  void writeLockNice() {
    // Here it doesn't cpu-relax the writer.
    //
    // This is because usually we have many more readers than the
    // writers, so the writer has less chance to get the lock when
    // there are a lot of competing readers. The aggressive spinning
    // can help to avoid starving writers.
    //
    // We don't worry about std::this_thread::yield() here because the caller
    // has already explicitly abandoned fairness.
    while (!try_lock()) {
    }
  }

  // Atomically unlock the write-lock from writer and acquire the read-lock.
  void unlock_and_lock_shared() {
    QuarterInt val = __sync_fetch_and_add(&ticket.read, 1);
  }

  // Release writer permission on the lock.
  void unlock() {
    RWTicket t;
    t.whole = load_acquire(&ticket.whole);

#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
    FullInt old = t.whole;
    // SSE2 can reduce the lock and unlock overhead by 10%
    static const QuarterInt kDeltaBuf[4] = {1, 1, 0, 0}; // write/read/user
    static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
    __m128i m = IntTraitType::fromInteger(old);
    t.whole = IntTraitType::addParallel(m, kDelta);
#else
    ++t.read;
    ++t.write;
#endif
    store_release(&ticket.readWrite, t.readWrite);
  }

  void lock_shared() {
    // std::this_thread::yield() is important here because we can't grab the
    // shared lock if there is a pending writeLockAggressive, so we
    // need to let threads that already have a shared lock complete.
    uint_fast32_t count = 0;
    while (!LIKELY(try_lock_shared())) {
      asm_volatile_pause();
      if (UNLIKELY((++count & 1023) == 0)) {
        std::this_thread::yield();
      }
    }
  }

  bool try_lock_shared() {
    RWTicket t, old;
    old.whole = t.whole = load_acquire(&ticket.whole);
    old.users = old.read;
#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
    // SSE2 may reduce the total lock and unlock overhead by 10%
    static const QuarterInt kDeltaBuf[4] = {0, 1, 1, 0}; // write/read/user
    static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
    __m128i m = IntTraitType::fromInteger(old.whole);
    t.whole = IntTraitType::addParallel(m, kDelta);
#else
    ++t.read;
    ++t.users;
#endif
    return __sync_bool_compare_and_swap(&ticket.whole, old.whole, t.whole);
  }

  void unlock_shared() { __sync_fetch_and_add(&ticket.write, 1); }

  class FOLLY_NODISCARD WriteHolder;

  typedef RWTicketSpinLockT<kBitWidth, kFavorWriter> RWSpinLock;
  class FOLLY_NODISCARD ReadHolder {
   public:
    ReadHolder(ReadHolder const&) = delete;
    ReadHolder& operator=(ReadHolder const&) = delete;

    explicit ReadHolder(RWSpinLock* lock) : lock_(lock) {
      if (lock_) {
        lock_->lock_shared();
      }
    }

    explicit ReadHolder(RWSpinLock& lock) : lock_(&lock) {
      if (lock_) {
        lock_->lock_shared();
      }
    }

    // atomically unlock the write-lock from writer and acquire the read-lock
    explicit ReadHolder(WriteHolder* writer) : lock_(nullptr) {
      std::swap(this->lock_, writer->lock_);
      if (lock_) {
        lock_->unlock_and_lock_shared();
      }
    }

    ~ReadHolder() {
      if (lock_) {
        lock_->unlock_shared();
      }
    }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock_) {
        lock_->unlock_shared();
      }
      lock_ = lock;
      if (lock_) {
        lock_->lock_shared();
      }
    }

    void swap(ReadHolder* other) { std::swap(this->lock_, other->lock_); }

   private:
    RWSpinLock* lock_;
  };

  class FOLLY_NODISCARD WriteHolder {
   public:
    WriteHolder(WriteHolder const&) = delete;
    WriteHolder& operator=(WriteHolder const&) = delete;

    explicit WriteHolder(RWSpinLock* lock) : lock_(lock) {
      if (lock_) {
        lock_->lock();
      }
    }
    explicit WriteHolder(RWSpinLock& lock) : lock_(&lock) {
      if (lock_) {
        lock_->lock();
      }
    }

    ~WriteHolder() {
      if (lock_) {
        lock_->unlock();
      }
    }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) {
        return;
      }
      if (lock_) {
        lock_->unlock();
      }
      lock_ = lock;
      if (lock_) {
        lock_->lock();
      }
    }

    void swap(WriteHolder* other) { std::swap(this->lock_, other->lock_); }

   private:
    friend class ReadHolder;
    RWSpinLock* lock_;
  };
};

typedef RWTicketSpinLockT<32> RWTicketSpinLock32;
typedef RWTicketSpinLockT<64> RWTicketSpinLock64;
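
/*
 * Usage sketch for the ticket lock (illustrative only, not part of the
 * original header). The Stats struct and the total()/recordHit() functions
 * are hypothetical; only RWTicketSpinLock32 and its ReadHolder/WriteHolder
 * are taken from this file. With the default kFavorWriter = true, lock()
 * spins aggressively and acquires the lock once the readers that arrived
 * before it have drained.
 *
 *   struct Stats {
 *     RWTicketSpinLock32 lock;
 *     uint64_t hits = 0;
 *     uint64_t misses = 0;
 *   };
 *
 *   uint64_t total(Stats& s) {
 *     RWTicketSpinLock32::ReadHolder guard(&s.lock);  // shared access
 *     return s.hits + s.misses;
 *   }
 *
 *   void recordHit(Stats& s) {
 *     RWTicketSpinLock32::WriteHolder guard(&s.lock);  // exclusive access
 *     ++s.hits;
 *   }
 */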

#endif // RW_SPINLOCK_USE_X86_INTRINSIC_

#ifdef RW_SPINLOCK_USE_X86_INTRINSIC_
#undef RW_SPINLOCK_USE_X86_INTRINSIC_
#endif