diff --git a/src/Nazara/Core/TaskScheduler.cpp b/src/Nazara/Core/TaskScheduler.cpp
index 8beb7d012..2c25aebb2 100644
--- a/src/Nazara/Core/TaskScheduler.cpp
+++ b/src/Nazara/Core/TaskScheduler.cpp
@@ -11,6 +11,11 @@
 #include <...>
 #include <...>
 #include <...>
+
+#ifdef NAZARA_WITH_TSAN
+#include <sanitizer/tsan_interface.h>
+#endif
+
 #include <...>
 
 namespace Nz
@@ -89,12 +94,13 @@ namespace Nz
 			{
 				// FIXME: We can't use pop() because push() and pop() are not thread-safe (and push is called on another thread), but steal() is
 				// is it an issue?
-				std::optional<TaskScheduler::Task*> task = m_tasks.steal();
+				TaskScheduler::Task* task = m_tasks.steal();
 				if (!task)
 				{
 					for (unsigned int workerIndex : randomWorkerIndices)
 					{
-						if (task = m_owner.GetWorker(workerIndex).StealTask())
+						task = m_owner.GetWorker(workerIndex).StealTask();
+						if (task)
 							break;
 					}
 				}
@@ -107,8 +113,12 @@ namespace Nz
 					idle = false;
 				}
 
-				NAZARA_ASSUME(*task != nullptr);
-				(**task)();
+#ifdef NAZARA_WITH_TSAN
+				// Workaround for TSan false-positive
+				__tsan_acquire(task);
+#endif
+
+				(*task)();
 			}
 			else
 			{
@@ -126,7 +136,7 @@ namespace Nz
 			while (m_running.load(std::memory_order_relaxed));
 		}
 
-		std::optional<TaskScheduler::Task*> StealTask()
+		TaskScheduler::Task* StealTask()
 		{
 			return m_tasks.steal();
 		}
@@ -149,7 +159,7 @@ namespace Nz
 		std::atomic_bool m_running;
 		std::atomic_flag m_notifier;
 		std::thread m_thread; //< std::jthread is not yet widely implemented
-		WorkStealingQueue<TaskScheduler::Task*> m_tasks;
+		WorkStealingQueue<TaskScheduler::Task*, TaskScheduler::Task*> m_tasks;
 		TaskScheduler& m_owner;
 		unsigned int m_workerIndex;
 	};
@@ -189,6 +199,11 @@ namespace Nz
 
 		std::size_t taskIndex; //< not used
 		Task* taskPtr = m_tasks.Allocate(taskIndex, std::move(task));
 
+#ifdef NAZARA_WITH_TSAN
+		// Workaround for TSan false-positive
+		__tsan_release(taskPtr);
+#endif
+
 		Worker& worker = m_workers[m_nextWorkerIndex++];
 		worker.AddTask(taskPtr);
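Note on the `__tsan_acquire`/`__tsan_release` pair introduced above: ThreadSanitizer does not model standalone `std::atomic_thread_fence`, so the fence-based publication used by the work-stealing deque (see wsq.hpp below) is reported as a data race even though it is correct under the C++ memory model. The annotations from `<sanitizer/tsan_interface.h>` restore the missing happens-before edge by hand. A minimal, self-contained sketch of the same pattern; the names (`Task`, `g_slot`, `producer`, `consumer`) are illustrative, not from the engine:

```cpp
// Sketch only: build with e.g. -std=c++17 -fsanitize=thread -DNAZARA_WITH_TSAN
#include <atomic>
#include <thread>

#ifdef NAZARA_WITH_TSAN
#include <sanitizer/tsan_interface.h>
#endif

struct Task { int payload = 0; };

std::atomic<Task*> g_slot{ nullptr };

void producer(Task* task)
{
	task->payload = 42; // plain write, published below

#ifdef NAZARA_WITH_TSAN
	// TSan does not understand the release fence below, so hand it the
	// happens-before edge; pairs with __tsan_acquire on the same address.
	__tsan_release(task);
#endif

	// Correct per the C++ memory model: release fence + relaxed store publishes...
	std::atomic_thread_fence(std::memory_order_release);
	g_slot.store(task, std::memory_order_relaxed);
}

void consumer()
{
	Task* task;
	while (!(task = g_slot.load(std::memory_order_relaxed)))
		std::this_thread::yield();

	// ...and relaxed load + acquire fence observes the publication.
	std::atomic_thread_fence(std::memory_order_acquire);

#ifdef NAZARA_WITH_TSAN
	__tsan_acquire(task);
#endif

	// Without the annotations, TSan flags this read as racing with producer().
	int value = task->payload;
	(void)value;
}

int main()
{
	Task task;
	std::thread t1(producer, &task);
	std::thread t2(consumer);
	t1.join();
	t2.join();
}
```

Without `-fsanitize=thread` the annotations compile away entirely, which is why the define is only injected when the corresponding sanitizer option is enabled (see the xmake.lua hunk at the end).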
diff --git a/thirdparty/include/wsq.hpp b/thirdparty/include/wsq.hpp
index c73b9ffb9..312f51a52 100644
--- a/thirdparty/include/wsq.hpp
+++ b/thirdparty/include/wsq.hpp
@@ -1,5 +1,10 @@
 #pragma once
 
+// This file comes from https://github.com/taskflow/work-stealing-queue
+// and has been modified by the Nazara author (SirLynix):
+// - _top and _bottom atomics are now aligned to double the cache line size
+// - the queue allows overriding the value type returned by pop/steal
+
 #include <atomic>
 #include <vector>
 #include <optional>
@@ -20,7 +25,7 @@ available at https://www.di.ens.fr/~zappa/readings/ppopp13.pdf.
 Only the queue owner can perform pop and push operations,
 while others can steal data from the queue.
 */
-template <typename T>
+template <typename T, typename V = std::optional<T>>
 class WorkStealingQueue {
 
   struct Array {
@@ -64,11 +69,11 @@
 
   // avoids false sharing between _top and _bottom
 #ifdef __cpp_lib_hardware_interference_size
-  alignas(std::hardware_destructive_interference_size) std::atomic<int64_t> _top;
-  alignas(std::hardware_destructive_interference_size) std::atomic<int64_t> _bottom;
+  alignas(std::hardware_destructive_interference_size * 2) std::atomic<int64_t> _top;
+  alignas(std::hardware_destructive_interference_size * 2) std::atomic<int64_t> _bottom;
 #else
-  alignas(64) std::atomic<int64_t> _top;
-  alignas(64) std::atomic<int64_t> _bottom;
+  alignas(64 * 2) std::atomic<int64_t> _top;
+  alignas(64 * 2) std::atomic<int64_t> _bottom;
 #endif
   std::atomic<Array*> _array;
   std::vector<Array*> _garbage;
@@ -120,22 +125,22 @@
   @brief pops out an item from the queue
 
   Only the owner thread can pop out an item from the queue.
-  The return can be a @std_nullopt if this operation failed (empty queue).
+  The return can be a default-constructed V if this operation failed (empty queue).
   */
-  std::optional<T> pop();
+  V pop();
 
   /**
   @brief steals an item from the queue
 
   Any threads can try to steal an item from the queue.
-  The return can be a @std_nullopt if this operation failed (not necessarily empty).
+  The return can be a default-constructed V if this operation failed (not necessarily empty).
   */
-  std::optional<T> steal();
+  V steal();
 };
 
 // Constructor
-template <typename T>
-WorkStealingQueue<T>::WorkStealingQueue(int64_t c) {
+template <typename T, typename V>
+WorkStealingQueue<T, V>::WorkStealingQueue(int64_t c) {
   assert(c && (!(c & (c-1))));
   _top.store(0, std::memory_order_relaxed);
   _bottom.store(0, std::memory_order_relaxed);
@@ -144,8 +149,8 @@ WorkStealingQueue<T>::WorkStealingQueue(int64_t c) {
 }
 
 // Destructor
-template <typename T>
-WorkStealingQueue<T>::~WorkStealingQueue() {
+template <typename T, typename V>
+WorkStealingQueue<T, V>::~WorkStealingQueue() {
   for(auto a : _garbage) {
     delete a;
   }
@@ -153,25 +158,25 @@
 }
 
 // Function: empty
-template <typename T>
-bool WorkStealingQueue<T>::empty() const noexcept {
+template <typename T, typename V>
+bool WorkStealingQueue<T, V>::empty() const noexcept {
   int64_t b = _bottom.load(std::memory_order_relaxed);
   int64_t t = _top.load(std::memory_order_relaxed);
   return b <= t;
 }
 
 // Function: size
-template <typename T>
-size_t WorkStealingQueue<T>::size() const noexcept {
+template <typename T, typename V>
+size_t WorkStealingQueue<T, V>::size() const noexcept {
   int64_t b = _bottom.load(std::memory_order_relaxed);
   int64_t t = _top.load(std::memory_order_relaxed);
   return static_cast<size_t>(b >= t ? b - t : 0);
 }
 
 // Function: push
-template <typename T>
+template <typename T, typename V>
 template <typename O>
-void WorkStealingQueue<T>::push(O&& o) {
+void WorkStealingQueue<T, V>::push(O&& o) {
   int64_t b = _bottom.load(std::memory_order_relaxed);
   int64_t t = _top.load(std::memory_order_acquire);
   Array* a = _array.load(std::memory_order_relaxed);
@@ -190,15 +195,15 @@
 }
 
 // Function: pop
-template <typename T>
-std::optional<T> WorkStealingQueue<T>::pop() {
+template <typename T, typename V>
+V WorkStealingQueue<T, V>::pop() {
   int64_t b = _bottom.load(std::memory_order_relaxed) - 1;
   Array* a = _array.load(std::memory_order_relaxed);
   _bottom.store(b, std::memory_order_relaxed);
   std::atomic_thread_fence(std::memory_order_seq_cst);
   int64_t t = _top.load(std::memory_order_relaxed);
 
-  std::optional<T> item;
+  V item = {};
 
   if(t <= b) {
     item = a->pop(b);
@@ -207,7 +212,7 @@
       if(!_top.compare_exchange_strong(t, t+1,
                                        std::memory_order_seq_cst,
                                        std::memory_order_relaxed)) {
-        item = std::nullopt;
+        item = V{};
       }
       _bottom.store(b + 1, std::memory_order_relaxed);
     }
@@ -220,13 +225,13 @@
 }
 
 // Function: steal
-template <typename T>
-std::optional<T> WorkStealingQueue<T>::steal() {
+template <typename T, typename V>
+V WorkStealingQueue<T, V>::steal() {
   int64_t t = _top.load(std::memory_order_acquire);
   std::atomic_thread_fence(std::memory_order_seq_cst);
   int64_t b = _bottom.load(std::memory_order_acquire);
 
-  std::optional<T> item;
+  V item = {};
 
   if(t < b) {
     Array* a = _array.load(std::memory_order_consume);
@@ -234,7 +239,7 @@
     if(!_top.compare_exchange_strong(t, t+1,
                                      std::memory_order_seq_cst,
                                      std::memory_order_relaxed)) {
-      return std::nullopt;
+      return V{};
     }
   }
 
@@ -242,8 +247,8 @@
   return item;
 }
 
 // Function: capacity
-template <typename T>
-int64_t WorkStealingQueue<T>::capacity() const noexcept {
+template <typename T, typename V>
+int64_t WorkStealingQueue<T, V>::capacity() const noexcept {
   return _array.load(std::memory_order_relaxed)->capacity();
 }
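The new `V` parameter is what lets the scheduler above drop `std::optional`: `WorkStealingQueue<T>` keeps the upstream `std::optional<T>` interface, while `WorkStealingQueue<Task*, Task*>` makes `pop()`/`steal()` return the pointer directly, with a default-constructed `V{}` (here `nullptr`) signaling failure. The doubled alignment on `_top`/`_bottom`, for its part, presumably accounts for CPUs that prefetch cache lines in adjacent pairs (e.g. Intel's spatial prefetcher pulls 128-byte chunks), which can reintroduce false sharing across two neighboring 64-byte lines. A small sketch of both instantiations (single-threaded purely to show the API; in real use only the owner thread may call `push`/`pop`):

```cpp
#include <cassert>
#include <optional>
#include <wsq.hpp>

struct Task
{
	void operator()() { /* run the task */ }
};

int main()
{
	// Upstream-style instantiation: failure is an empty std::optional.
	WorkStealingQueue<int> defaultQueue(64); // capacity must be a power of two
	defaultQueue.push(1);
	std::optional<int> value = defaultQueue.steal();
	assert(value && *value == 1);
	assert(!defaultQueue.pop()); // empty queue -> std::nullopt

	// Scheduler-style instantiation: V = Task*, failure is a null pointer.
	Task task;
	WorkStealingQueue<Task*, Task*> taskQueue(64);
	taskQueue.push(&task);
	if (Task* stolen = taskQueue.steal()) // what a thief worker does
		(*stolen)();
	assert(taskQueue.pop() == nullptr); // empty queue -> V{} == nullptr
}
```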
diff --git a/xmake.lua b/xmake.lua
index 163ff1ba6..8fb514304 100644
--- a/xmake.lua
+++ b/xmake.lua
@@ -251,6 +251,7 @@ for opt, policy in table.orderpairs(sanitizers) do
 	option(opt, { description = "Enable " .. opt, default = false })
 
 	if has_config(opt) then
+		add_defines("NAZARA_WITH_" .. opt:upper())
 		set_policy("build.sanitizer." .. policy, true)
 	end
 end
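With this hunk, every enabled sanitizer option also injects a `NAZARA_WITH_<OPT>` define (via `opt:upper()`, so e.g. a `tsan` option yields the `NAZARA_WITH_TSAN` tested by the TaskScheduler.cpp hunks). If more annotation sites appear later, the repeated `#ifdef` blocks could be folded into no-op macros; a hypothetical helper, not part of this diff:

```cpp
// Hypothetical Nazara-side helper (e.g. in a shared header); only a sketch of
// how the generated define could be consumed, the diff itself does not add it.
#ifdef NAZARA_WITH_TSAN
#include <sanitizer/tsan_interface.h>
#define NAZARA_TSAN_ACQUIRE(addr) __tsan_acquire(addr)
#define NAZARA_TSAN_RELEASE(addr) __tsan_release(addr)
#else
#define NAZARA_TSAN_ACQUIRE(addr) ((void)0)
#define NAZARA_TSAN_RELEASE(addr) ((void)0)
#endif
```

Call sites would then shrink to single `NAZARA_TSAN_RELEASE(taskPtr);` / `NAZARA_TSAN_ACQUIRE(task);` lines with no `#ifdef` noise.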