Core/TaskScheduler: Rework using atomics and WorkStealingQueue
commit 7f1ef0fe41
parent 5db0c4ed09
@@ -8,6 +8,7 @@
 #define NAZARA_CORE_TASKSCHEDULER_HPP
 
 #include <NazaraUtils/Prerequisites.hpp>
+#include <NazaraUtils/MemoryPool.hpp>
 #include <Nazara/Core/Config.hpp>
 #include <atomic>
 #include <functional>
@@ -27,7 +28,7 @@ namespace Nz
 
             void AddTask(Task&& task);
 
-            unsigned int GetWorkerCount() const;
+            inline unsigned int GetWorkerCount() const;
 
             void WaitForTasks();
 
@@ -46,6 +47,8 @@ namespace Nz
             std::atomic_uint m_idleWorkerCount;
             std::size_t m_nextWorkerIndex;
             std::vector<Worker> m_workers;
+            MemoryPool<Task> m_tasks;
+            unsigned int m_workerCount;
     };
 }
@@ -6,6 +6,10 @@
 
 namespace Nz
 {
+    inline unsigned int TaskScheduler::GetWorkerCount() const
+    {
+        return m_workerCount;
+    }
 }
 
 #include <Nazara/Core/DebugOff.hpp>
@@ -6,10 +6,10 @@
 #include <Nazara/Core/Core.hpp>
 #include <Nazara/Core/ThreadExt.hpp>
 #include <NazaraUtils/StackArray.hpp>
-#include <condition_variable>
-#include <mutex>
+#include <wsq.hpp>
 #include <new>
 #include <random>
+#include <semaphore>
 #include <thread>
 #include <Nazara/Core/Debug.hpp>
@@ -53,30 +53,28 @@ namespace Nz
             ~Worker()
             {
                 m_running = false;
-                m_conditionVariable.notify_one();
+                if (!m_notifier.test_and_set())
+                    m_notifier.notify_one();
 
                 m_thread.join();
             }
 
-            bool AddTask(Task&& task)
+            void AddTask(TaskScheduler::Task* task)
             {
-                std::unique_lock lock(m_mutex, std::defer_lock);
-                if (!lock.try_lock())
-                    return false;
-
-                m_tasks.push_back(std::move(task));
-                lock.unlock();
-
-                m_conditionVariable.notify_one();
-                return true;
+                m_tasks.push(task);
+                if (!m_notifier.test_and_set())
+                    m_notifier.notify_one();
             }
 
             void Run()
             {
-                bool idle = true;
+                m_notifier.wait(false); // wait until task scheduler finishes initializing
 
                 StackArray<unsigned int> randomWorkerIndices = NazaraStackArrayNoInit(unsigned int, m_owner.GetWorkerCount() - 1);
                 {
                     unsigned int* indexPtr = randomWorkerIndices.data();
-                    for (unsigned int i = 0; i < randomWorkerIndices.size(); ++i)
+                    for (unsigned int i = 0; i < m_owner.GetWorkerCount(); ++i)
                     {
                         if (i != m_workerIndex)
                             *indexPtr++ = i;
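Note: the mutex/condition_variable pair is gone; each worker is now woken through a C++20 std::atomic_flag. The producer sets the flag and only calls notify_one() when it was previously clear, while the sleeping worker waits for the flag to become set and clears it before re-checking its queue. A minimal, self-contained sketch of that pattern (names are illustrative, not from this commit):

#include <atomic>
#include <cstdio>
#include <thread>

std::atomic_flag notifier; // default-initialized to clear since C++20

void consumer()
{
    notifier.wait(false); // block while the flag is still false
    notifier.clear();     // consume the wakeup so the next wait() blocks again
    std::puts("woken up");
}

int main()
{
    std::thread t(consumer);
    if (!notifier.test_and_set()) // test_and_set returns the previous value
        notifier.notify_one();    // notify only when the flag was previously clear
    t.join();
}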
@@ -86,27 +84,9 @@ namespace Nz
                     std::shuffle(randomWorkerIndices.begin(), randomWorkerIndices.end(), gen);
                 }
 
-                for (;;)
+                bool idle = true;
+                do
                 {
-                    std::unique_lock lock(m_mutex);
-
-                    // Wait for tasks if we don't have any right now
-                    if (m_tasks.empty())
-                    {
-                        if (!idle)
-                        {
-                            m_owner.NotifyWorkerIdle();
-                            idle = true;
-                        }
-
-                        m_conditionVariable.wait(lock, [this] { return !m_running || !m_tasks.empty(); });
-                    }
-
-                    if (!m_running)
-                        break;
-
-                    auto ExecuteTask = [&](TaskScheduler::Task& task)
+                    auto ExecuteTask = [&](TaskScheduler::Task* task)
                     {
                         if (idle)
                         {
@@ -114,50 +94,44 @@ namespace Nz
                             idle = false;
                         }
 
-                        task();
+                        (*task)();
                     };
 
-                    if (!m_tasks.empty())
-                    {
-                        TaskScheduler::Task task = std::move(m_tasks.front());
-                        m_tasks.erase(m_tasks.begin());
-
-                        lock.unlock();
-
-                        ExecuteTask(task);
-                    }
+                    // Wait for tasks if we don't have any right now
+                    std::optional<TaskScheduler::Task*> task = m_tasks.pop();
+                    if (task)
+                        ExecuteTask(*task);
                     else
                     {
-                        lock.unlock();
-
-                        // Try to steal a task from another worker in a random order to avoid lock contention
-                        TaskScheduler::Task task;
+                        // Try to steal a task from another worker in a random order to avoid contention
                         for (unsigned int workerIndex : randomWorkerIndices)
                         {
-                            if (m_owner.GetWorker(workerIndex).StealTask(&task))
+                            if (task = m_owner.GetWorker(workerIndex).StealTask())
                             {
-                                ExecuteTask(task);
+                                ExecuteTask(*task);
                                 break;
                             }
                         }
                     }
 
+                    // Note: it's possible for a thread to reach this point without executing a task (for example if another worker stole its only remaining task)
+                    if (!task)
+                    {
+                        if (!idle)
+                        {
+                            m_owner.NotifyWorkerIdle();
+                            idle = true;
+                        }
+
+                        m_notifier.wait(false);
+                        m_notifier.clear();
+                    }
                 }
+                while (m_running);
             }
 
-            bool StealTask(TaskScheduler::Task* task)
+            std::optional<TaskScheduler::Task*> StealTask()
             {
-                std::unique_lock lock(m_mutex, std::defer_lock);
-                if (!lock.try_lock())
-                    return false;
-
-                if (m_tasks.empty())
-                    return false;
-
-                *task = std::move(m_tasks.front());
-                m_tasks.erase(m_tasks.begin());
-                return true;
+                return m_tasks.steal();
             }
 
             Worker& operator=(const Worker& worker) = delete;
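Note: the worker loop is now pop-first, steal-second: both the local pop() and StealTask() return std::optional, so an empty optional after both attempts means the worker reports idle and parks on its notifier (the if (!task) block above). A reduced, single-threaded sketch of that control flow, with plain vectors standing in for the lock-free queues (illustrative only):

#include <cstdio>
#include <optional>
#include <vector>

// Stand-in for a worker queue; the real code uses WorkStealingQueue<Task*>
std::optional<int> TakeOne(std::vector<int>& queue)
{
    if (queue.empty())
        return std::nullopt;
    int value = queue.back();
    queue.pop_back();
    return value;
}

int main()
{
    std::vector<int> own;           // this worker's queue (empty)
    std::vector<int> peer = {1, 2}; // another worker's queue

    std::optional<int> task = TakeOne(own); // 1. pop our own queue first
    if (!task)
        task = TakeOne(peer);               // 2. fall back to stealing from a peer

    if (task)
        std::printf("executing task %d\n", *task);
    else
        std::puts("no work anywhere: notify idle and sleep on the notifier");
}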
@@ -169,10 +143,9 @@ namespace Nz
 
         private:
             std::atomic_bool m_running;
-            std::condition_variable m_conditionVariable;
-            std::mutex m_mutex;
+            std::atomic_flag m_notifier;
             std::thread m_thread;
-            std::vector<TaskScheduler::Task> m_tasks;
+            WorkStealingQueue<TaskScheduler::Task*> m_tasks;
             TaskScheduler& m_owner;
             unsigned int m_workerIndex;
         };
@@ -181,15 +154,17 @@ namespace Nz
 
     TaskScheduler::TaskScheduler(unsigned int workerCount) :
     m_idle(true),
-    m_nextWorkerIndex(0)
+    m_nextWorkerIndex(0),
+    m_tasks(256 * sizeof(Task)),
+    m_workerCount(workerCount)
     {
-        if (workerCount == 0)
-            workerCount = std::max(Core::Instance()->GetHardwareInfo().GetCpuThreadCount(), 1u);
+        if (m_workerCount == 0)
+            m_workerCount = std::max(Core::Instance()->GetHardwareInfo().GetCpuThreadCount(), 1u);
 
-        m_idleWorkerCount = workerCount;
+        m_idleWorkerCount = m_workerCount;
 
-        m_workers.reserve(workerCount);
-        for (unsigned int i = 0; i < workerCount; ++i)
+        m_workers.reserve(m_workerCount);
+        for (unsigned int i = 0; i < m_workerCount; ++i)
             m_workers.emplace_back(*this, i);
     }
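Note: tasks now live in a MemoryPool owned by the scheduler, and workers receive raw Task* pointers, so the lock-free queues only ever move a pointer and each task object keeps a stable address until WaitForTasks() calls m_tasks.Clear(). A rough illustration of why pointer stability matters (std::deque stands in for the pool here; this is not the MemoryPool API):

#include <cstdio>
#include <deque>
#include <functional>

int main()
{
    // std::deque never relocates existing elements when it grows,
    // mimicking the pool's stable addresses
    std::deque<std::function<void()>> pool;

    std::function<void()>* task = &pool.emplace_back([] { std::puts("task ran"); });

    // a worker queue would store this pointer, then dequeue and invoke it
    (*task)();
}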
@@ -202,25 +177,19 @@ namespace Nz
     {
         m_idle = false;
 
-        for (;;)
-        {
-            Worker& randomWorker = m_workers[m_nextWorkerIndex];
-            if (randomWorker.AddTask(std::move(task)))
-                break;
+        std::size_t taskIndex; //< not used
 
-            if (++m_nextWorkerIndex >= m_workers.size())
-                m_nextWorkerIndex = 0;
-        }
-    }
+        Worker& worker = m_workers[m_nextWorkerIndex++];
+        worker.AddTask(m_tasks.Allocate(taskIndex, std::move(task)));
 
-    unsigned int TaskScheduler::GetWorkerCount() const
-    {
-        return static_cast<unsigned int>(m_workers.size());
+        if (m_nextWorkerIndex >= m_workers.size())
+            m_nextWorkerIndex = 0;
     }
 
     void TaskScheduler::WaitForTasks()
     {
         m_idle.wait(false);
+        m_tasks.Clear();
     }
 
     auto TaskScheduler::GetWorker(unsigned int workerIndex) -> Worker&
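Note: the public API is unchanged by this rework: AddTask() hands each task to the next worker round-robin, and WaitForTasks() blocks until every worker reports idle. A hedged usage sketch (assumes the Core module is initialized and that TaskScheduler::Task is callable as void(), as the <functional> include suggests; DoExpensiveWork is a hypothetical user function):

// illustrative usage, not taken from the repository
Nz::TaskScheduler scheduler(4); // passing 0 falls back to the CPU thread count

for (int i = 0; i < 16; ++i)
{
    scheduler.AddTask([i]
    {
        DoExpensiveWork(i); // hypothetical user function
    });
}

scheduler.WaitForTasks(); // returns once all 16 tasks have run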
@@ -0,0 +1,249 @@
+#pragma once
+
+#include <atomic>
+#include <vector>
+#include <optional>
+#include <cassert>
+#include <new>
+
+/**
+@class: WorkStealingQueue
+
+@tparam T data type
+
+@brief Lock-free unbounded single-producer multiple-consumer queue.
+
+This class implements the work-stealing queue described in the paper,
+"Correct and Efficient Work-Stealing for Weak Memory Models,"
+available at https://www.di.ens.fr/~zappa/readings/ppopp13.pdf.
+
+Only the queue owner can perform pop and push operations,
+while others can steal data from the queue.
+*/
+template <typename T>
+class WorkStealingQueue {
+
+  struct Array {
+
+    int64_t C;
+    int64_t M;
+    std::atomic<T>* S;
+
+    explicit Array(int64_t c) :
+      C {c},
+      M {c-1},
+      S {new std::atomic<T>[static_cast<size_t>(C)]} {
+    }
+
+    ~Array() {
+      delete [] S;
+    }
+
+    int64_t capacity() const noexcept {
+      return C;
+    }
+
+    template <typename O>
+    void push(int64_t i, O&& o) noexcept {
+      S[i & M].store(std::forward<O>(o), std::memory_order_relaxed);
+    }
+
+    T pop(int64_t i) noexcept {
+      return S[i & M].load(std::memory_order_relaxed);
+    }
+
+    Array* resize(int64_t b, int64_t t) {
+      Array* ptr = new Array {2*C};
+      for(int64_t i=t; i!=b; ++i) {
+        ptr->push(i, pop(i));
+      }
+      return ptr;
+    }
+
+  };
+  // avoids false sharing between _top and _bottom
+#ifdef __cpp_lib_hardware_interference_size
+  alignas(std::hardware_destructive_interference_size) std::atomic<int64_t> _top;
+  alignas(std::hardware_destructive_interference_size) std::atomic<int64_t> _bottom;
+#else
+  alignas(64) std::atomic<int64_t> _top;
+  alignas(64) std::atomic<int64_t> _bottom;
+#endif
+  std::atomic<Array*> _array;
+  std::vector<Array*> _garbage;
+
+  public:
+
+    /**
+    @brief constructs the queue with a given capacity
+
+    @param capacity the capacity of the queue (must be a power of 2)
+    */
+    explicit WorkStealingQueue(int64_t capacity = 1024);
+
+    /**
+    @brief destructs the queue
+    */
+    ~WorkStealingQueue();
+
+    /**
+    @brief queries if the queue is empty at the time of this call
+    */
+    bool empty() const noexcept;
+
+    /**
+    @brief queries the number of items at the time of this call
+    */
+    size_t size() const noexcept;
+
+    /**
+    @brief queries the capacity of the queue
+    */
+    int64_t capacity() const noexcept;
+
+    /**
+    @brief inserts an item into the queue
+
+    Only the owner thread can insert an item into the queue.
+    The operation can trigger the queue to resize its capacity
+    if more space is required.
+
+    @tparam O data type
+
+    @param item the item to perfect-forward to the queue
+    */
+    template <typename O>
+    void push(O&& item);
+
+    /**
+    @brief pops out an item from the queue
+
+    Only the owner thread can pop out an item from the queue.
+    The return can be std::nullopt if this operation failed (empty queue).
+    */
+    std::optional<T> pop();
+
+    /**
+    @brief steals an item from the queue
+
+    Any thread can try to steal an item from the queue.
+    The return can be std::nullopt if this operation failed (not necessarily empty).
+    */
+    std::optional<T> steal();
+};
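Note: the ownership contract is the whole point of this structure: exactly one thread (the owner) may call push() and pop(), which work LIFO at the bottom of the deque, while any number of other threads may call steal(), which takes the oldest item from the top. A minimal usage sketch (assumes <wsq.hpp> is on the include path, as in TaskScheduler.cpp above):

#include <cstdio>
#include <optional>
#include <thread>
#include <wsq.hpp>

int main()
{
    WorkStealingQueue<int> queue(64); // capacity must be a power of two

    for (int i = 0; i < 8; ++i) // owner: push at the bottom
        queue.push(i);

    std::thread thief([&]
    {
        // any other thread: steal from the top (oldest item first)
        if (std::optional<int> item = queue.steal())
            std::printf("stole %d\n", *item);
    });

    while (std::optional<int> item = queue.pop()) // owner: pop newest first
        std::printf("popped %d\n", *item);

    thief.join();
}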
+// Constructor
+template <typename T>
+WorkStealingQueue<T>::WorkStealingQueue(int64_t c) {
+  assert(c && (!(c & (c-1))));
+  _top.store(0, std::memory_order_relaxed);
+  _bottom.store(0, std::memory_order_relaxed);
+  _array.store(new Array{c}, std::memory_order_relaxed);
+  _garbage.reserve(32);
+}
+
+// Destructor
+template <typename T>
+WorkStealingQueue<T>::~WorkStealingQueue() {
+  for(auto a : _garbage) {
+    delete a;
+  }
+  delete _array.load();
+}
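Note: the assert enforces a nonzero power-of-two capacity: c & (c-1) clears the lowest set bit, so it is zero exactly when c is a power of two. That property is what lets Array wrap indices with i & M instead of a modulo. A quick worked check:

#include <cstdio>

int main()
{
    const long long C = 8;     // capacity, a power of two
    const long long M = C - 1; // mask: 0b0111

    // for non-negative i and power-of-two C, (i & M) == (i % C)
    for (long long i : {0LL, 7LL, 8LL, 9LL, 19LL})
        std::printf("i=%lld  i&M=%lld  i%%C=%lld\n", i, i & M, i % C);
}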
+// Function: empty
+template <typename T>
+bool WorkStealingQueue<T>::empty() const noexcept {
+  int64_t b = _bottom.load(std::memory_order_relaxed);
+  int64_t t = _top.load(std::memory_order_relaxed);
+  return b <= t;
+}
+
+// Function: size
+template <typename T>
+size_t WorkStealingQueue<T>::size() const noexcept {
+  int64_t b = _bottom.load(std::memory_order_relaxed);
+  int64_t t = _top.load(std::memory_order_relaxed);
+  return static_cast<size_t>(b >= t ? b - t : 0);
+}
+// Function: push
+template <typename T>
+template <typename O>
+void WorkStealingQueue<T>::push(O&& o) {
+  int64_t b = _bottom.load(std::memory_order_relaxed);
+  int64_t t = _top.load(std::memory_order_acquire);
+  Array* a = _array.load(std::memory_order_relaxed);
+
+  // queue is full
+  if(a->capacity() - 1 < (b - t)) {
+    Array* tmp = a->resize(b, t);
+    _garbage.push_back(a);
+    std::swap(a, tmp);
+    _array.store(a, std::memory_order_relaxed);
+  }
+
+  a->push(b, std::forward<O>(o));
+  std::atomic_thread_fence(std::memory_order_release);
+  _bottom.store(b + 1, std::memory_order_relaxed);
+}
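Note: push() publishes the item first and only then advances _bottom, with a release fence in between; a thief that observes the new _bottom with acquire semantics is therefore guaranteed to also see the stored item. Retired arrays are parked in _garbage rather than deleted immediately because a concurrent steal() may still be reading the old buffer. The fence pairing is the standard release/acquire message-passing idiom, sketched here in isolation:

#include <atomic>
#include <cassert>
#include <thread>

std::atomic<int> slot{0};         // stands in for the array cell
std::atomic<long long> bottom{0}; // stands in for _bottom

int main()
{
    std::thread owner([]
    {
        slot.store(42, std::memory_order_relaxed);           // a->push(b, ...)
        std::atomic_thread_fence(std::memory_order_release); // as in push()
        bottom.store(1, std::memory_order_relaxed);          // _bottom.store(b + 1)
    });

    std::thread thief([]
    {
        while (bottom.load(std::memory_order_acquire) != 1) {} // as in steal()
        assert(slot.load(std::memory_order_relaxed) == 42);    // item must be visible
    });

    owner.join();
    thief.join();
}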
+// Function: pop
+template <typename T>
+std::optional<T> WorkStealingQueue<T>::pop() {
+  int64_t b = _bottom.load(std::memory_order_relaxed) - 1;
+  Array* a = _array.load(std::memory_order_relaxed);
+  _bottom.store(b, std::memory_order_relaxed);
+  std::atomic_thread_fence(std::memory_order_seq_cst);
+  int64_t t = _top.load(std::memory_order_relaxed);
+
+  std::optional<T> item;
+
+  if(t <= b) {
+    item = a->pop(b);
+    if(t == b) {
+      // the last item just got stolen
+      if(!_top.compare_exchange_strong(t, t+1,
+                                       std::memory_order_seq_cst,
+                                       std::memory_order_relaxed)) {
+        item = std::nullopt;
+      }
+      _bottom.store(b + 1, std::memory_order_relaxed);
+    }
+  }
+  else {
+    _bottom.store(b + 1, std::memory_order_relaxed);
+  }
+
+  return item;
+}
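Note: the subtle case is the last remaining item (t == b after the decrement). The owner and a thief may both be reaching for it, so both sides race on the same compare-exchange of _top; whoever advances it from t to t+1 owns the item, and a losing pop() discards what it read and returns std::nullopt. In every branch _bottom is restored to b + 1, leaving the deque in the canonical empty state, and the seq_cst fence above pairs with the one in steal() so the owner's _bottom decrement and the thief's _top advance cannot both go unobserved.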
+// Function: steal
+template <typename T>
+std::optional<T> WorkStealingQueue<T>::steal() {
+  int64_t t = _top.load(std::memory_order_acquire);
+  std::atomic_thread_fence(std::memory_order_seq_cst);
+  int64_t b = _bottom.load(std::memory_order_acquire);
+
+  std::optional<T> item;
+
+  if(t < b) {
+    Array* a = _array.load(std::memory_order_consume);
+    item = a->pop(t);
+    if(!_top.compare_exchange_strong(t, t+1,
+                                     std::memory_order_seq_cst,
+                                     std::memory_order_relaxed)) {
+      return std::nullopt;
+    }
+  }
+
+  return item;
+}
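Note: this is why the class comment says a failed steal() does not imply an empty queue: losing the compare-exchange only means the owner or another thief took that particular item first. Worker::Run above relies on exactly that semantics when it treats a std::nullopt from StealTask() as "try the next peer" rather than "no work exists".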
+// Function: capacity
+template <typename T>
+int64_t WorkStealingQueue<T>::capacity() const noexcept {
+  return _array.load(std::memory_order_relaxed)->capacity();
+}
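Note: a quick, non-exhaustive concurrent exercise of the queue; every pushed item should be consumed exactly once, whether by the owner's pop() or by a thief's steal(). Illustrative only, assuming <wsq.hpp> is on the include path:

#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>
#include <wsq.hpp>

int main()
{
    WorkStealingQueue<int> queue(1024);
    std::atomic<int> consumed{0};

    std::vector<std::thread> thieves;
    for (int t = 0; t < 4; ++t)
    {
        thieves.emplace_back([&]
        {
            for (int i = 0; i < 100000; ++i)
                if (queue.steal())
                    consumed.fetch_add(1, std::memory_order_relaxed);
        });
    }

    for (int i = 0; i < 1000; ++i) // owner thread: produce...
        queue.push(i);
    while (queue.pop())            // ...then drain whatever was not stolen
        consumed.fetch_add(1, std::memory_order_relaxed);

    for (std::thread& t : thieves)
        t.join();

    std::printf("consumed %d of 1000 items\n", consumed.load());
}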