Remove ThreadWorkPool, replace by WorkerThreadPool

The former needs to be allocated once per use. The latter is shared across all threads, which is more efficient.
It can also be debugged more easily (see the migration sketch below).
Juan Linietsky 2022-07-23 19:12:41 +02:00
parent 3bd74cd67b
commit c7255388e1
31 changed files with 248 additions and 515 deletions
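
A minimal migration sketch of the pattern this commit applies across the engine; MyBaker, BakeData, and _bake_element are hypothetical names used only for illustration, while the WorkerThreadPool calls mirror the API added in this diff:

#include "core/object/worker_thread_pool.h"

struct BakeData;

class MyBaker {
public:
	void bake_all(BakeData *p_data, uint32_t p_element_count) {
		// Old pattern (removed): every user owned a ThreadWorkPool that had to be
		// init()'ed before use and finish()'ed afterwards:
		//   work_pool.init();
		//   work_pool.do_work(p_element_count, this, &MyBaker::_bake_element, p_data);
		//   work_pool.finish();

		// New pattern: dispatch a group task on the shared WorkerThreadPool singleton
		// and block until the whole group has completed.
		WorkerThreadPool::GroupID group = WorkerThreadPool::get_singleton()->add_template_group_task(
				this, &MyBaker::_bake_element, p_data, p_element_count, -1, true, SNAME("MyBakerBakeAll"));
		WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group);
	}

private:
	// Group callbacks receive the element index plus the userdata passed above.
	void _bake_element(uint32_t p_index, BakeData *p_data) {
		// ... process element p_index ...
	}
};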


@ -32,6 +32,13 @@
#include "core/os/os.h"
void WorkerThreadPool::Task::free_template_userdata() {
ERR_FAIL_COND(!template_userdata);
ERR_FAIL_COND(native_func_userdata == nullptr);
BaseTemplateUserdata *btu = (BaseTemplateUserdata *)native_func_userdata;
memdelete(btu);
}
WorkerThreadPool *WorkerThreadPool::singleton = nullptr;
void WorkerThreadPool::_process_task_queue() {
@ -48,30 +55,36 @@ void WorkerThreadPool::_process_task(Task *p_task) {
if (p_task->group) {
// Handling a group
bool do_post = false;
if (p_task->native_group_func) {
while (true) {
uint32_t work_index = p_task->group->index.postincrement();
if (work_index >= p_task->group->max) {
do_post = work_index == p_task->group->max; // First one reaching max handles semaphore and clean-up.
break;
}
p_task->native_group_func(p_task->native_func_userdata, work_index);
}
Callable::CallError ce;
Variant ret;
Variant arg;
Variant *argptr = &arg;
} else {
Callable::CallError ce;
Variant ret;
Variant arg;
Variant *argptr = &arg;
while (true) {
uint32_t work_index = p_task->group->index.postincrement();
if (work_index >= p_task->group->max) {
do_post = work_index == p_task->group->max; // First one reaching max handles semaphore and clean-up.
break;
}
while (true) {
uint32_t work_index = p_task->group->index.postincrement();
if (work_index >= p_task->group->max) {
break;
}
if (p_task->native_group_func) {
p_task->native_group_func(p_task->native_func_userdata, work_index);
} else if (p_task->template_userdata) {
p_task->template_userdata->callback_indexed(work_index);
} else {
arg = work_index;
p_task->callable.call((const Variant **)&argptr, 1, ret, ce);
}
// This is the only way to ensure posting is done when all tasks are really complete.
uint32_t completed_amount = p_task->group->completed_index.increment();
if (completed_amount == p_task->group->max) {
do_post = true;
}
}
if (do_post && p_task->template_userdata) {
memdelete(p_task->template_userdata); // This is no longer needed at this point, so get rid of it.
}
if (low_priority && use_native_low_priority_threads) {
@ -104,6 +117,9 @@ void WorkerThreadPool::_process_task(Task *p_task) {
} else {
if (p_task->native_func) {
p_task->native_func(p_task->native_func_userdata);
} else if (p_task->template_userdata) {
p_task->template_userdata->callback();
memdelete(p_task->template_userdata);
} else {
Callable::CallError ce;
Variant ret;
@ -171,13 +187,19 @@ void WorkerThreadPool::_post_task(Task *p_task, bool p_high_priority) {
}
WorkerThreadPool::TaskID WorkerThreadPool::add_native_task(void (*p_func)(void *), void *p_userdata, bool p_high_priority, const String &p_description) {
return _add_task(Callable(), p_func, p_userdata, nullptr, p_high_priority, p_description);
}
WorkerThreadPool::TaskID WorkerThreadPool::_add_task(const Callable &p_callable, void (*p_func)(void *), void *p_userdata, BaseTemplateUserdata *p_template_userdata, bool p_high_priority, const String &p_description) {
task_mutex.lock();
// Get a free task
Task *task = task_allocator.alloc();
TaskID id = last_task++;
task->callable = p_callable;
task->native_func = p_func;
task->native_func_userdata = p_userdata;
task->description = p_description;
task->template_userdata = p_template_userdata;
tasks.insert(id, task);
task_mutex.unlock();
@ -187,18 +209,7 @@ WorkerThreadPool::TaskID WorkerThreadPool::add_native_task(void (*p_func)(void *
}
WorkerThreadPool::TaskID WorkerThreadPool::add_task(const Callable &p_action, bool p_high_priority, const String &p_description) {
task_mutex.lock();
// Get a free task
Task *task = task_allocator.alloc();
TaskID id = last_task++;
task->callable = p_action;
task->description = p_description;
tasks.insert(id, task);
task_mutex.unlock();
_post_task(task, p_high_priority);
return id;
return _add_task(p_action, nullptr, nullptr, nullptr, p_high_priority, p_description);
}
bool WorkerThreadPool::is_task_completed(TaskID p_task_id) const {
@ -269,8 +280,8 @@ void WorkerThreadPool::wait_for_task_completion(TaskID p_task_id) {
task_mutex.unlock();
}
WorkerThreadPool::GroupID WorkerThreadPool::add_native_group_task(void (*p_func)(void *, uint32_t), void *p_userdata, int p_elements, int p_tasks, bool p_high_priority, const String &p_description) {
ERR_FAIL_COND_V(p_elements <= 0, INVALID_TASK_ID);
WorkerThreadPool::GroupID WorkerThreadPool::_add_group_task(const Callable &p_callable, void (*p_func)(void *, uint32_t), void *p_userdata, BaseTemplateUserdata *p_template_userdata, int p_elements, int p_tasks, bool p_high_priority, const String &p_description) {
ERR_FAIL_COND_V(p_elements < 0, INVALID_TASK_ID);
if (p_tasks < 0) {
p_tasks = threads.size();
}
@ -280,17 +291,34 @@ WorkerThreadPool::GroupID WorkerThreadPool::add_native_group_task(void (*p_func)
GroupID id = last_task++;
group->max = p_elements;
group->self = id;
group->tasks_used = p_tasks;
Task **tasks_posted = (Task **)alloca(sizeof(Task *) * p_tasks);
for (int i = 0; i < p_tasks; i++) {
Task *task = task_allocator.alloc();
task->native_group_func = p_func;
task->native_func_userdata = p_userdata;
task->description = p_description;
task->group = group;
tasks_posted[i] = task;
// No task ID is used.
Task **tasks_posted = nullptr;
if (p_elements == 0) {
// Should really not call it with zero Elements, but at least it should work.
group->completed.set_to(true);
group->done_semaphore.post();
group->tasks_used = 0;
p_tasks = 0;
if (p_template_userdata) {
memdelete(p_template_userdata);
}
} else {
group->tasks_used = p_tasks;
tasks_posted = (Task **)alloca(sizeof(Task *) * p_tasks);
for (int i = 0; i < p_tasks; i++) {
Task *task = task_allocator.alloc();
task->native_group_func = p_func;
task->native_func_userdata = p_userdata;
task->description = p_description;
task->group = group;
task->callable = p_callable;
task->template_userdata = p_template_userdata;
tasks_posted[i] = task;
// No task ID is used.
}
}
groups[id] = group;
task_mutex.unlock();
@ -308,43 +336,25 @@ WorkerThreadPool::GroupID WorkerThreadPool::add_native_group_task(void (*p_func)
return id;
}
WorkerThreadPool::GroupID WorkerThreadPool::add_native_group_task(void (*p_func)(void *, uint32_t), void *p_userdata, int p_elements, int p_tasks, bool p_high_priority, const String &p_description) {
return _add_group_task(Callable(), p_func, p_userdata, nullptr, p_elements, p_tasks, p_high_priority, p_description);
}
WorkerThreadPool::GroupID WorkerThreadPool::add_group_task(const Callable &p_action, int p_elements, int p_tasks, bool p_high_priority, const String &p_description) {
ERR_FAIL_COND_V(p_elements <= 0, INVALID_TASK_ID);
if (p_tasks < 0) {
p_tasks = threads.size();
}
task_mutex.lock();
Group *group = group_allocator.alloc();
GroupID id = last_task++;
group->max = p_elements;
group->self = id;
group->tasks_used = p_tasks;
Task **tasks_posted = (Task **)alloca(sizeof(Task *) * p_tasks);
for (int i = 0; i < p_tasks; i++) {
Task *task = task_allocator.alloc();
task->callable = p_action;
task->description = p_description;
task->group = group;
tasks_posted[i] = task;
// No task ID is used.
}
groups[id] = group;
task_mutex.unlock();
if (!p_high_priority && use_native_low_priority_threads) {
group->low_priority_native_tasks.resize(p_tasks);
}
for (int i = 0; i < p_tasks; i++) {
_post_task(tasks_posted[i], p_high_priority);
if (!p_high_priority && use_native_low_priority_threads) {
group->low_priority_native_tasks[i] = tasks_posted[i];
}
}
return id;
return _add_group_task(p_action, nullptr, nullptr, nullptr, p_elements, p_tasks, p_high_priority, p_description);
}
uint32_t WorkerThreadPool::get_group_processed_element_count(GroupID p_group) const {
task_mutex.lock();
const Group *const *groupp = groups.getptr(p_group);
if (!groupp) {
task_mutex.unlock();
ERR_FAIL_V_MSG(0, "Invalid Group ID");
}
uint32_t elements = (*groupp)->completed_index.get();
task_mutex.unlock();
return elements;
}
bool WorkerThreadPool::is_group_task_completed(GroupID p_group) const {
task_mutex.lock();
const Group *const *groupp = groups.getptr(p_group);
@ -451,6 +461,7 @@ void WorkerThreadPool::_bind_methods() {
ClassDB::bind_method(D_METHOD("add_group_task", "action", "elements", "tasks_needed", "high_priority", "description"), &WorkerThreadPool::add_group_task, DEFVAL(-1), DEFVAL(false), DEFVAL(String()));
ClassDB::bind_method(D_METHOD("is_group_task_completed", "group_id"), &WorkerThreadPool::is_group_task_completed);
ClassDB::bind_method(D_METHOD("get_group_processed_element_count", "group_id"), &WorkerThreadPool::get_group_processed_element_count);
ClassDB::bind_method(D_METHOD("wait_for_group_task_completion", "group_id"), &WorkerThreadPool::wait_for_group_task_completion);
}


@ -53,9 +53,16 @@ public:
private:
struct Task;
struct BaseTemplateUserdata {
virtual void callback() {}
virtual void callback_indexed(uint32_t p_index) {}
virtual ~BaseTemplateUserdata() {}
};
struct Group {
GroupID self;
SafeNumeric<uint32_t> index;
SafeNumeric<uint32_t> completed_index;
uint32_t max = 0;
Semaphore done_semaphore;
SafeFlag completed;
@ -76,7 +83,10 @@ private:
SelfList<Task> task_elem;
bool waiting = false; // Waiting for completion
bool low_priority = false;
BaseTemplateUserdata *template_userdata = nullptr;
Thread *low_priority_thread = nullptr;
void free_template_userdata();
Task() :
task_elem(this) {}
};
@ -119,18 +129,60 @@ private:
static WorkerThreadPool *singleton;
TaskID _add_task(const Callable &p_callable, void (*p_func)(void *), void *p_userdata, BaseTemplateUserdata *p_template_userdata, bool p_high_priority, const String &p_description);
GroupID _add_group_task(const Callable &p_callable, void (*p_func)(void *, uint32_t), void *p_userdata, BaseTemplateUserdata *p_template_userdata, int p_elements, int p_tasks, bool p_high_priority, const String &p_description);
template <class C, class M, class U>
struct TaskUserData : public BaseTemplateUserdata {
C *instance;
M method;
U userdata;
virtual void callback() override {
(instance->*method)(userdata);
}
};
template <class C, class M, class U>
struct GroupUserData : public BaseTemplateUserdata {
C *instance;
M method;
U userdata;
virtual void callback_indexed(uint32_t p_index) override {
(instance->*method)(p_index, userdata);
}
};
protected:
static void _bind_methods();
public:
template <class C, class M, class U>
TaskID add_template_task(C *p_instance, M p_method, U p_userdata, bool p_high_priority = false, const String &p_description = String()) {
typedef TaskUserData<C, M, U> TUD;
TUD *ud = memnew(TUD);
ud->instance = p_instance;
ud->method = p_method;
ud->userdata = p_userdata;
return _add_task(Callable(), nullptr, nullptr, ud, p_high_priority, p_description);
}
TaskID add_native_task(void (*p_func)(void *), void *p_userdata, bool p_high_priority = false, const String &p_description = String());
TaskID add_task(const Callable &p_action, bool p_high_priority = false, const String &p_description = String());
bool is_task_completed(TaskID p_task_id) const;
void wait_for_task_completion(TaskID p_task_id);
template <class C, class M, class U>
GroupID add_template_group_task(C *p_instance, M p_method, U p_userdata, int p_elements, int p_tasks = -1, bool p_high_priority = false, const String &p_description = String()) {
typedef GroupUserData<C, M, U> GUD;
GUD *ud = memnew(GUD);
ud->instance = p_instance;
ud->method = p_method;
ud->userdata = p_userdata;
return _add_group_task(Callable(), nullptr, nullptr, ud, p_elements, p_tasks, p_high_priority, p_description);
}
GroupID add_native_group_task(void (*p_func)(void *, uint32_t), void *p_userdata, int p_elements, int p_tasks = -1, bool p_high_priority = false, const String &p_description = String());
GroupID add_group_task(const Callable &p_action, int p_elements, int p_tasks = -1, bool p_high_priority = false, const String &p_description = String());
uint32_t get_group_processed_element_count(GroupID p_group) const;
bool is_group_task_completed(GroupID p_group) const;
void wait_for_group_task_completion(GroupID p_group);
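
For completeness, a short usage sketch of the single-task half of the API declared above; MyObject, HeavyWorkData, _do_heavy_work, and pending_task are hypothetical names used only for illustration:

// add_template_task callbacks take only the userdata (no element index).
void MyObject::start_background_work(HeavyWorkData *p_data) {
	pending_task = WorkerThreadPool::get_singleton()->add_template_task(
			this, &MyObject::_do_heavy_work, p_data, false, SNAME("MyObjectHeavyWork"));
}

void MyObject::wait_for_background_work() {
	// Blocks until the queued task has finished running on a pool thread.
	WorkerThreadPool::get_singleton()->wait_for_task_completion(pending_task);
}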


@ -1,81 +0,0 @@
/*************************************************************************/
/* thread_work_pool.cpp */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#include "thread_work_pool.h"
#include "core/os/os.h"
void ThreadWorkPool::_thread_function(void *p_user) {
ThreadData *thread = static_cast<ThreadData *>(p_user);
while (true) {
thread->start.wait();
if (thread->exit.load()) {
break;
}
thread->work->work();
thread->completed.post();
}
}
void ThreadWorkPool::init(int p_thread_count) {
ERR_FAIL_COND(threads != nullptr);
if (p_thread_count < 0) {
p_thread_count = OS::get_singleton()->get_default_thread_pool_size();
}
thread_count = p_thread_count;
threads = memnew_arr(ThreadData, thread_count);
for (uint32_t i = 0; i < thread_count; i++) {
threads[i].exit.store(false);
threads[i].thread.start(&ThreadWorkPool::_thread_function, &threads[i]);
}
}
void ThreadWorkPool::finish() {
if (threads == nullptr) {
return;
}
for (uint32_t i = 0; i < thread_count; i++) {
threads[i].exit.store(true);
threads[i].start.post();
}
for (uint32_t i = 0; i < thread_count; i++) {
threads[i].thread.wait_to_finish();
}
memdelete_arr(threads);
threads = nullptr;
}
ThreadWorkPool::~ThreadWorkPool() {
finish();
}


@ -1,157 +0,0 @@
/*************************************************************************/
/* thread_work_pool.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#ifndef THREAD_WORK_POOL_H
#define THREAD_WORK_POOL_H
#include "core/os/memory.h"
#include "core/os/semaphore.h"
#include "core/os/thread.h"
#include <atomic>
class ThreadWorkPool {
std::atomic<uint32_t> index;
struct BaseWork {
std::atomic<uint32_t> *index = nullptr;
uint32_t max_elements = 0;
virtual void work() = 0;
virtual ~BaseWork() = default;
};
template <class C, class M, class U>
struct Work : public BaseWork {
C *instance;
M method;
U userdata;
virtual void work() override {
while (true) {
uint32_t work_index = index->fetch_add(1, std::memory_order_relaxed);
if (work_index >= max_elements) {
break;
}
(instance->*method)(work_index, userdata);
}
}
};
struct ThreadData {
Thread thread;
Semaphore start;
Semaphore completed;
std::atomic<bool> exit;
BaseWork *work = nullptr;
};
ThreadData *threads = nullptr;
uint32_t thread_count = 0;
uint32_t threads_working = 0;
BaseWork *current_work = nullptr;
static void _thread_function(void *p_user);
public:
template <class C, class M, class U>
void begin_work(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
ERR_FAIL_COND(!threads); //never initialized
ERR_FAIL_COND(current_work != nullptr);
index.store(0, std::memory_order_release);
Work<C, M, U> *w = memnew((Work<C, M, U>));
w->instance = p_instance;
w->userdata = p_userdata;
w->method = p_method;
w->index = &index;
w->max_elements = p_elements;
current_work = w;
threads_working = MIN(p_elements, thread_count);
for (uint32_t i = 0; i < threads_working; i++) {
threads[i].work = w;
threads[i].start.post();
}
}
bool is_working() const {
return current_work != nullptr;
}
bool is_done_dispatching() const {
ERR_FAIL_COND_V(current_work == nullptr, true);
return index.load(std::memory_order_acquire) >= current_work->max_elements;
}
uint32_t get_work_index() const {
ERR_FAIL_COND_V(current_work == nullptr, 0);
uint32_t idx = index.load(std::memory_order_acquire);
return MIN(idx, current_work->max_elements);
}
void end_work() {
ERR_FAIL_COND(current_work == nullptr);
for (uint32_t i = 0; i < threads_working; i++) {
threads[i].completed.wait();
threads[i].work = nullptr;
}
threads_working = 0;
memdelete(current_work);
current_work = nullptr;
}
template <class C, class M, class U>
void do_work(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
switch (p_elements) {
case 0:
// Nothing to do, so do nothing.
break;
case 1:
// No value in pushing the work to another thread if it's a single job
// and we're going to wait for it to finish. Just run it right here.
(p_instance->*p_method)(0, p_userdata);
break;
default:
// Multiple jobs to do; commence threaded business.
begin_work(p_elements, p_instance, p_method, p_userdata);
end_work();
}
}
_FORCE_INLINE_ int get_thread_count() const { return thread_count; }
void init(int p_thread_count = -1);
void finish();
~ThreadWorkPool();
};
#endif // THREAD_WORK_POOL_H


@ -25,6 +25,12 @@
<description>
</description>
</method>
<method name="get_group_processed_element_count" qualifiers="const">
<return type="int" />
<argument index="0" name="group_id" type="int" />
<description>
</description>
</method>
<method name="is_group_task_completed" qualifiers="const">
<return type="bool" />
<argument index="0" name="group_id" type="int" />


@ -36,6 +36,7 @@
#include "core/io/resource_importer.h"
#include "core/io/resource_loader.h"
#include "core/io/resource_saver.h"
#include "core/object/worker_thread_pool.h"
#include "core/os/os.h"
#include "core/variant/variant_parser.h"
#include "editor/editor_node.h"
@ -2137,7 +2138,7 @@ void EditorFileSystem::reimport_files(const Vector<String> &p_files) {
data.reimport_from = from;
data.reimport_files = reimport_files.ptr();
import_threads.begin_work(i - from + 1, this, &EditorFileSystem::_reimport_thread, &data);
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &EditorFileSystem::_reimport_thread, &data, i - from + 1, -1, false, vformat(TTR("Import resources of type: %s"), reimport_files[from].importer));
int current_index = from - 1;
do {
if (current_index < data.max_index) {
@ -2145,9 +2146,9 @@ void EditorFileSystem::reimport_files(const Vector<String> &p_files) {
pr.step(reimport_files[current_index].path.get_file(), current_index);
}
OS::get_singleton()->delay_usec(1);
} while (!import_threads.is_done_dispatching());
} while (!WorkerThreadPool::get_singleton()->is_group_task_completed(group_task));
import_threads.end_work();
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
importer->import_threaded_end();
}
@ -2430,12 +2431,10 @@ EditorFileSystem::EditorFileSystem() {
scan_total = 0;
update_script_classes_queued.clear();
import_threads.init();
ResourceUID::get_singleton()->clear(); //will be updated on scan
ResourceSaver::set_get_resource_id_for_path(_resource_saver_get_resource_id_for_path);
}
EditorFileSystem::~EditorFileSystem() {
import_threads.finish();
ResourceSaver::set_get_resource_id_for_path(nullptr);
}


@ -36,7 +36,6 @@
#include "core/os/thread_safe.h"
#include "core/templates/hash_set.h"
#include "core/templates/safe_refcount.h"
#include "core/templates/thread_work_pool.h"
#include "scene/main/node.h"
class FileAccess;
@ -275,8 +274,6 @@ class EditorFileSystem : public Node {
HashSet<String> group_file_cache;
ThreadWorkPool import_threads;
struct ImportThreadData {
const ImportFile *reimport_files;
int reimport_from;


@ -30,9 +30,9 @@
#include "nav_map.h"
#include "core/object/worker_thread_pool.h"
#include "nav_region.h"
#include "rvo_agent.h"
#include <algorithm>
#define THREE_POINTS_CROSS_PRODUCT(m_a, m_b, m_c) (((m_c) - (m_a)).cross((m_b) - (m_a)))
@ -683,14 +683,8 @@ void NavMap::compute_single_step(uint32_t index, RvoAgent **agent) {
void NavMap::step(real_t p_deltatime) {
deltatime = p_deltatime;
if (controlled_agents.size() > 0) {
if (step_work_pool.get_thread_count() == 0) {
step_work_pool.init();
}
step_work_pool.do_work(
controlled_agents.size(),
this,
&NavMap::compute_single_step,
controlled_agents.data());
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &NavMap::compute_single_step, controlled_agents.data(), controlled_agents.size(), -1, true, SNAME("NavigationMapAgents"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
}
}
@ -736,5 +730,4 @@ NavMap::NavMap() {
}
NavMap::~NavMap() {
step_work_pool.finish();
}


@ -34,8 +34,8 @@
#include "nav_rid.h"
#include "core/math/math_defs.h"
#include "core/object/worker_thread_pool.h"
#include "core/templates/rb_map.h"
#include "core/templates/thread_work_pool.h"
#include "nav_utils.h"
#include <KdTree.h>
@ -81,9 +81,6 @@ class NavMap : public NavRid {
/// Change the id each time the map is updated.
uint32_t map_update_id = 0;
/// Pooled threads for computing steps
ThreadWorkPool step_work_pool;
public:
NavMap();
~NavMap();


@ -30,6 +30,7 @@
#include "raycast_occlusion_cull.h"
#include "core/config/project_settings.h"
#include "core/object/worker_thread_pool.h"
#include "core/templates/local_vector.h"
#ifdef __SSE2__
@ -78,9 +79,9 @@ void RaycastOcclusionCull::RaycastHZBuffer::resize(const Size2i &p_size) {
memset(camera_ray_masks.ptr(), ~0, camera_rays_tile_count * TILE_RAYS * sizeof(uint32_t));
}
void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_work_pool) {
void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) {
CameraRayThreadData td;
td.thread_count = p_thread_work_pool.get_thread_count();
td.thread_count = WorkerThreadPool::get_singleton()->get_thread_count();
td.z_near = p_cam_projection.get_z_near();
td.z_far = p_cam_projection.get_z_far() * 1.05f;
@ -106,7 +107,8 @@ void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform3D
debug_tex_range = td.z_far;
p_thread_work_pool.do_work(td.thread_count, this, &RaycastHZBuffer::_camera_rays_threaded, &td);
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RaycastHZBuffer::_camera_rays_threaded, &td, td.thread_count, -1, true, SNAME("RaycastOcclusionCullUpdateCamera"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
}
void RaycastOcclusionCull::RaycastHZBuffer::_camera_rays_threaded(uint32_t p_thread, const CameraRayThreadData *p_data) {
@ -331,10 +333,10 @@ void RaycastOcclusionCull::scenario_remove_instance(RID p_scenario, RID p_instan
}
void RaycastOcclusionCull::Scenario::_update_dirty_instance_thread(int p_idx, RID *p_instances) {
_update_dirty_instance(p_idx, p_instances, nullptr);
_update_dirty_instance(p_idx, p_instances);
}
void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_instances, ThreadWorkPool *p_thread_pool) {
void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_instances) {
OccluderInstance *occ_inst = instances.getptr(p_instances[p_idx]);
if (!occ_inst) {
@ -355,14 +357,16 @@ void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_in
const Vector3 *read_ptr = occ->vertices.ptr();
Vector3 *write_ptr = occ_inst->xformed_vertices.ptr();
if (p_thread_pool && vertices_size > 1024) {
if (vertices_size > 1024) {
TransformThreadData td;
td.xform = occ_inst->xform;
td.read = read_ptr;
td.write = write_ptr;
td.vertex_count = vertices_size;
td.thread_count = p_thread_pool->get_thread_count();
p_thread_pool->do_work(td.thread_count, this, &Scenario::_transform_vertices_thread, &td);
td.thread_count = WorkerThreadPool::get_singleton()->get_thread_count();
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_transform_vertices_thread, &td, td.thread_count, -1, true, SNAME("RaycastOcclusionCull"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
} else {
_transform_vertices_range(read_ptr, write_ptr, occ_inst->xform, 0, vertices_size);
}
@ -392,7 +396,7 @@ void RaycastOcclusionCull::Scenario::_commit_scene(void *p_ud) {
scenario->commit_done = true;
}
bool RaycastOcclusionCull::Scenario::update(ThreadWorkPool &p_thread_pool) {
bool RaycastOcclusionCull::Scenario::update() {
ERR_FAIL_COND_V(singleton == nullptr, false);
if (commit_thread == nullptr) {
@ -426,13 +430,15 @@ bool RaycastOcclusionCull::Scenario::update(ThreadWorkPool &p_thread_pool) {
instances.erase(removed_instances[i]);
}
if (dirty_instances_array.size() / p_thread_pool.get_thread_count() > 128) {
if (dirty_instances_array.size() / WorkerThreadPool::get_singleton()->get_thread_count() > 128) {
// Lots of instances, use per-instance threading
p_thread_pool.do_work(dirty_instances_array.size(), this, &Scenario::_update_dirty_instance_thread, dirty_instances_array.ptr());
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_update_dirty_instance_thread, dirty_instances_array.ptr(), dirty_instances_array.size(), -1, true, SNAME("RaycastOcclusionCullUpdate"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
} else {
// Few instances, use threading on the vertex transforms
for (unsigned int i = 0; i < dirty_instances_array.size(); i++) {
_update_dirty_instance(i, dirty_instances_array.ptr(), &p_thread_pool);
_update_dirty_instance(i, dirty_instances_array.ptr());
}
}
@ -484,7 +490,7 @@ void RaycastOcclusionCull::Scenario::_raycast(uint32_t p_idx, const RaycastThrea
rtcIntersect16((const int *)&p_raycast_data->masks[p_idx * TILE_RAYS], ebr_scene[current_scene_idx], &ctx, &p_raycast_data->rays[p_idx]);
}
void RaycastOcclusionCull::Scenario::raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count, ThreadWorkPool &p_thread_pool) const {
void RaycastOcclusionCull::Scenario::raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count) const {
ERR_FAIL_COND(singleton == nullptr);
if (raycast_singleton->ebr_device == nullptr) {
return; // Embree is initialized on demand when there is some scenario with occluders in it.
@ -498,7 +504,8 @@ void RaycastOcclusionCull::Scenario::raycast(CameraRayTile *r_rays, const uint32
td.rays = r_rays;
td.masks = p_valid_masks;
p_thread_pool.do_work(p_tile_count, this, &Scenario::_raycast, &td);
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_raycast, &td, p_tile_count, -1, true, SNAME("RaycastOcclusionCullRaycast"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
}
////////////////////////////////////////////////////////
@ -524,7 +531,7 @@ void RaycastOcclusionCull::buffer_set_size(RID p_buffer, const Vector2i &p_size)
buffers[p_buffer].resize(p_size);
}
void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_pool) {
void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) {
if (!buffers.has(p_buffer)) {
return;
}
@ -537,16 +544,16 @@ void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_
Scenario &scenario = scenarios[buffer.scenario_rid];
bool removed = scenario.update(p_thread_pool);
bool removed = scenario.update();
if (removed) {
scenarios.erase(buffer.scenario_rid);
return;
}
buffer.update_camera_rays(p_cam_transform, p_cam_projection, p_cam_orthogonal, p_thread_pool);
buffer.update_camera_rays(p_cam_transform, p_cam_projection, p_cam_orthogonal);
scenario.raycast(buffer.camera_rays, buffer.camera_ray_masks.ptr(), buffer.camera_rays_tile_count, p_thread_pool);
scenario.raycast(buffer.camera_rays, buffer.camera_ray_masks.ptr(), buffer.camera_rays_tile_count);
buffer.sort_rays(-p_cam_transform.basis.get_column(2), p_cam_orthogonal);
buffer.update_mips();
}


@ -76,7 +76,7 @@ public:
virtual void clear() override;
virtual void resize(const Size2i &p_size) override;
void sort_rays(const Vector3 &p_camera_dir, bool p_orthogonal);
void update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_work_pool);
void update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal);
~RaycastHZBuffer();
};
@ -143,14 +143,14 @@ private:
LocalVector<RID> removed_instances;
void _update_dirty_instance_thread(int p_idx, RID *p_instances);
void _update_dirty_instance(int p_idx, RID *p_instances, ThreadWorkPool *p_thread_pool);
void _update_dirty_instance(int p_idx, RID *p_instances);
void _transform_vertices_thread(uint32_t p_thread, TransformThreadData *p_data);
void _transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform3D &p_xform, int p_from, int p_to);
static void _commit_scene(void *p_ud);
bool update(ThreadWorkPool &p_thread_pool);
bool update();
void _raycast(uint32_t p_thread, const RaycastThreadData *p_raycast_data) const;
void raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count, ThreadWorkPool &p_thread_pool) const;
void raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count) const;
};
static RaycastOcclusionCull *raycast_singleton;
@ -183,7 +183,8 @@ public:
virtual HZBuffer *buffer_get_ptr(RID p_buffer) override;
virtual void buffer_set_scenario(RID p_buffer, RID p_scenario) override;
virtual void buffer_set_size(RID p_buffer, const Vector2i &p_size) override;
virtual void buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_pool) override;
virtual void buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) override;
virtual RID buffer_get_debug_texture(RID p_buffer) override;
virtual void set_build_quality(RS::ViewportOcclusionCullingBuildQuality p_quality) override;


@ -29,6 +29,7 @@
/*************************************************************************/
#include "text_server_adv.h"
#include "core/object/worker_thread_pool.h"
#ifdef GDEXTENSION
// Headers for building as GDExtension plug-in.
@ -1039,10 +1040,8 @@ _FORCE_INLINE_ TextServerAdvanced::FontGlyph TextServerAdvanced::rasterize_msdf(
td.projection = &projection;
td.distancePixelConversion = &distancePixelConversion;
if (p_font_data->work_pool.get_thread_count() == 0) {
p_font_data->work_pool.init();
}
p_font_data->work_pool.do_work(h, this, &TextServerAdvanced::_generateMTSDF_threaded, &td);
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &TextServerAdvanced::_generateMTSDF_threaded, &td, h, -1, true, SNAME("FontServerRasterizeMSDF"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
msdfgen::msdfErrorCorrection(image, shape, projection, p_pixel_range, config);


@ -65,11 +65,12 @@
#include <godot_cpp/classes/image.hpp>
#include <godot_cpp/classes/image_texture.hpp>
#include <godot_cpp/classes/ref.hpp>
#include <godot_cpp/classes/worker_thread_pool.hpp>
#include <godot_cpp/templates/hash_map.hpp>
#include <godot_cpp/templates/hash_set.hpp>
#include <godot_cpp/templates/rid_owner.hpp>
#include <godot_cpp/templates/thread_work_pool.hpp>
#include <godot_cpp/templates/vector.hpp>
using namespace godot;
@ -77,9 +78,9 @@ using namespace godot;
#else
// Headers for building as built-in module.
#include "core/object/worker_thread_pool.h"
#include "core/templates/hash_map.h"
#include "core/templates/rid_owner.h"
#include "core/templates/thread_work_pool.h"
#include "scene/resources/texture.h"
#include "servers/text/text_server_extension.h"
@ -252,10 +253,8 @@ class TextServerAdvanced : public TextServerExtension {
const uint8_t *data_ptr;
size_t data_size;
int face_index = 0;
mutable ThreadWorkPool work_pool;
~FontAdvanced() {
work_pool.finish();
for (const KeyValue<Vector2i, FontForSizeAdvanced *> &E : cache) {
memdelete(E.value);
}


@ -461,10 +461,8 @@ _FORCE_INLINE_ TextServerFallback::FontGlyph TextServerFallback::rasterize_msdf(
td.projection = &projection;
td.distancePixelConversion = &distancePixelConversion;
if (p_font_data->work_pool.get_thread_count() == 0) {
p_font_data->work_pool.init();
}
p_font_data->work_pool.do_work(h, this, &TextServerFallback::_generateMTSDF_threaded, &td);
WorkerThreadPool::GroupID group_id = WorkerThreadPool::get_singleton()->add_template_group_task(this, &TextServerFallback::_generateMTSDF_threaded, &td, h, -1, true, SNAME("TextServerFBRenderMSDF"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_id);
msdfgen::msdfErrorCorrection(image, shape, projection, p_pixel_range, config);


@ -79,9 +79,9 @@ using namespace godot;
#include "servers/text/text_server_extension.h"
#include "core/object/worker_thread_pool.h"
#include "core/templates/hash_map.h"
#include "core/templates/rid_owner.h"
#include "core/templates/thread_work_pool.h"
#include "scene/resources/texture.h"
#include "modules/modules_enabled.gen.h" // For freetype, msdfgen.
@ -208,10 +208,7 @@ class TextServerFallback : public TextServerExtension {
size_t data_size;
int face_index = 0;
mutable ThreadWorkPool work_pool;
~FontFallback() {
work_pool.finish();
for (const KeyValue<Vector2i, FontForSizeFallback *> &E : cache) {
memdelete(E.value);
}


@ -30,6 +30,7 @@
#include "gpu_particles_collision_3d.h"
#include "core/object/worker_thread_pool.h"
#include "mesh_instance_3d.h"
#include "scene/3d/camera_3d.h"
#include "scene/main/viewport.h"
@ -339,15 +340,12 @@ void GPUParticlesCollisionSDF3D::_compute_sdf_z(uint32_t p_z, ComputeSDFParams *
}
void GPUParticlesCollisionSDF3D::_compute_sdf(ComputeSDFParams *params) {
ThreadWorkPool work_pool;
work_pool.init();
work_pool.begin_work(params->size.z, this, &GPUParticlesCollisionSDF3D::_compute_sdf_z, params);
while (!work_pool.is_done_dispatching()) {
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &GPUParticlesCollisionSDF3D::_compute_sdf_z, params, params->size.z);
while (!WorkerThreadPool::get_singleton()->is_group_task_completed(group_task)) {
OS::get_singleton()->delay_usec(10000);
bake_step_function(work_pool.get_work_index() * 100 / params->size.z, "Baking SDF");
bake_step_function(WorkerThreadPool::get_singleton()->get_group_processed_element_count(group_task) * 100 / params->size.z, "Baking SDF");
}
work_pool.end_work();
work_pool.finish();
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
}
Vector3i GPUParticlesCollisionSDF3D::get_estimated_cell_size() const {


@ -239,7 +239,8 @@ void GodotStep2D::step(GodotSpace2D *p_space, real_t p_delta) {
/* SETUP CONSTRAINTS / PROCESS COLLISIONS */
uint32_t total_contraint_count = all_constraints.size();
work_pool.do_work(total_contraint_count, this, &GodotStep2D::_setup_contraint, nullptr);
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &GodotStep2D::_setup_contraint, nullptr, total_contraint_count, -1, true, SNAME("Physics2DConstraintSetup"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
{ //profile
profile_endtime = OS::get_singleton()->get_ticks_usec();
@ -258,7 +259,8 @@ void GodotStep2D::step(GodotSpace2D *p_space, real_t p_delta) {
// Warning: _solve_island modifies the constraint islands for optimization purpose,
// their content is not reliable after these calls and shouldn't be used anymore.
work_pool.do_work(island_count, this, &GodotStep2D::_solve_island, nullptr);
group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &GodotStep2D::_solve_island, nullptr, island_count, -1, true, SNAME("Physics2DConstraintSolveIslands"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
{ //profile
profile_endtime = OS::get_singleton()->get_ticks_usec();
@ -297,10 +299,7 @@ GodotStep2D::GodotStep2D() {
body_islands.reserve(BODY_ISLAND_COUNT_RESERVE);
constraint_islands.reserve(ISLAND_COUNT_RESERVE);
all_constraints.reserve(CONSTRAINT_COUNT_RESERVE);
work_pool.init();
}
GodotStep2D::~GodotStep2D() {
work_pool.finish();
}


@ -33,8 +33,8 @@
#include "godot_space_2d.h"
#include "core/object/worker_thread_pool.h"
#include "core/templates/local_vector.h"
#include "core/templates/thread_work_pool.h"
class GodotStep2D {
uint64_t _step = 1;
@ -42,8 +42,6 @@ class GodotStep2D {
int iterations = 0;
real_t delta = 0.0;
ThreadWorkPool work_pool;
LocalVector<LocalVector<GodotBody2D *>> body_islands;
LocalVector<LocalVector<GodotConstraint2D *>> constraint_islands;
LocalVector<GodotConstraint2D *> all_constraints;


@ -343,7 +343,8 @@ void GodotStep3D::step(GodotSpace3D *p_space, real_t p_delta) {
/* SETUP CONSTRAINTS / PROCESS COLLISIONS */
uint32_t total_contraint_count = all_constraints.size();
work_pool.do_work(total_contraint_count, this, &GodotStep3D::_setup_contraint, nullptr);
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &GodotStep3D::_setup_contraint, nullptr, total_contraint_count, -1, true, SNAME("Physics3DConstraintSetup"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
{ //profile
profile_endtime = OS::get_singleton()->get_ticks_usec();
@ -362,7 +363,8 @@ void GodotStep3D::step(GodotSpace3D *p_space, real_t p_delta) {
// Warning: _solve_island modifies the constraint islands for optimization purpose,
// their content is not reliable after these calls and shouldn't be used anymore.
work_pool.do_work(island_count, this, &GodotStep3D::_solve_island, nullptr);
group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &GodotStep3D::_solve_island, nullptr, island_count, -1, true, SNAME("Physics3DConstraintSolveIslands"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
{ //profile
profile_endtime = OS::get_singleton()->get_ticks_usec();
@ -409,10 +411,7 @@ GodotStep3D::GodotStep3D() {
body_islands.reserve(BODY_ISLAND_COUNT_RESERVE);
constraint_islands.reserve(ISLAND_COUNT_RESERVE);
all_constraints.reserve(CONSTRAINT_COUNT_RESERVE);
work_pool.init();
}
GodotStep3D::~GodotStep3D() {
work_pool.finish();
}


@ -33,8 +33,8 @@
#include "godot_space_3d.h"
#include "core/object/worker_thread_pool.h"
#include "core/templates/local_vector.h"
#include "core/templates/thread_work_pool.h"
class GodotStep3D {
uint64_t _step = 1;
@ -42,8 +42,6 @@ class GodotStep3D {
int iterations = 0;
real_t delta = 0.0;
ThreadWorkPool work_pool;
LocalVector<LocalVector<GodotBody3D *>> body_islands;
LocalVector<LocalVector<GodotConstraint3D *>> constraint_islands;
LocalVector<GodotConstraint3D *> all_constraints;


@ -775,7 +775,7 @@ void RenderForwardClustered::_render_list(RenderingDevice::DrawListID p_draw_lis
void RenderForwardClustered::_render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params) {
uint32_t render_total = p_params->element_count;
uint32_t total_threads = RendererThreadPool::singleton->thread_work_pool.get_thread_count();
uint32_t total_threads = WorkerThreadPool::get_singleton()->get_thread_count();
uint32_t render_from = p_thread * render_total / total_threads;
uint32_t render_to = (p_thread + 1 == total_threads) ? render_total : ((p_thread + 1) * render_total / total_threads);
_render_list(thread_draw_lists[p_thread], p_params->framebuffer_format, p_params, render_from, render_to);
@ -787,9 +787,10 @@ void RenderForwardClustered::_render_list_with_threads(RenderListParameters *p_p
if ((uint32_t)p_params->element_count > render_list_thread_threshold && false) { // secondary command buffers need more testing at this time
//multi threaded
thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
thread_draw_lists.resize(WorkerThreadPool::get_singleton()->get_thread_count());
RD::get_singleton()->draw_list_begin_split(p_framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures);
RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RenderForwardClustered::_render_list_thread_function, p_params);
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RenderForwardClustered::_render_list_thread_function, p_params, thread_draw_lists.size(), -1, true, SNAME("ForwardClusteredRenderList"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
RD::get_singleton()->draw_list_end(p_params->barrier);
} else {
//single threaded


@ -758,9 +758,12 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
if ((uint32_t)render_list_params.element_count > render_list_thread_threshold && false) {
// secondary command buffers need more testing at this time
//multi threaded
thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
thread_draw_lists.resize(WorkerThreadPool::get_singleton()->get_thread_count());
RD::get_singleton()->draw_list_begin_split(framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, can_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, can_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RenderForwardMobile::_render_list_thread_function, &render_list_params);
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RenderForwardMobile::_render_list_thread_function, &render_list_params, thread_draw_lists.size(), -1, true, SNAME("ForwardMobileRenderList"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
} else {
//single threaded
RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, can_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, can_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
@ -822,10 +825,12 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
if ((uint32_t)render_list_params.element_count > render_list_thread_threshold && false) {
// secondary command buffers need more testing at this time
//multi threaded
thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
thread_draw_lists.resize(WorkerThreadPool::get_singleton()->get_thread_count());
RD::get_singleton()->draw_list_switch_to_next_pass_split(thread_draw_lists.size(), thread_draw_lists.ptr());
render_list_params.subpass = RD::get_singleton()->draw_list_get_current_pass();
RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RenderForwardMobile::_render_list_thread_function, &render_list_params);
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RenderForwardMobile::_render_list_thread_function, &render_list_params, thread_draw_lists.size(), -1, true, SNAME("ForwardMobileRenderSubpass"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
} else {
//single threaded
RD::DrawListID draw_list = RD::get_singleton()->draw_list_switch_to_next_pass();
@ -859,9 +864,11 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
if ((uint32_t)render_list_params.element_count > render_list_thread_threshold && false) {
// secondary command buffers need more testing at this time
//multi threaded
thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
thread_draw_lists.resize(WorkerThreadPool::get_singleton()->get_thread_count());
RD::get_singleton()->draw_list_begin_split(framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ);
RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RenderForwardMobile::_render_list_thread_function, &render_list_params);
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RenderForwardMobile::_render_list_thread_function, &render_list_params, thread_draw_lists.size(), -1, true, SNAME("ForwardMobileRenderSubpass"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_ALL);
} else {
//single threaded
@ -1771,7 +1778,7 @@ void RenderForwardMobile::_render_list(RenderingDevice::DrawListID p_draw_list,
void RenderForwardMobile::_render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params) {
uint32_t render_total = p_params->element_count;
uint32_t total_threads = RendererThreadPool::singleton->thread_work_pool.get_thread_count();
uint32_t total_threads = WorkerThreadPool::get_singleton()->get_thread_count();
uint32_t render_from = p_thread * render_total / total_threads;
uint32_t render_to = (p_thread + 1 == total_threads) ? render_total : ((p_thread + 1) * render_total / total_threads);
_render_list(thread_draw_lists[p_thread], p_params->framebuffer_format, p_params, render_from, render_to);
@ -1783,9 +1790,11 @@ void RenderForwardMobile::_render_list_with_threads(RenderListParameters *p_para
if ((uint32_t)p_params->element_count > render_list_thread_threshold && false) { // secondary command buffers need more testing at this time
//multi threaded
thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
thread_draw_lists.resize(WorkerThreadPool::get_singleton()->get_thread_count());
RD::get_singleton()->draw_list_begin_split(p_framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures);
RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RenderForwardMobile::_render_list_thread_function, p_params);
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RenderForwardMobile::_render_list_thread_function, p_params, thread_draw_lists.size(), -1, true, SNAME("ForwardMobileRenderSubpass"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
RD::get_singleton()->draw_list_end(p_params->barrier);
} else {
//single threaded


@ -32,7 +32,6 @@
#define RENDERER_COMPOSITOR_RD_H
#include "core/os/os.h"
#include "core/templates/thread_work_pool.h"
#include "servers/rendering/renderer_compositor.h"
#include "servers/rendering/renderer_rd/effects_rd.h"
#include "servers/rendering/renderer_rd/environment/fog.h"


@ -476,7 +476,9 @@ void ShaderRD::_compile_version(Version *p_version) {
#if 1
RendererThreadPool::singleton->thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version);
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &ShaderRD::_compile_variant, p_version, variant_defines.size(), -1, true, SNAME("ShaderCompilation"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
#else
for (int i = 0; i < variant_defines.size(); i++) {
_compile_variant(i, p_version);


@ -2518,14 +2518,14 @@ void RendererSceneCull::render_camera(RID p_render_buffers, RID p_camera, RID p_
RENDER_TIMESTAMP("Update Occlusion Buffer")
// For now just cull on the first camera
RendererSceneOcclusionCull::get_singleton()->buffer_update(p_viewport, camera_data.main_transform, camera_data.main_projection, camera_data.is_orthogonal, RendererThreadPool::singleton->thread_work_pool);
RendererSceneOcclusionCull::get_singleton()->buffer_update(p_viewport, camera_data.main_transform, camera_data.main_projection, camera_data.is_orthogonal);
_render_scene(&camera_data, p_render_buffers, environment, camera->effects, camera->visible_layers, p_scenario, p_viewport, p_shadow_atlas, RID(), -1, p_screen_mesh_lod_threshold, true, r_render_info);
#endif
}
void RendererSceneCull::_visibility_cull_threaded(uint32_t p_thread, VisibilityCullData *cull_data) {
uint32_t total_threads = RendererThreadPool::singleton->thread_work_pool.get_thread_count();
uint32_t total_threads = WorkerThreadPool::get_singleton()->get_thread_count();
uint32_t bin_from = p_thread * cull_data->cull_count / total_threads;
uint32_t bin_to = (p_thread + 1 == total_threads) ? cull_data->cull_count : ((p_thread + 1) * cull_data->cull_count / total_threads);
@ -2622,7 +2622,7 @@ bool RendererSceneCull::_visibility_parent_check(const CullData &p_cull_data, co
void RendererSceneCull::_scene_cull_threaded(uint32_t p_thread, CullData *cull_data) {
uint32_t cull_total = cull_data->scenario->instance_data.size();
uint32_t total_threads = RendererThreadPool::singleton->thread_work_pool.get_thread_count();
uint32_t total_threads = WorkerThreadPool::get_singleton()->get_thread_count();
uint32_t cull_from = p_thread * cull_total / total_threads;
uint32_t cull_to = (p_thread + 1 == total_threads) ? cull_total : ((p_thread + 1) * cull_total / total_threads);
@ -2919,7 +2919,8 @@ void RendererSceneCull::_render_scene(const RendererSceneRender::CameraData *p_c
}
if (visibility_cull_data.cull_count > thread_cull_threshold) {
RendererThreadPool::singleton->thread_work_pool.do_work(RendererThreadPool::singleton->thread_work_pool.get_thread_count(), this, &RendererSceneCull::_visibility_cull_threaded, &visibility_cull_data);
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RendererSceneCull::_visibility_cull_threaded, &visibility_cull_data, WorkerThreadPool::get_singleton()->get_thread_count(), -1, true, SNAME("VisibilityCullInstances"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
} else {
_visibility_cull(visibility_cull_data, visibility_cull_data.cull_offset, visibility_cull_data.cull_offset + visibility_cull_data.cull_count);
}
@ -3024,7 +3025,8 @@ void RendererSceneCull::_render_scene(const RendererSceneRender::CameraData *p_c
scene_cull_result_threads[i].clear();
}
RendererThreadPool::singleton->thread_work_pool.do_work(scene_cull_result_threads.size(), this, &RendererSceneCull::_scene_cull_threaded, &cull_data);
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RendererSceneCull::_scene_cull_threaded, &cull_data, scene_cull_result_threads.size(), -1, true, SNAME("RenderCullInstances"));
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
for (uint32_t i = 0; i < scene_cull_result_threads.size(); i++) {
scene_cull_result.append_from(scene_cull_result_threads[i]);
@ -4030,14 +4032,14 @@ RendererSceneCull::RendererSceneCull() {
}
scene_cull_result.init(&rid_cull_page_pool, &geometry_instance_cull_page_pool, &instance_cull_page_pool);
scene_cull_result_threads.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
scene_cull_result_threads.resize(WorkerThreadPool::get_singleton()->get_thread_count());
for (uint32_t i = 0; i < scene_cull_result_threads.size(); i++) {
scene_cull_result_threads[i].init(&rid_cull_page_pool, &geometry_instance_cull_page_pool, &instance_cull_page_pool);
}
indexer_update_iterations = GLOBAL_GET("rendering/limits/spatial_indexer/update_iterations_per_frame");
thread_cull_threshold = GLOBAL_GET("rendering/limits/spatial_indexer/threaded_cull_minimum_instances");
thread_cull_threshold = MAX(thread_cull_threshold, (uint32_t)RendererThreadPool::singleton->thread_work_pool.get_thread_count()); //make sure there is at least one thread per CPU
thread_cull_threshold = MAX(thread_cull_threshold, (uint32_t)WorkerThreadPool::get_singleton()->get_thread_count()); //make sure there is at least one thread per CPU
taa_jitter_array.resize(TAA_JITTER_COUNT);
for (int i = 0; i < TAA_JITTER_COUNT; i++) {


@ -183,7 +183,8 @@ public:
}
virtual void buffer_set_scenario(RID p_buffer, RID p_scenario) { _print_warning(); }
virtual void buffer_set_size(RID p_buffer, const Vector2i &p_size) { _print_warning(); }
virtual void buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_pool) {}
virtual void buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) {}
virtual RID buffer_get_debug_texture(RID p_buffer) {
_print_warning();
return RID();


@ -1,42 +0,0 @@
/*************************************************************************/
/* renderer_thread_pool.cpp */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#include "renderer_thread_pool.h"
RendererThreadPool *RendererThreadPool::singleton = nullptr;
RendererThreadPool::RendererThreadPool() {
singleton = this;
thread_work_pool.init();
}
RendererThreadPool::~RendererThreadPool() {
thread_work_pool.finish();
}


@ -1,45 +0,0 @@
/*************************************************************************/
/* renderer_thread_pool.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#ifndef RENDERER_THREAD_POOL_H
#define RENDERER_THREAD_POOL_H
#include "core/templates/thread_work_pool.h"
class RendererThreadPool {
public:
ThreadWorkPool thread_work_pool;
static RendererThreadPool *singleton;
RendererThreadPool();
~RendererThreadPool();
};
#endif // RENDERER_THREAD_POOL_H


@ -154,7 +154,7 @@ void RendererViewport::_draw_3d(Viewport *p_viewport) {
if (p_viewport->use_occlusion_culling) {
if (p_viewport->occlusion_buffer_dirty) {
float aspect = p_viewport->size.aspect();
int max_size = occlusion_rays_per_thread * RendererThreadPool::singleton->thread_work_pool.get_thread_count();
int max_size = occlusion_rays_per_thread * WorkerThreadPool::get_singleton()->get_thread_count();
int viewport_size = p_viewport->size.width * p_viewport->size.height;
max_size = CLAMP(max_size, viewport_size / (32 * 32), viewport_size / (2 * 2)); // At least one depth pixel for every 16x16 region. At most one depth pixel for every 2x2 region.


@ -2831,7 +2831,6 @@ void RenderingServer::set_render_loop_enabled(bool p_enabled) {
RenderingServer::RenderingServer() {
//ERR_FAIL_COND(singleton);
thread_pool = memnew(RendererThreadPool);
singleton = this;
}
@ -3032,6 +3031,5 @@ void RenderingServer::init() {
}
RenderingServer::~RenderingServer() {
memdelete(thread_pool);
singleton = nullptr;
}


@ -35,11 +35,11 @@
#include "core/math/geometry_3d.h"
#include "core/math/transform_2d.h"
#include "core/object/class_db.h"
#include "core/object/worker_thread_pool.h"
#include "core/templates/rid.h"
#include "core/variant/typed_array.h"
#include "core/variant/variant.h"
#include "servers/display_server.h"
#include "servers/rendering/renderer_thread_pool.h"
#include "servers/rendering/rendering_device.h"
class RenderingServer : public Object {
@ -52,8 +52,6 @@ class RenderingServer : public Object {
Array _get_array_from_surface(uint32_t p_format, Vector<uint8_t> p_vertex_data, Vector<uint8_t> p_attrib_data, Vector<uint8_t> p_skin_data, int p_vertex_len, Vector<uint8_t> p_index_data, int p_index_len) const;
RendererThreadPool *thread_pool = nullptr;
const Vector2 SMALL_VEC2 = Vector2(CMP_EPSILON, CMP_EPSILON);
const Vector3 SMALL_VEC3 = Vector3(CMP_EPSILON, CMP_EPSILON, CMP_EPSILON);