diff --git a/core/object/worker_thread_pool.h b/core/object/worker_thread_pool.h
index 9fe8497eafe5..d4d93877658c 100644
--- a/core/object/worker_thread_pool.h
+++ b/core/object/worker_thread_pool.h
@@ -202,25 +202,4 @@ public:
 	~WorkerThreadPool();
 };
 
-template <typename F>
-static _FORCE_INLINE_ void for_range(int i_begin, int i_end, bool parallel, String name, F f) {
-	if (!parallel) {
-		for (int i = i_begin; i < i_end; i++) {
-			f(i);
-		}
-		return;
-	}
-
-	auto wrapper = [&](int i, void *unused) {
-		f(i + i_begin);
-	};
-
-	WorkerThreadPool *wtp = WorkerThreadPool::get_singleton();
-	WorkerThreadPool::GroupID gid = wtp->add_template_group_task(
-			&wrapper, &decltype(wrapper)::operator(), nullptr,
-			i_end - i_begin, -1,
-			true, name);
-	wtp->wait_for_group_task_completion(gid);
-}
-
 #endif // WORKER_THREAD_POOL_H
diff --git a/modules/raycast/raycast_occlusion_cull.cpp b/modules/raycast/raycast_occlusion_cull.cpp
index eee0de967eeb..69fbf8748348 100644
--- a/modules/raycast/raycast_occlusion_cull.cpp
+++ b/modules/raycast/raycast_occlusion_cull.cpp
@@ -355,14 +355,46 @@ void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_in
 	// Embree requires the last element to be readable by a 16-byte SSE load instruction, so we add padding to be safe.
 	occ_inst->xformed_vertices.resize(vertices_size + 1);
 
-	for_range(0, vertices_size, vertices_size > 1024, SNAME("RaycastOcclusionCull"), [&](const int i) {
-		occ_inst->xformed_vertices[i] = occ_inst->xform.xform(occ->vertices[i]);
-	});
+	const Vector3 *read_ptr = occ->vertices.ptr();
+	Vector3 *write_ptr = occ_inst->xformed_vertices.ptr();
+
+	if (vertices_size > 1024) {
+		// Large occluders: split the vertex array into one contiguous slice per pool thread.
+		TransformThreadData td;
+		td.xform = occ_inst->xform;
+		td.read = read_ptr;
+		td.write = write_ptr;
+		td.vertex_count = vertices_size;
+		td.thread_count = WorkerThreadPool::get_singleton()->get_thread_count();
+		WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_transform_vertices_thread, &td, td.thread_count, -1, true, SNAME("RaycastOcclusionCull"));
+		// `td` is a local of this function, so wait for the group task before leaving this scope.
+		WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
+
+	} else {
+		_transform_vertices_range(read_ptr, write_ptr, occ_inst->xform, 0, vertices_size);
+	}
 
 	occ_inst->indices.resize(occ->indices.size());
 	memcpy(occ_inst->indices.ptr(), occ->indices.ptr(), occ->indices.size() * sizeof(int32_t));
 }
 
+// Group task entry point: transforms slice number `p_thread` out of `p_data->thread_count` contiguous vertex slices.
+void RaycastOcclusionCull::Scenario::_transform_vertices_thread(uint32_t p_thread, TransformThreadData *p_data) {
+	uint32_t vertex_total = p_data->vertex_count;
+	uint32_t total_threads = p_data->thread_count;
+	// Widen to 64 bits before multiplying so the slice bounds cannot wrap around for very large vertex counts.
+	uint32_t from = uint32_t(uint64_t(p_thread) * vertex_total / total_threads);
+	uint32_t to = (p_thread + 1 == total_threads) ? vertex_total : uint32_t(uint64_t(p_thread + 1) * vertex_total / total_threads);
+	_transform_vertices_range(p_data->read, p_data->write, p_data->xform, from, to);
+}
+
+// Writes `p_xform.xform(p_read[i])` into `p_write[i]` for every index i in [p_from, p_to).
+void RaycastOcclusionCull::Scenario::_transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform3D &p_xform, int p_from, int p_to) {
+	for (int i = p_from; i < p_to; i++) {
+		p_write[i] = p_xform.xform(p_read[i]);
+	}
+}
+
 void RaycastOcclusionCull::Scenario::_commit_scene(void *p_ud) {
 	Scenario *scenario = (Scenario *)p_ud;
 	int commit_idx = 1 - (scenario->current_scene_idx);
diff --git a/modules/raycast/raycast_occlusion_cull.h b/modules/raycast/raycast_occlusion_cull.h
index 7a5346878bc8..c4e733b66459 100644
--- a/modules/raycast/raycast_occlusion_cull.h
+++ b/modules/raycast/raycast_occlusion_cull.h
@@ -121,6 +121,15 @@ private:
 			const uint32_t *masks;
 		};
 
+		// Parameters shared by every `_transform_vertices_thread` invocation of one group task.
+		struct TransformThreadData {
+			uint32_t thread_count = 0;
+			uint32_t vertex_count = 0;
+			Transform3D xform;
+			const Vector3 *read = nullptr;
+			Vector3 *write = nullptr;
+		};
+
 		Thread *commit_thread = nullptr;
 		bool commit_done = true;
 		bool dirty = false;
@@ -136,6 +145,8 @@ private:
 
 		void _update_dirty_instance_thread(int p_idx, RID *p_instances);
 		void _update_dirty_instance(int p_idx, RID *p_instances);
+		void _transform_vertices_thread(uint32_t p_thread, TransformThreadData *p_data);
+		void _transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform3D &p_xform, int p_from, int p_to);
 		static void _commit_scene(void *p_ud);
 		bool update();
 
diff --git a/tests/core/threads/test_worker_thread_pool.h b/tests/core/threads/test_worker_thread_pool.h
index ef0b475715b4..e9a762b57bb9 100644
--- a/tests/core/threads/test_worker_thread_pool.h
+++ b/tests/core/threads/test_worker_thread_pool.h
@@ -106,32 +106,6 @@ TEST_CASE("[WorkerThreadPool] Process elements using group tasks") {
 	}
 }
 
-TEST_CASE("[WorkerThreadPool] Parallel foreach") {
-	const int count_max = 256;
-
-	for (int midpoint = 0; midpoint < count_max; midpoint++) {
-		LocalVector<int> c;
-		c.resize(count_max);
-
-		for_range(0, count_max, true, String(), [&](int i) {
-			c[i] = 1;
-		});
-		c.sort();
-		CHECK(c[0] == 1);
-		CHECK(c[0] == c[count_max - 1]);
-
-		for_range(0, midpoint, false, String(), [&](int i) {
-			c[i]++;
-		});
-		for_range(midpoint, count_max, true, String(), [&](int i) {
-			c[i]++;
-		});
-		c.sort();
-		CHECK(c[0] == 2);
-		CHECK(c[0] == c[count_max - 1]);
-	}
-}
-
 } // namespace TestWorkerThreadPool
 
 #endif // TEST_WORKER_THREAD_POOL_H