From fecd22d2aef0afb0436ed53b75f32cb9329af1d3 Mon Sep 17 00:00:00 2001 From: Alexandre Julliard Date: Wed, 29 May 2024 22:54:58 +0200 Subject: [PATCH] vkd3d: Import upstream release 1.12. --- dlls/d3d10/tests/effect.c | 1 - dlls/d3dcompiler_43/tests/hlsl_d3d9.c | 6 +- libs/vkd3d/AUTHORS | 2 +- libs/vkd3d/Makefile.in | 1 + libs/vkd3d/config.h | 4 +- libs/vkd3d/include/private/vkd3d_common.h | 322 ++ libs/vkd3d/include/private/vkd3d_memory.h | 11 +- libs/vkd3d/include/vkd3d.h | 205 +- libs/vkd3d/include/vkd3d_shader.h | 76 +- libs/vkd3d/include/vkd3d_types.h | 8 + libs/vkd3d/libs/vkd3d-common/blob.c | 5 +- libs/vkd3d/libs/vkd3d-common/debug.c | 2 +- libs/vkd3d/libs/vkd3d-common/error.c | 7 +- libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 1198 ++++--- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 360 +- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 47 +- libs/vkd3d/libs/vkd3d-shader/dxil.c | 2449 ++++++++++++- libs/vkd3d/libs/vkd3d-shader/fx.c | 996 +++++- libs/vkd3d/libs/vkd3d-shader/glsl.c | 109 +- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 729 ++-- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 160 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 9 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 910 +++-- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 887 +++-- .../libs/vkd3d-shader/hlsl_constant_ops.c | 317 +- libs/vkd3d/libs/vkd3d-shader/ir.c | 3162 ++++++++++++++--- libs/vkd3d/libs/vkd3d-shader/preproc.l | 3 +- libs/vkd3d/libs/vkd3d-shader/spirv.c | 729 +++- libs/vkd3d/libs/vkd3d-shader/tpf.c | 417 +-- .../libs/vkd3d-shader/vkd3d_shader_main.c | 340 +- .../libs/vkd3d-shader/vkd3d_shader_private.h | 192 +- libs/vkd3d/libs/vkd3d/cache.c | 254 ++ libs/vkd3d/libs/vkd3d/command.c | 41 +- libs/vkd3d/libs/vkd3d/device.c | 1062 ++++-- libs/vkd3d/libs/vkd3d/resource.c | 51 +- libs/vkd3d/libs/vkd3d/state.c | 42 +- libs/vkd3d/libs/vkd3d/utils.c | 34 + libs/vkd3d/libs/vkd3d/vkd3d_main.c | 22 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 268 +- 39 files changed, 12000 insertions(+), 3438 deletions(-) create mode 100644 
libs/vkd3d/libs/vkd3d/cache.c diff --git a/dlls/d3d10/tests/effect.c b/dlls/d3d10/tests/effect.c index 94f046ea479..534ea00fa17 100644 --- a/dlls/d3d10/tests/effect.c +++ b/dlls/d3d10/tests/effect.c @@ -9941,7 +9941,6 @@ static void test_effect_compiler(void) cb = effect->lpVtbl->GetConstantBufferByIndex(effect, 1); hr = cb->lpVtbl->GetDesc(cb, &var_desc); - todo_wine ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); if (hr == S_OK) ok(!strcmp(var_desc.Name, "cb1"), "Unexpected variable name %s.\n", var_desc.Name); diff --git a/dlls/d3dcompiler_43/tests/hlsl_d3d9.c b/dlls/d3dcompiler_43/tests/hlsl_d3d9.c index a6ecb6a37ee..e736f932b39 100644 --- a/dlls/d3dcompiler_43/tests/hlsl_d3d9.c +++ b/dlls/d3dcompiler_43/tests/hlsl_d3d9.c @@ -586,7 +586,7 @@ static void test_conditionals(void) ID3D10Blob_Release(ps_code); } - todo_wine ps_code = compile_shader(ps_ternary_source, "ps_2_0", 0); + ps_code = compile_shader(ps_ternary_source, "ps_2_0", 0); if (ps_code) { draw_quad(device, ps_code); @@ -595,7 +595,6 @@ static void test_conditionals(void) for (i = 0; i < 320; i += 40) { v = get_readback_vec4(&rb, i, 0); - todo_wine ok(compare_vec4(v, 0.5f, 0.25f, 0.5f, 0.75f, 0), "Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", v->x, v->y, v->z, v->w); } @@ -603,7 +602,6 @@ static void test_conditionals(void) for (i = 360; i < 640; i += 40) { v = get_readback_vec4(&rb, i, 0); - todo_wine ok(compare_vec4(v, 0.6f, 0.8f, 0.1f, 0.2f, 0), "Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", v->x, v->y, v->z, v->w); } @@ -662,7 +660,6 @@ static void test_float_vectors(void) ID3D10Blob_Release(ps_code); } - todo_wine ps_code = compile_shader(ps_uniform_indexing_source, "ps_2_0", 0); if (ps_code) { @@ -674,7 +671,6 @@ static void test_float_vectors(void) draw_quad(device, ps_code); v = get_color_vec4(device, 0, 0); - todo_wine ok(compare_vec4(&v, 0.5f, 0.3f, 0.8f, 0.2f, 0), "Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", v.x, v.y, v.z, v.w); diff --git a/libs/vkd3d/AUTHORS 
b/libs/vkd3d/AUTHORS index 2bd9d4e7514..b26c2819c30 100644 --- a/libs/vkd3d/AUTHORS +++ b/libs/vkd3d/AUTHORS @@ -10,6 +10,7 @@ Chip Davis Conor McCarthy David Gow Derek Lesho +Elizabeth Figura Ethan Lee Evan Tang Fabian Maurer @@ -36,5 +37,4 @@ Stefan Dösinger Sven Hesse Victor Chiletto Vinson Lee -Zebediah Figura Zhiyi Zhang diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in index 448e9a0e61d..94e4833dc9a 100644 --- a/libs/vkd3d/Makefile.in +++ b/libs/vkd3d/Makefile.in @@ -30,6 +30,7 @@ SOURCES = \ libs/vkd3d-shader/spirv.c \ libs/vkd3d-shader/tpf.c \ libs/vkd3d-shader/vkd3d_shader_main.c \ + libs/vkd3d/cache.c \ libs/vkd3d/command.c \ libs/vkd3d/device.c \ libs/vkd3d/resource.c \ diff --git a/libs/vkd3d/config.h b/libs/vkd3d/config.h index 78afdec2e0a..f0f4ca68c5c 100644 --- a/libs/vkd3d/config.h +++ b/libs/vkd3d/config.h @@ -1,5 +1,5 @@ #define PACKAGE_NAME "vkd3d" -#define PACKAGE_STRING "vkd3d 1.11" -#define PACKAGE_VERSION "1.11" +#define PACKAGE_STRING "vkd3d 1.12" +#define PACKAGE_VERSION "1.12" #define PATH_MAX 1024 #define SONAME_LIBVULKAN "vulkan-1.dll" diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index 6a3b530d868..a9d709d10fe 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -30,6 +30,9 @@ #include #include #include +#ifndef _WIN32 +#include +#endif #ifdef _MSC_VER #include @@ -72,6 +75,8 @@ #define TAG_XNAP VKD3D_MAKE_TAG('X', 'N', 'A', 'P') #define TAG_XNAS VKD3D_MAKE_TAG('X', 'N', 'A', 'S') +#define TAG_RD11_REVERSE 0x25441313 + static inline uint64_t align(uint64_t addr, size_t alignment) { return (addr + (alignment - 1)) & ~(alignment - 1); @@ -105,6 +110,125 @@ VKD3D_NORETURN static inline void vkd3d_unreachable_(const char *filename, unsig #define vkd3d_unreachable() vkd3d_unreachable_(__FILE__, __LINE__) #endif +#ifdef VKD3D_NO_TRACE_MESSAGES +#define TRACE(args...) 
do { } while (0) +#define TRACE_ON() (false) +#endif + +#ifdef VKD3D_NO_DEBUG_MESSAGES +#define WARN(args...) do { } while (0) +#define FIXME(args...) do { } while (0) +#endif + +enum vkd3d_dbg_level +{ + VKD3D_DBG_LEVEL_NONE, + VKD3D_DBG_LEVEL_ERR, + VKD3D_DBG_LEVEL_FIXME, + VKD3D_DBG_LEVEL_WARN, + VKD3D_DBG_LEVEL_TRACE, +}; + +enum vkd3d_dbg_level vkd3d_dbg_get_level(void); + +void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(3, 4); +void vkd3d_dbg_set_log_callback(PFN_vkd3d_log callback); + +const char *vkd3d_dbg_sprintf(const char *fmt, ...) VKD3D_PRINTF_FUNC(1, 2); +const char *vkd3d_dbg_vsprintf(const char *fmt, va_list args); +const char *debugstr_a(const char *str); +const char *debugstr_an(const char *str, size_t n); +const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); + +#define VKD3D_DBG_LOG(level) \ + do { \ + const enum vkd3d_dbg_level vkd3d_dbg_level = VKD3D_DBG_LEVEL_##level; \ + VKD3D_DBG_PRINTF + +#define VKD3D_DBG_LOG_ONCE(first_time_level, level) \ + do { \ + static bool vkd3d_dbg_next_time; \ + const enum vkd3d_dbg_level vkd3d_dbg_level = vkd3d_dbg_next_time \ + ? VKD3D_DBG_LEVEL_##level : VKD3D_DBG_LEVEL_##first_time_level; \ + vkd3d_dbg_next_time = true; \ + VKD3D_DBG_PRINTF + +#define VKD3D_DBG_PRINTF(...) 
\ + vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0) + +#ifndef TRACE +#define TRACE VKD3D_DBG_LOG(TRACE) +#endif + +#ifndef WARN +#define WARN VKD3D_DBG_LOG(WARN) +#endif + +#ifndef FIXME +#define FIXME VKD3D_DBG_LOG(FIXME) +#endif + +#define ERR VKD3D_DBG_LOG(ERR) + +#ifndef TRACE_ON +#define TRACE_ON() (vkd3d_dbg_get_level() == VKD3D_DBG_LEVEL_TRACE) +#endif + +#ifndef WARN_ON +#define WARN_ON() (vkd3d_dbg_get_level() >= VKD3D_DBG_LEVEL_WARN) +#endif + +#define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) + +#define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name + +static inline const char *debugstr_guid(const GUID *guid) +{ + if (!guid) + return "(null)"; + + return vkd3d_dbg_sprintf("{%08lx-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x}", + (unsigned long)guid->Data1, guid->Data2, guid->Data3, guid->Data4[0], + guid->Data4[1], guid->Data4[2], guid->Data4[3], guid->Data4[4], + guid->Data4[5], guid->Data4[6], guid->Data4[7]); +} + +static inline const char *debugstr_hresult(HRESULT hr) +{ + switch (hr) + { +#define TO_STR(u) case u: return #u; + TO_STR(S_OK) + TO_STR(S_FALSE) + TO_STR(E_NOTIMPL) + TO_STR(E_NOINTERFACE) + TO_STR(E_POINTER) + TO_STR(E_ABORT) + TO_STR(E_FAIL) + TO_STR(E_OUTOFMEMORY) + TO_STR(E_INVALIDARG) + TO_STR(DXGI_ERROR_NOT_FOUND) + TO_STR(DXGI_ERROR_MORE_DATA) + TO_STR(DXGI_ERROR_UNSUPPORTED) +#undef TO_STR + default: + return vkd3d_dbg_sprintf("%#x", (int)hr); + } +} + +unsigned int vkd3d_env_var_as_uint(const char *name, unsigned int default_value); + +struct vkd3d_debug_option +{ + const char *name; + uint64_t flag; +}; + +bool vkd3d_debug_list_has_member(const char *string, const char *member); +uint64_t vkd3d_parse_debug_options(const char *string, + const struct vkd3d_debug_option *options, unsigned int option_count); +void vkd3d_set_thread_name(const char *name); + static inline unsigned int vkd3d_popcount(unsigned int v) { #ifdef _MSC_VER @@ -216,6 +340,13 @@ static inline int 
vkd3d_u32_compare(uint32_t x, uint32_t y) return (x > y) - (x < y); } +static inline int vkd3d_u64_compare(uint64_t x, uint64_t y) +{ + return (x > y) - (x < y); +} + +#define VKD3D_BITMAP_SIZE(x) (((x) + 0x1f) >> 5) + static inline bool bitmap_clear(uint32_t *map, unsigned int idx) { return map[idx >> 5] &= ~(1u << (idx & 0x1f)); @@ -305,6 +436,197 @@ static inline uint32_t vkd3d_atomic_increment_u32(uint32_t volatile *x) return vkd3d_atomic_add_fetch_u32(x, 1); } +static inline bool vkd3d_atomic_compare_exchange_u32(uint32_t volatile *x, uint32_t expected, uint32_t val) +{ +#if HAVE_SYNC_BOOL_COMPARE_AND_SWAP + return __sync_bool_compare_and_swap(x, expected, val); +#elif defined(_WIN32) + return InterlockedCompareExchange((LONG *)x, val, expected) == expected; +#else +# error "vkd3d_atomic_compare_exchange_u32() not implemented for this platform" +#endif +} + +static inline bool vkd3d_atomic_compare_exchange_ptr(void * volatile *x, void *expected, void *val) +{ +#if HAVE_SYNC_BOOL_COMPARE_AND_SWAP + return __sync_bool_compare_and_swap(x, expected, val); +#elif defined(_WIN32) + return InterlockedCompareExchangePointer(x, val, expected) == expected; +#else +# error "vkd3d_atomic_compare_exchange_ptr() not implemented for this platform" +#endif +} + +static inline uint32_t vkd3d_atomic_exchange_u32(uint32_t volatile *x, uint32_t val) +{ +#if HAVE_ATOMIC_EXCHANGE_N + return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); +#elif defined(_WIN32) + return InterlockedExchange((LONG *)x, val); +#else + uint32_t expected; + + do + { + expected = *x; + } while (!vkd3d_atomic_compare_exchange_u32(x, expected, val)); + + return expected; +#endif +} + +static inline void *vkd3d_atomic_exchange_ptr(void * volatile *x, void *val) +{ +#if HAVE_ATOMIC_EXCHANGE_N + return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); +#elif defined(_WIN32) + return InterlockedExchangePointer(x, val); +#else + void *expected; + + do + { + expected = *x; + } while 
(!vkd3d_atomic_compare_exchange_ptr(x, expected, val)); + + return expected; +#endif +} + +struct vkd3d_mutex +{ +#ifdef _WIN32 + CRITICAL_SECTION lock; +#else + pthread_mutex_t lock; +#endif +}; + +#ifdef _WIN32 +#define VKD3D_MUTEX_INITIALIZER {{NULL, -1, 0, 0, 0, 0}} +#else +#define VKD3D_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +#endif + +static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) +{ +#ifdef _WIN32 + InitializeCriticalSection(&lock->lock); +#else + int ret; + + if ((ret = pthread_mutex_init(&lock->lock, NULL))) + ERR("Failed to initialise the mutex, ret %d.\n", ret); +#endif +} + +static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) +{ +#ifdef _WIN32 + EnterCriticalSection(&lock->lock); +#else + int ret; + + if ((ret = pthread_mutex_lock(&lock->lock))) + ERR("Failed to lock the mutex, ret %d.\n", ret); +#endif +} + +static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) +{ +#ifdef _WIN32 + LeaveCriticalSection(&lock->lock); +#else + int ret; + + if ((ret = pthread_mutex_unlock(&lock->lock))) + ERR("Failed to unlock the mutex, ret %d.\n", ret); +#endif +} + +static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) +{ +#ifdef _WIN32 + DeleteCriticalSection(&lock->lock); +#else + int ret; + + if ((ret = pthread_mutex_destroy(&lock->lock))) + ERR("Failed to destroy the mutex, ret %d.\n", ret); +#endif +} + +struct vkd3d_cond +{ +#ifdef _WIN32 + CONDITION_VARIABLE cond; +#else + pthread_cond_t cond; +#endif +}; + +static inline void vkd3d_cond_init(struct vkd3d_cond *cond) +{ +#ifdef _WIN32 + InitializeConditionVariable(&cond->cond); +#else + int ret; + + if ((ret = pthread_cond_init(&cond->cond, NULL))) + ERR("Failed to initialise the condition variable, ret %d.\n", ret); +#endif +} + +static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) +{ +#ifdef _WIN32 + WakeConditionVariable(&cond->cond); +#else + int ret; + + if ((ret = pthread_cond_signal(&cond->cond))) + ERR("Failed to signal the condition variable, 
ret %d.\n", ret); +#endif +} + +static inline void vkd3d_cond_broadcast(struct vkd3d_cond *cond) +{ +#ifdef _WIN32 + WakeAllConditionVariable(&cond->cond); +#else + int ret; + + if ((ret = pthread_cond_broadcast(&cond->cond))) + ERR("Failed to broadcast the condition variable, ret %d.\n", ret); +#endif +} + +static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) +{ +#ifdef _WIN32 + if (!SleepConditionVariableCS(&cond->cond, &lock->lock, INFINITE)) + ERR("Failed to wait on the condition variable, error %lu.\n", GetLastError()); +#else + int ret; + + if ((ret = pthread_cond_wait(&cond->cond, &lock->lock))) + ERR("Failed to wait on the condition variable, ret %d.\n", ret); +#endif +} + +static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) +{ +#ifdef _WIN32 + /* Nothing to do. */ +#else + int ret; + + if ((ret = pthread_cond_destroy(&cond->cond))) + ERR("Failed to destroy the condition variable, ret %d.\n", ret); +#endif +} + static inline void vkd3d_parse_version(const char *version, int *major, int *minor) { *major = atoi(version); diff --git a/libs/vkd3d/include/private/vkd3d_memory.h b/libs/vkd3d/include/private/vkd3d_memory.h index 8a2edb1000d..682d35c03c6 100644 --- a/libs/vkd3d/include/private/vkd3d_memory.h +++ b/libs/vkd3d/include/private/vkd3d_memory.h @@ -24,7 +24,7 @@ #include #include -#include "vkd3d_debug.h" +#include "vkd3d_common.h" static inline void *vkd3d_malloc(size_t size) { @@ -65,6 +65,15 @@ static inline char *vkd3d_strdup(const char *string) return ptr; } +static inline void *vkd3d_memdup(const void *mem, size_t size) +{ + void *ptr; + + if ((ptr = vkd3d_malloc(size))) + memcpy(ptr, mem, size); + return ptr; +} + bool vkd3d_array_reserve(void **elements, size_t *capacity, size_t element_count, size_t element_size); #endif /* __VKD3D_MEMORY_H */ diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h index aa68b70e1bf..e18f683e67d 100644 --- a/libs/vkd3d/include/vkd3d.h +++ 
b/libs/vkd3d/include/vkd3d.h @@ -46,21 +46,37 @@ extern "C" { * \since 1.0 */ +/** The type of a chained structure. */ enum vkd3d_structure_type { - /* 1.0 */ + /** The structure is a vkd3d_instance_create_info structure. */ VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + /** The structure is a vkd3d_device_create_info structure. */ VKD3D_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + /** The structure is a vkd3d_image_resource_create_info structure. */ VKD3D_STRUCTURE_TYPE_IMAGE_RESOURCE_CREATE_INFO, - /* 1.1 */ + /** + * The structure is a vkd3d_optional_instance_extensions_info structure. + * \since 1.1 + */ VKD3D_STRUCTURE_TYPE_OPTIONAL_INSTANCE_EXTENSIONS_INFO, - /* 1.2 */ + /** + * The structure is a vkd3d_optional_device_extensions_info structure. + * \since 1.2 + */ VKD3D_STRUCTURE_TYPE_OPTIONAL_DEVICE_EXTENSIONS_INFO, + /** + * The structure is a vkd3d_application_info structure. + * \since 1.2 + */ VKD3D_STRUCTURE_TYPE_APPLICATION_INFO, - /* 1.3 */ + /** + * The structure is a vkd3d_host_time_domain_info structure. + * \since 1.3 + */ VKD3D_STRUCTURE_TYPE_HOST_TIME_DOMAIN_INFO, VKD3D_FORCE_32_BIT_ENUM(VKD3D_STRUCTURE_TYPE), @@ -80,6 +96,7 @@ enum vkd3d_api_version VKD3D_API_VERSION_1_9, VKD3D_API_VERSION_1_10, VKD3D_API_VERSION_1_11, + VKD3D_API_VERSION_1_12, VKD3D_FORCE_32_BIT_ENUM(VKD3D_API_VERSION), }; @@ -93,98 +110,262 @@ typedef HRESULT (*PFN_vkd3d_join_thread)(void *thread); struct vkd3d_instance; +/** + * A chained structure containing instance creation parameters. + */ struct vkd3d_instance_create_info { + /** Must be set to VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO. */ enum vkd3d_structure_type type; + /** Optional pointer to a structure containing further parameters. */ const void *next; + /** A pointer to a function to signal events. */ PFN_vkd3d_signal_event pfn_signal_event; + /** + * An optional pointer to a function to create threads. If this is NULL vkd3d will use a + * function of its choice, depending on the platform.
It must be NULL if and only if + * pfn_join_thread is NULL. + */ PFN_vkd3d_create_thread pfn_create_thread; + /** + * An optional pointer to a function to join threads. If this is NULL vkd3d will use a + * function of its choice, depending on the platform. It must be NULL if and only if + * pfn_create_thread is NULL. + */ PFN_vkd3d_join_thread pfn_join_thread; + /** The size of type WCHAR. It must be 2 or 4 and should normally be set to sizeof(WCHAR). */ size_t wchar_size; - /* If set to NULL, libvkd3d loads libvulkan. */ + /** + * A pointer to the vkGetInstanceProcAddr Vulkan function, which will be used to load all the + * other Vulkan functions. If set to NULL, vkd3d will search and use the Vulkan loader. + */ PFN_vkGetInstanceProcAddr pfn_vkGetInstanceProcAddr; + /** + * A list of Vulkan instance extensions to request. They are intended as required, so instance + * creation will fail if any of them is not available. + */ const char * const *instance_extensions; + /** The number of elements in the instance_extensions array. */ uint32_t instance_extension_count; }; -/* Extends vkd3d_instance_create_info. Available since 1.1. */ +/** + * A chained structure to specify optional instance extensions. + * + * This structure extends vkd3d_instance_create_info. + * + * \since 1.1 + */ struct vkd3d_optional_instance_extensions_info { + /** Must be set to VKD3D_STRUCTURE_TYPE_OPTIONAL_INSTANCE_EXTENSIONS_INFO. */ enum vkd3d_structure_type type; + /** Optional pointer to a structure containing further parameters. */ const void *next; + /** + * A list of optional Vulkan instance extensions to request. Instance creation does not fail if + * they are not available. + */ const char * const *extensions; + /** The number of elements in the extensions array. */ uint32_t extension_count; }; -/* Extends vkd3d_instance_create_info. Available since 1.2. */ +/** + * A chained structure to specify application information. + * + * This structure extends vkd3d_instance_create_info. 
+ * + * \since 1.2 + */ struct vkd3d_application_info { + /** Must be set to VKD3D_STRUCTURE_TYPE_APPLICATION_INFO. */ enum vkd3d_structure_type type; + /** Optional pointer to a structure containing further parameters. */ const void *next; + /** + * The application's name, to be passed to the Vulkan implementation. If it is NULL, a name is + * computed from the process executable filename. If that cannot be done, the empty string is + * used. + */ const char *application_name; + /** The application's version, to be passed to the Vulkan implementation. */ uint32_t application_version; - const char *engine_name; /* "vkd3d" if NULL */ - uint32_t engine_version; /* vkd3d version if engine_name is NULL */ + /** + * The engine name, to be passed to the Vulkan implementation. If it is NULL, "vkd3d" is used. + */ + const char *engine_name; + /** + * The engine version, to be passed to the Vulkan implementation. If it is 0, the version is + * computed from the vkd3d library version. + */ + uint32_t engine_version; + /** + * The vkd3d API version to use, to guarantee backward compatibility of the shared library. If + * this chained structure is not used then VKD3D_API_VERSION_1_0 is used. + */ enum vkd3d_api_version api_version; }; -/* Extends vkd3d_instance_create_info. Available since 1.3. */ +/** + * A chained structure to specify the host time domain. + * + * This structure extends vkd3d_instance_create_info. + * + * \since 1.3 + */ struct vkd3d_host_time_domain_info { + /** Must be set to VKD3D_STRUCTURE_TYPE_HOST_TIME_DOMAIN_INFO. */ enum vkd3d_structure_type type; + /** Optional pointer to a structure containing further parameters. */ const void *next; + /** + * The number of clock ticks per second, used for GetClockCalibration(). It should normally + * match the expected result of QueryPerformanceFrequency(). If this chained structure is not + * used then 10 million is used, which means that each tick is a tenth of a microsecond, or + * equivalently 100 nanoseconds.
+ */ uint64_t ticks_per_second; }; +/** + * A chained structure containing device creation parameters. + */ struct vkd3d_device_create_info { + /** Must be set to VKD3D_STRUCTURE_TYPE_DEVICE_CREATE_INFO. */ enum vkd3d_structure_type type; + /** Optional pointer to a structure containing further parameters. */ const void *next; + /** The minimum feature level to request. Device creation will fail with E_INVALIDARG if the + * Vulkan device doesn't have the features needed to fulfill the request. */ D3D_FEATURE_LEVEL minimum_feature_level; + /** + * The vkd3d instance to use to create a device. Either this or instance_create_info must be + * set. + */ struct vkd3d_instance *instance; + /** + * The parameters used to create an instance, which is then used to create a device. Either + * this or instance must be set. + */ const struct vkd3d_instance_create_info *instance_create_info; + /** + * The Vulkan physical device to use. If it is NULL, the first physical device found is used, + * prioritizing discrete GPUs over integrated GPUs and integrated GPUs over all the others. + * + * This parameter can be overridden by setting environment variable VKD3D_VULKAN_DEVICE. + */ VkPhysicalDevice vk_physical_device; + /** + * A list of Vulkan device extensions to request. They are intended as required, so device + * creation will fail if any of them is not available. + */ const char * const *device_extensions; + /** The number of elements in the device_extensions array. */ uint32_t device_extension_count; + /** + * An object to be set as the device parent. This is not used by vkd3d except for being + * returned by vkd3d_get_device_parent. + */ IUnknown *parent; + /** + * The adapter LUID to be set for the device. This is not used by vkd3d except for being + * returned by GetAdapterLuid. + */ LUID adapter_luid; }; -/* Extends vkd3d_device_create_info. Available since 1.2. */ +/** + * A chained structure to specify optional device extensions. 
+ * + * This structure extends vkd3d_device_create_info. + * + * \since 1.2 + */ struct vkd3d_optional_device_extensions_info { + /** Must be set to VKD3D_STRUCTURE_TYPE_OPTIONAL_DEVICE_EXTENSIONS_INFO. */ enum vkd3d_structure_type type; + /** Optional pointer to a structure containing further parameters. */ const void *next; + /** + * A list of optional Vulkan device extensions to request. Device creation does not fail if + * they are not available. + */ const char * const *extensions; + /** The number of elements in the extensions array. */ uint32_t extension_count; }; -/* vkd3d_image_resource_create_info flags */ +/** + * When specified as a flag of vkd3d_image_resource_create_info, it means that vkd3d will do the + * initial transition operation on the image from VK_IMAGE_LAYOUT_UNDEFINED to its appropriate + * Vulkan layout (depending on its D3D12 resource state). If this flag is not specified the caller + * is responsible for transitioning the Vulkan image to the appropriate layout. + */ #define VKD3D_RESOURCE_INITIAL_STATE_TRANSITION 0x00000001 +/** + * When specified as a flag of vkd3d_image_resource_create_info, it means that field present_state + * is honored. + */ #define VKD3D_RESOURCE_PRESENT_STATE_TRANSITION 0x00000002 +/** + * A chained structure containing the parameters to create a D3D12 resource backed by a Vulkan + * image. + */ struct vkd3d_image_resource_create_info { + /** Must be set to VKD3D_STRUCTURE_TYPE_IMAGE_RESOURCE_CREATE_INFO. */ enum vkd3d_structure_type type; + /** Optional pointer to a structure containing further parameters. */ const void *next; + /** The Vulkan image that backs the resource. */ VkImage vk_image; + /** The resource description. */ D3D12_RESOURCE_DESC desc; + /** + * A combination of zero or more flags. The valid flags are + * VKD3D_RESOURCE_INITIAL_STATE_TRANSITION and VKD3D_RESOURCE_PRESENT_STATE_TRANSITION. 
+ */ unsigned int flags; + /** + * This field specifies how to handle resource state D3D12_RESOURCE_STATE_PRESENT for + * the resource. Notice that on D3D12 there is no difference between + * D3D12_RESOURCE_STATE_COMMON and D3D12_RESOURCE_STATE_PRESENT (they have the same value), + * while on Vulkan two different layouts are used (VK_IMAGE_LAYOUT_GENERAL and + * VK_IMAGE_LAYOUT_PRESENT_SRC_KHR). + * + * * When flag VKD3D_RESOURCE_PRESENT_STATE_TRANSITION is not specified, field + * present_state is ignored and resource state D3D12_RESOURCE_STATE_COMMON/_PRESENT is + * mapped to VK_IMAGE_LAYOUT_GENERAL; this is useful for non-swapchain resources. + * * Otherwise, when present_state is D3D12_RESOURCE_STATE_PRESENT/_COMMON, resource state + * D3D12_RESOURCE_STATE_COMMON/_PRESENT is mapped to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + * this is useful for swapchain resources that are directly backed by a Vulkan swapchain + * image. + * * Otherwise, resource state D3D12_RESOURCE_STATE_COMMON/_PRESENT is treated as resource + * state present_state; this is useful for swapchain resources that are backed by a Vulkan + * non-swapchain image, which the client will likely consume with a copy or drawing + * operation at presentation time.
+ */ D3D12_RESOURCE_STATES present_state; }; diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index 9e663919c38..d3afcc11b16 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -21,6 +21,7 @@ #include #include +#include #include #ifdef __cplusplus @@ -53,6 +54,7 @@ enum vkd3d_shader_api_version VKD3D_SHADER_API_VERSION_1_9, VKD3D_SHADER_API_VERSION_1_10, VKD3D_SHADER_API_VERSION_1_11, + VKD3D_SHADER_API_VERSION_1_12, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_API_VERSION), }; @@ -148,6 +150,12 @@ enum vkd3d_shader_compile_option_formatting_flags VKD3D_SHADER_COMPILE_OPTION_FORMATTING_OFFSETS = 0x00000004, VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER = 0x00000008, VKD3D_SHADER_COMPILE_OPTION_FORMATTING_RAW_IDS = 0x00000010, + /** + * Emit the signatures when disassembling a shader. + * + * \since 1.12 + */ + VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES = 0x00000020, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FORMATTING_FLAGS), }; @@ -208,10 +216,33 @@ enum vkd3d_shader_compile_option_feature_flags * This corresponds to the "shaderFloat64" feature in the Vulkan API, and * the "GL_ARB_gpu_shader_fp64" extension in the OpenGL API. */ VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLOAT64 = 0x00000002, + /** The SPIR-V target environment supports wave operations. + * This flag is valid only in VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1 + * or greater, and corresponds to the following minimum requirements in + * VkPhysicalDeviceSubgroupProperties: + * - subgroupSize >= 4. + * - supportedOperations has BASIC, VOTE, ARITHMETIC, BALLOT, SHUFFLE and + * QUAD bits set. + * - supportedStages include COMPUTE and FRAGMENT. \since 1.12 */ + VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS = 0x00000004, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLAGS), }; +/** + * Flags for vkd3d_shader_parse_dxbc(). 
+ * + * \since 1.12 + */ +enum vkd3d_shader_parse_dxbc_flags +{ + /** Ignore the checksum and continue parsing even if it is + * incorrect. */ + VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM = 0x00000001, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARSE_DXBC_FLAGS), +}; + enum vkd3d_shader_compile_option_name { /** @@ -279,6 +310,36 @@ enum vkd3d_shader_compile_option_name * \since 1.11 */ VKD3D_SHADER_COMPILE_OPTION_FEATURE = 0x0000000a, + /** + * If \a value is non-zero compilation will produce a child effect using + * shared object descriptions, as instructed by the "shared" modifier. + * Child effects are supported with fx_4_0 and fx_4_1 profiles. This option + * and "shared" modifiers are ignored for the fx_5_0 profile and non-fx profiles. + * The fx_2_0 profile does not have a separate concept of child effects, variables + * marked with "shared" modifier will be marked as such in a binary. + * + * \since 1.12 + */ + VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT = 0x0000000b, + /** + * If \a value is nonzero, emit a compile warning when vectors or + * matrices are truncated in an implicit conversion. + * If warnings are disabled, this option has no effect. + * This option has no effects for targets other than HLSL. + * + * The default value is nonzero, i.e. enable implicit truncation warnings. + * + * \since 1.12 + */ + VKD3D_SHADER_COMPILE_OPTION_WARN_IMPLICIT_TRUNCATION = 0x0000000c, + /** + * If \a value is nonzero, empty constant buffers descriptions are + * written out in the output effect binary. This option applies only + * to fx_4_0 and fx_4_1 profiles and is otherwise ignored.
+ * + * \since 1.12 + */ + VKD3D_SHADER_COMPILE_OPTION_INCLUDE_EMPTY_BUFFERS_IN_EFFECTS = 0x0000000d, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), }; @@ -872,6 +933,8 @@ enum vkd3d_shader_spirv_environment VKD3D_SHADER_SPIRV_ENVIRONMENT_NONE, VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5, VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0, /* default target */ + /** \since 1.12 */ + VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SPIRV_ENVIRONMENT), }; @@ -886,6 +949,8 @@ enum vkd3d_shader_spirv_extension VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT, /** \since 1.11 */ VKD3D_SHADER_SPIRV_EXTENSION_EXT_VIEWPORT_INDEX_LAYER, + /** \since 1.12 */ + VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SPIRV_EXTENSION), }; @@ -1995,8 +2060,12 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported * - VKD3D_SHADER_SOURCE_D3D_BYTECODE to VKD3D_SHADER_TARGET_SPIRV_TEXT * (if vkd3d was compiled with SPIRV-Tools) * - VKD3D_SHADER_SOURCE_D3D_BYTECODE to VKD3D_SHADER_TARGET_D3D_ASM - * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_DXBC_TPF + * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_SPIRV_BINARY + * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_SPIRV_TEXT + * (if vkd3d was compiled with SPIRV-Tools) + * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_D3D_ASM * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_D3D_BYTECODE + * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_DXBC_TPF * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_FX * * Supported transformations can also be detected at runtime with the functions @@ -2377,9 +2446,8 @@ VKD3D_SHADER_API void vkd3d_shader_free_dxbc(struct vkd3d_shader_dxbc_desc *dxbc * * \param dxbc A vkd3d_shader_code structure containing the DXBC blob to parse. * - * \param flags A set of flags modifying the behaviour of the function. 
No - * flags are defined for this version of vkd3d-shader, and this parameter - * should be set to 0. + * \param flags A combination of zero or more elements of enum + * vkd3d_shader_parse_dxbc_flags. * * \param desc A vkd3d_shader_dxbc_desc structure describing the contents of * the DXBC blob. Its vkd3d_shader_dxbc_section_desc structures will contain diff --git a/libs/vkd3d/include/vkd3d_types.h b/libs/vkd3d/include/vkd3d_types.h index 12ceef42fc4..f5a10117c12 100644 --- a/libs/vkd3d/include/vkd3d_types.h +++ b/libs/vkd3d/include/vkd3d_types.h @@ -41,6 +41,8 @@ enum vkd3d_result { /** Success. */ VKD3D_OK = 0, + /** Success as a result of there being nothing to do. \since 1.12 */ + VKD3D_FALSE = 1, /** An unspecified failure occurred. */ VKD3D_ERROR = -1, /** There are not enough resources available to complete the operation. */ @@ -51,6 +53,12 @@ enum vkd3d_result VKD3D_ERROR_INVALID_SHADER = -4, /** The operation is not implemented in this version of vkd3d. */ VKD3D_ERROR_NOT_IMPLEMENTED = -5, + /** The object or entry already exists. \since 1.12 */ + VKD3D_ERROR_KEY_ALREADY_EXISTS = -6, + /** The requested object was not found. \since 1.12 */ + VKD3D_ERROR_NOT_FOUND = -7, + /** The output buffer is smaller than the requested object \since 1.12.
*/ + VKD3D_ERROR_MORE_DATA = -8, VKD3D_FORCE_32_BIT_ENUM(VKD3D_RESULT), }; diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c index 06a12ef5bc4..f60ef7db769 100644 --- a/libs/vkd3d/libs/vkd3d-common/blob.c +++ b/libs/vkd3d/libs/vkd3d-common/blob.c @@ -16,12 +16,15 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ +#ifndef __MINGW32__ +#define WIDL_C_INLINE_WRAPPERS +#endif #define COBJMACROS #define CONST_VTABLE #include "vkd3d.h" #include "vkd3d_blob.h" -#include "vkd3d_debug.h" #include "vkd3d_memory.h" +#include "d3d12shader.h" struct vkd3d_blob { diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c index e12cd39450a..4523fc997ef 100644 --- a/libs/vkd3d/libs/vkd3d-common/debug.c +++ b/libs/vkd3d/libs/vkd3d-common/debug.c @@ -20,7 +20,7 @@ # define _WIN32_WINNT 0x0600 /* For InitOnceExecuteOnce(). */ #endif -#include "vkd3d_debug.h" +#include "vkd3d_common.h" #include #include diff --git a/libs/vkd3d/libs/vkd3d-common/error.c b/libs/vkd3d/libs/vkd3d-common/error.c index 3572669ac1c..2f978c4977d 100644 --- a/libs/vkd3d/libs/vkd3d-common/error.c +++ b/libs/vkd3d/libs/vkd3d-common/error.c @@ -17,7 +17,6 @@ */ #include "vkd3d_common.h" -#include "vkd3d_debug.h" HRESULT hresult_from_vkd3d_result(int vkd3d_result) { @@ -36,6 +35,12 @@ HRESULT hresult_from_vkd3d_result(int vkd3d_result) return E_INVALIDARG; case VKD3D_ERROR_NOT_IMPLEMENTED: return E_NOTIMPL; + case VKD3D_ERROR_KEY_ALREADY_EXISTS: + return DXGI_ERROR_ALREADY_EXISTS; + case VKD3D_ERROR_NOT_FOUND: + return DXGI_ERROR_NOT_FOUND; + case VKD3D_ERROR_MORE_DATA: + return DXGI_ERROR_MORE_DATA; default: FIXME("Unhandled vkd3d result %d.\n", vkd3d_result); return E_FAIL; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 3f86bd45960..9abc2c4db70 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -250,6 +250,7 @@ 
static const char * const shader_opcode_names[] = [VKD3DSIH_NOT ] = "not", [VKD3DSIH_NRM ] = "nrm", [VKD3DSIH_OR ] = "or", + [VKD3DSIH_ORD ] = "ord", [VKD3DSIH_PHASE ] = "phase", [VKD3DSIH_PHI ] = "phi", [VKD3DSIH_POW ] = "pow", @@ -321,44 +322,34 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_UMAX ] = "umax", [VKD3DSIH_UMIN ] = "umin", [VKD3DSIH_UMUL ] = "umul", + [VKD3DSIH_UNO ] = "uno", [VKD3DSIH_USHR ] = "ushr", [VKD3DSIH_UTOD ] = "utod", [VKD3DSIH_UTOF ] = "utof", [VKD3DSIH_UTOU ] = "utou", + [VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL ] = "wave_active_all_equal", + [VKD3DSIH_WAVE_ACTIVE_BALLOT ] = "wave_active_ballot", + [VKD3DSIH_WAVE_ACTIVE_BIT_AND ] = "wave_active_bit_and", + [VKD3DSIH_WAVE_ACTIVE_BIT_OR ] = "wave_active_bit_or", + [VKD3DSIH_WAVE_ACTIVE_BIT_XOR ] = "wave_active_bit_xor", + [VKD3DSIH_WAVE_ALL_BIT_COUNT ] = "wave_all_bit_count", + [VKD3DSIH_WAVE_ALL_TRUE ] = "wave_all_true", + [VKD3DSIH_WAVE_ANY_TRUE ] = "wave_any_true", + [VKD3DSIH_WAVE_IS_FIRST_LANE ] = "wave_is_first_lane", + [VKD3DSIH_WAVE_OP_ADD ] = "wave_op_add", + [VKD3DSIH_WAVE_OP_IMAX ] = "wave_op_imax", + [VKD3DSIH_WAVE_OP_IMIN ] = "wave_op_imin", + [VKD3DSIH_WAVE_OP_MAX ] = "wave_op_max", + [VKD3DSIH_WAVE_OP_MIN ] = "wave_op_min", + [VKD3DSIH_WAVE_OP_MUL ] = "wave_op_mul", + [VKD3DSIH_WAVE_OP_UMAX ] = "wave_op_umax", + [VKD3DSIH_WAVE_OP_UMIN ] = "wave_op_umin", + [VKD3DSIH_WAVE_PREFIX_BIT_COUNT ] = "wave_prefix_bit_count", + [VKD3DSIH_WAVE_READ_LANE_AT ] = "wave_read_lane_at", + [VKD3DSIH_WAVE_READ_LANE_FIRST ] = "wave_read_lane_first", [VKD3DSIH_XOR ] = "xor", }; -static const struct -{ - enum vkd3d_shader_input_sysval_semantic sysval_semantic; - const char *sysval_name; -} -shader_input_sysval_semantic_names[] = -{ - {VKD3D_SIV_POSITION, "position"}, - {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"}, - {VKD3D_SIV_CULL_DISTANCE, "cull_distance"}, - {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"}, - {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"}, - 
{VKD3D_SIV_VERTEX_ID, "vertex_id"}, - {VKD3D_SIV_INSTANCE_ID, "instance_id"}, - {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"}, - {VKD3D_SIV_IS_FRONT_FACE, "is_front_face"}, - {VKD3D_SIV_SAMPLE_INDEX, "sample_index"}, - {VKD3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"}, - {VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"}, - {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"}, - {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"}, - {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"}, - {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"}, - {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"}, - {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"}, - {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"}, - {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"}, - {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"}, - {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"}, -}; - struct vkd3d_d3d_asm_colours { const char *reset; @@ -370,6 +361,7 @@ struct vkd3d_d3d_asm_colours const char *swizzle; const char *version; const char *write_mask; + const char *label; }; struct vkd3d_d3d_asm_compiler @@ -377,22 +369,10 @@ struct vkd3d_d3d_asm_compiler struct vkd3d_string_buffer buffer; struct vkd3d_shader_version shader_version; struct vkd3d_d3d_asm_colours colours; - enum vsir_asm_dialect dialect; + enum vsir_asm_flags flags; const struct vkd3d_shader_instruction *current; }; -static int VKD3D_PRINTF_FUNC(2, 3) shader_addline(struct vkd3d_string_buffer *buffer, const char *format, ...) -{ - va_list args; - int ret; - - va_start(args, format); - ret = vkd3d_string_buffer_vprintf(buffer, format, args); - va_end(args); - - return ret; -} - /* Convert floating point offset relative to a register file to an absolute * offset for float constants. 
*/ static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type register_type, UINT register_idx) @@ -445,6 +425,23 @@ static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, vkd3d_string_buffer_printf(&compiler->buffer, "unknown_flags(%#"PRIx64")", (uint64_t)global_flags); } +static void shader_dump_atomic_op_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t atomic_flags) +{ + if (atomic_flags & VKD3DARF_SEQ_CST) + { + vkd3d_string_buffer_printf(&compiler->buffer, "_seqCst"); + atomic_flags &= ~VKD3DARF_SEQ_CST; + } + if (atomic_flags & VKD3DARF_VOLATILE) + { + vkd3d_string_buffer_printf(&compiler->buffer, "_volatile"); + atomic_flags &= ~VKD3DARF_VOLATILE; + } + + if (atomic_flags) + vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", atomic_flags); +} + static void shader_dump_sync_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t sync_flags) { if (sync_flags & VKD3DSSF_GLOBAL_UAV) @@ -511,96 +508,138 @@ static void shader_dump_uav_flags(struct vkd3d_d3d_asm_compiler *compiler, uint3 vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", uav_flags); } -static void shader_dump_tessellator_domain(struct vkd3d_d3d_asm_compiler *compiler, - enum vkd3d_tessellator_domain domain) +static void shader_print_tessellator_domain(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, enum vkd3d_tessellator_domain d, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; + const char *domain; - shader_addline(buffer, "domain_"); - switch (domain) + switch (d) { case VKD3D_TESSELLATOR_DOMAIN_LINE: - shader_addline(buffer, "isoline"); + domain = "domain_isoline"; break; case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: - shader_addline(buffer, "tri"); + domain = "domain_tri"; break; case VKD3D_TESSELLATOR_DOMAIN_QUAD: - shader_addline(buffer, "quad"); + domain = "domain_quad"; break; default: - shader_addline(buffer, "unknown_tessellator_domain(%#x)", domain); - break; + 
vkd3d_string_buffer_printf(buffer, "%s%s%s%s", + prefix, compiler->colours.error, d, compiler->colours.reset, suffix); + return; } + + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, domain, suffix); } -static void shader_dump_tessellator_output_primitive(struct vkd3d_d3d_asm_compiler *compiler, - enum vkd3d_shader_tessellator_output_primitive output_primitive) +static void shader_print_tessellator_output_primitive(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, enum vkd3d_shader_tessellator_output_primitive p, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; + const char *primitive; - shader_addline(buffer, "output_"); - switch (output_primitive) + switch (p) { case VKD3D_SHADER_TESSELLATOR_OUTPUT_POINT: - shader_addline(buffer, "point"); + primitive = "output_point"; break; case VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE: - shader_addline(buffer, "line"); + primitive = "output_line"; break; case VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW: - shader_addline(buffer, "triangle_cw"); + primitive = "output_triangle_cw"; break; case VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW: - shader_addline(buffer, "triangle_ccw"); + primitive = "output_triangle_ccw"; break; default: - shader_addline(buffer, "unknown_tessellator_output_primitive(%#x)", output_primitive); - break; + vkd3d_string_buffer_printf(buffer, "%s%s%s%s", + prefix, compiler->colours.error, p, compiler->colours.reset, suffix); + return; } + + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, primitive, suffix); } -static void shader_dump_tessellator_partitioning(struct vkd3d_d3d_asm_compiler *compiler, - enum vkd3d_shader_tessellator_partitioning partitioning) +static void shader_print_tessellator_partitioning(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, enum vkd3d_shader_tessellator_partitioning p, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; + const char *partitioning; - shader_addline(buffer, "partitioning_"); - 
switch (partitioning) + switch (p) { case VKD3D_SHADER_TESSELLATOR_PARTITIONING_INTEGER: - shader_addline(buffer, "integer"); + partitioning = "partitioning_integer"; break; case VKD3D_SHADER_TESSELLATOR_PARTITIONING_POW2: - shader_addline(buffer, "pow2"); + partitioning = "partitioning_pow2"; break; case VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: - shader_addline(buffer, "fractional_odd"); + partitioning = "partitioning_fractional_odd"; break; case VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: - shader_addline(buffer, "fractional_even"); + partitioning = "partitioning_fractional_even"; break; default: - shader_addline(buffer, "unknown_tessellator_partitioning(%#x)", partitioning); - break; + vkd3d_string_buffer_printf(buffer, "%s%s%s%s", + prefix, compiler->colours.error, p, compiler->colours.reset, suffix); + return; } + + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, partitioning, suffix); } -static void shader_dump_shader_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler, - enum vkd3d_shader_input_sysval_semantic semantic) +static void shader_print_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, enum vkd3d_shader_input_sysval_semantic semantic, const char *suffix) { unsigned int i; + static const struct + { + enum vkd3d_shader_input_sysval_semantic sysval_semantic; + const char *sysval_name; + } + shader_input_sysval_semantic_names[] = + { + {VKD3D_SIV_POSITION, "position"}, + {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"}, + {VKD3D_SIV_CULL_DISTANCE, "cull_distance"}, + {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"}, + {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"}, + {VKD3D_SIV_VERTEX_ID, "vertex_id"}, + {VKD3D_SIV_INSTANCE_ID, "instance_id"}, + {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"}, + {VKD3D_SIV_IS_FRONT_FACE, "is_front_face"}, + {VKD3D_SIV_SAMPLE_INDEX, "sample_index"}, + {VKD3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"}, + 
{VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"}, + {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"}, + {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"}, + {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"}, + {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"}, + {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"}, + {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"}, + {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"}, + {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"}, + {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"}, + {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"}, + }; + for (i = 0; i < ARRAY_SIZE(shader_input_sysval_semantic_names); ++i) { - if (shader_input_sysval_semantic_names[i].sysval_semantic == semantic) - { - vkd3d_string_buffer_printf(&compiler->buffer, "%s", shader_input_sysval_semantic_names[i].sysval_name); - return; - } + if (shader_input_sysval_semantic_names[i].sysval_semantic != semantic) + continue; + + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s", + prefix, shader_input_sysval_semantic_names[i].sysval_name, suffix); + return; } - vkd3d_string_buffer_printf(&compiler->buffer, "unknown_shader_input_sysval_semantic(%#x)", semantic); + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s%s", + prefix, compiler->colours.error, semantic, compiler->colours.reset, suffix); } static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, enum vkd3d_shader_resource_type type) @@ -646,6 +685,8 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum [VKD3D_DATA_UINT8 ] = "uint8", [VKD3D_DATA_UINT64 ] = "uint64", [VKD3D_DATA_BOOL ] = "bool", + [VKD3D_DATA_UINT16 ] = "uint16", + [VKD3D_DATA_HALF ] = "half", }; const char *name; @@ -673,128 +714,133 @@ static void shader_dump_resource_data_type(struct vkd3d_d3d_asm_compiler *compil 
vkd3d_string_buffer_printf(&compiler->buffer, ")"); } -static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, - const struct vkd3d_shader_semantic *semantic, uint32_t flags) +static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, const struct vkd3d_shader_semantic *semantic, uint32_t flags, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; + unsigned int usage_idx; + const char *usage; + bool indexed; if (semantic->resource.reg.reg.type == VKD3DSPR_COMBINED_SAMPLER) { switch (semantic->resource_type) { case VKD3D_SHADER_RESOURCE_TEXTURE_2D: - shader_addline(buffer, "_2d"); + usage = "2d"; break; - case VKD3D_SHADER_RESOURCE_TEXTURE_3D: - shader_addline(buffer, "_volume"); + usage = "volume"; break; - case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: - shader_addline(buffer, "_cube"); + usage = "cube"; break; - default: - shader_addline(buffer, "_unknown_resource_type(%#x)", semantic->resource_type); - break; + vkd3d_string_buffer_printf(buffer, "%s%s%s%s", + prefix, compiler->colours.error, semantic->resource_type, compiler->colours.reset, suffix); + return; } - } - else if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) - { - if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE) - shader_addline(buffer, "_resource"); - shader_addline(buffer, "_"); + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, usage, suffix); + return; + } + + if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) + { + vkd3d_string_buffer_printf(buffer, "%s", prefix); + if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE) + vkd3d_string_buffer_printf(buffer, "resource_"); + shader_dump_resource_type(compiler, semantic->resource_type); if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) { - 
shader_addline(buffer, "(%u)", semantic->sample_count); + vkd3d_string_buffer_printf(buffer, "(%u)", semantic->sample_count); } if (semantic->resource.reg.reg.type == VKD3DSPR_UAV) shader_dump_uav_flags(compiler, flags); - shader_addline(buffer, " "); + vkd3d_string_buffer_printf(buffer, " "); shader_dump_resource_data_type(compiler, semantic->resource_data_type); + vkd3d_string_buffer_printf(buffer, "%s", suffix); + return; } - else + + /* Pixel shaders 3.0 don't have usage semantics. */ + if (!vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0) + && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) + return; + + indexed = false; + usage_idx = semantic->usage_idx; + switch (semantic->usage) { - /* Pixel shaders 3.0 don't have usage semantics. */ - if (!vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0) - && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) + case VKD3D_DECL_USAGE_POSITION: + usage = "position"; + indexed = true; + break; + case VKD3D_DECL_USAGE_BLEND_INDICES: + usage = "blend"; + break; + case VKD3D_DECL_USAGE_BLEND_WEIGHT: + usage = "weight"; + break; + case VKD3D_DECL_USAGE_NORMAL: + usage = "normal"; + indexed = true; + break; + case VKD3D_DECL_USAGE_PSIZE: + usage = "psize"; + break; + case VKD3D_DECL_USAGE_COLOR: + if (semantic->usage_idx) + { + usage = "specular"; + indexed = true; + --usage_idx; + break; + } + usage = "color"; + break; + case VKD3D_DECL_USAGE_TEXCOORD: + usage = "texcoord"; + indexed = true; + break; + case VKD3D_DECL_USAGE_TANGENT: + usage = "tangent"; + break; + case VKD3D_DECL_USAGE_BINORMAL: + usage = "binormal"; + break; + case VKD3D_DECL_USAGE_TESS_FACTOR: + usage = "tessfactor"; + break; + case VKD3D_DECL_USAGE_POSITIONT: + usage = "positionT"; + indexed = true; + break; + case VKD3D_DECL_USAGE_FOG: + usage = "fog"; + break; + case VKD3D_DECL_USAGE_DEPTH: + usage = "depth"; + break; + case VKD3D_DECL_USAGE_SAMPLE: + usage = "sample"; + break; + default: + vkd3d_string_buffer_printf(buffer, 
"%s%s%s%s", + prefix, compiler->colours.error, semantic->usage, usage_idx, compiler->colours.reset, suffix); return; - else - shader_addline(buffer, "_"); - - switch (semantic->usage) - { - case VKD3D_DECL_USAGE_POSITION: - shader_addline(buffer, "position%u", semantic->usage_idx); - break; - - case VKD3D_DECL_USAGE_BLEND_INDICES: - shader_addline(buffer, "blend"); - break; - - case VKD3D_DECL_USAGE_BLEND_WEIGHT: - shader_addline(buffer, "weight"); - break; - - case VKD3D_DECL_USAGE_NORMAL: - shader_addline(buffer, "normal%u", semantic->usage_idx); - break; - - case VKD3D_DECL_USAGE_PSIZE: - shader_addline(buffer, "psize"); - break; - - case VKD3D_DECL_USAGE_COLOR: - if (!semantic->usage_idx) - shader_addline(buffer, "color"); - else - shader_addline(buffer, "specular%u", (semantic->usage_idx - 1)); - break; - - case VKD3D_DECL_USAGE_TEXCOORD: - shader_addline(buffer, "texcoord%u", semantic->usage_idx); - break; - - case VKD3D_DECL_USAGE_TANGENT: - shader_addline(buffer, "tangent"); - break; - - case VKD3D_DECL_USAGE_BINORMAL: - shader_addline(buffer, "binormal"); - break; - - case VKD3D_DECL_USAGE_TESS_FACTOR: - shader_addline(buffer, "tessfactor"); - break; - - case VKD3D_DECL_USAGE_POSITIONT: - shader_addline(buffer, "positionT%u", semantic->usage_idx); - break; - - case VKD3D_DECL_USAGE_FOG: - shader_addline(buffer, "fog"); - break; - - case VKD3D_DECL_USAGE_DEPTH: - shader_addline(buffer, "depth"); - break; - - case VKD3D_DECL_USAGE_SAMPLE: - shader_addline(buffer, "sample"); - break; - - default: - shader_addline(buffer, "", semantic->usage); - FIXME("Unrecognised semantic usage %#x.\n", semantic->usage); - } } + + if (indexed) + vkd3d_string_buffer_printf(buffer, "%s%s%u%s", prefix, usage, usage_idx, suffix); + else + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, usage, suffix); } -static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, - const struct vkd3d_shader_src_param *param); +static void shader_print_src_param(struct 
vkd3d_d3d_asm_compiler *compiler, + const char *prefix, const struct vkd3d_shader_src_param *param, const char *suffix); static void shader_print_float_literal(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, float f, const char *suffix) @@ -891,13 +937,9 @@ static void shader_print_untyped_literal(struct vkd3d_d3d_asm_compiler *compiler static void shader_print_subscript(struct vkd3d_d3d_asm_compiler *compiler, unsigned int offset, const struct vkd3d_shader_src_param *rel_addr) { - vkd3d_string_buffer_printf(&compiler->buffer, "["); if (rel_addr) - { - shader_dump_src_param(compiler, rel_addr); - vkd3d_string_buffer_printf(&compiler->buffer, " + "); - } - shader_print_uint_literal(compiler, "", offset, "]"); + shader_print_src_param(compiler, "[", rel_addr, " + "); + shader_print_uint_literal(compiler, rel_addr ? "" : "[", offset, "]"); } static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler, @@ -910,8 +952,8 @@ static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler vkd3d_string_buffer_printf(&compiler->buffer, "*]"); } -static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg, - bool is_declaration) +static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, + const struct vkd3d_shader_register *reg, bool is_declaration, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; unsigned int offset = reg->idx[0].offset; @@ -920,22 +962,23 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"}; static const char * const misctype_reg_names[] = {"vPos", "vFace"}; - shader_addline(buffer, "%s", compiler->colours.reg); + vkd3d_string_buffer_printf(buffer, "%s%s", prefix, + reg->type == VKD3DSPR_LABEL ? 
compiler->colours.label : compiler->colours.reg); switch (reg->type) { case VKD3DSPR_TEMP: - shader_addline(buffer, "r"); + vkd3d_string_buffer_printf(buffer, "r"); break; case VKD3DSPR_INPUT: - shader_addline(buffer, "v"); + vkd3d_string_buffer_printf(buffer, "v"); break; case VKD3DSPR_CONST: case VKD3DSPR_CONST2: case VKD3DSPR_CONST3: case VKD3DSPR_CONST4: - shader_addline(buffer, "c"); + vkd3d_string_buffer_printf(buffer, "c"); offset = shader_get_float_offset(reg->type, offset); break; @@ -945,205 +988,210 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const break; case VKD3DSPR_RASTOUT: - shader_addline(buffer, "%s", rastout_reg_names[offset]); + vkd3d_string_buffer_printf(buffer, "%s", rastout_reg_names[offset]); break; case VKD3DSPR_COLOROUT: - shader_addline(buffer, "oC"); + vkd3d_string_buffer_printf(buffer, "oC"); break; case VKD3DSPR_DEPTHOUT: - shader_addline(buffer, "oDepth"); + vkd3d_string_buffer_printf(buffer, "oDepth"); break; case VKD3DSPR_DEPTHOUTGE: - shader_addline(buffer, "oDepthGE"); + vkd3d_string_buffer_printf(buffer, "oDepthGE"); break; case VKD3DSPR_DEPTHOUTLE: - shader_addline(buffer, "oDepthLE"); + vkd3d_string_buffer_printf(buffer, "oDepthLE"); break; case VKD3DSPR_ATTROUT: - shader_addline(buffer, "oD"); + vkd3d_string_buffer_printf(buffer, "oD"); break; case VKD3DSPR_TEXCRDOUT: /* Vertex shaders >= 3.0 use general purpose output registers * (VKD3DSPR_OUTPUT), which can include an address token. 
*/ if (vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0)) - shader_addline(buffer, "o"); + vkd3d_string_buffer_printf(buffer, "o"); else - shader_addline(buffer, "oT"); + vkd3d_string_buffer_printf(buffer, "oT"); break; case VKD3DSPR_CONSTINT: - shader_addline(buffer, "i"); + vkd3d_string_buffer_printf(buffer, "i"); break; case VKD3DSPR_CONSTBOOL: - shader_addline(buffer, "b"); + vkd3d_string_buffer_printf(buffer, "b"); break; case VKD3DSPR_LABEL: - shader_addline(buffer, "l"); + vkd3d_string_buffer_printf(buffer, "l"); break; case VKD3DSPR_LOOP: - shader_addline(buffer, "aL"); + vkd3d_string_buffer_printf(buffer, "aL"); break; case VKD3DSPR_COMBINED_SAMPLER: case VKD3DSPR_SAMPLER: - shader_addline(buffer, "s"); + vkd3d_string_buffer_printf(buffer, "s"); is_descriptor = true; break; case VKD3DSPR_MISCTYPE: if (offset > 1) - { - FIXME("Unhandled misctype register %u.\n", offset); - shader_addline(buffer, "", offset); - } + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, offset, compiler->colours.reset); else - { - shader_addline(buffer, "%s", misctype_reg_names[offset]); - } + vkd3d_string_buffer_printf(buffer, "%s", misctype_reg_names[offset]); break; case VKD3DSPR_PREDICATE: - shader_addline(buffer, "p"); + vkd3d_string_buffer_printf(buffer, "p"); break; case VKD3DSPR_IMMCONST: - shader_addline(buffer, "l"); + vkd3d_string_buffer_printf(buffer, "l"); break; case VKD3DSPR_IMMCONST64: - shader_addline(buffer, "d"); + vkd3d_string_buffer_printf(buffer, "d"); break; case VKD3DSPR_CONSTBUFFER: - shader_addline(buffer, "cb"); + vkd3d_string_buffer_printf(buffer, "cb"); is_descriptor = true; break; case VKD3DSPR_IMMCONSTBUFFER: - shader_addline(buffer, "icb"); + vkd3d_string_buffer_printf(buffer, "icb"); break; case VKD3DSPR_PRIMID: - shader_addline(buffer, "primID"); + vkd3d_string_buffer_printf(buffer, "primID"); break; case VKD3DSPR_NULL: - shader_addline(buffer, "null"); + vkd3d_string_buffer_printf(buffer, "null"); break; case 
VKD3DSPR_RASTERIZER: - shader_addline(buffer, "rasterizer"); + vkd3d_string_buffer_printf(buffer, "rasterizer"); break; case VKD3DSPR_RESOURCE: - shader_addline(buffer, "t"); + vkd3d_string_buffer_printf(buffer, "t"); is_descriptor = true; break; case VKD3DSPR_UAV: - shader_addline(buffer, "u"); + vkd3d_string_buffer_printf(buffer, "u"); is_descriptor = true; break; case VKD3DSPR_OUTPOINTID: - shader_addline(buffer, "vOutputControlPointID"); + vkd3d_string_buffer_printf(buffer, "vOutputControlPointID"); break; case VKD3DSPR_FORKINSTID: - shader_addline(buffer, "vForkInstanceId"); + vkd3d_string_buffer_printf(buffer, "vForkInstanceId"); break; case VKD3DSPR_JOININSTID: - shader_addline(buffer, "vJoinInstanceId"); + vkd3d_string_buffer_printf(buffer, "vJoinInstanceId"); break; case VKD3DSPR_INCONTROLPOINT: - shader_addline(buffer, "vicp"); + vkd3d_string_buffer_printf(buffer, "vicp"); break; case VKD3DSPR_OUTCONTROLPOINT: - shader_addline(buffer, "vocp"); + vkd3d_string_buffer_printf(buffer, "vocp"); break; case VKD3DSPR_PATCHCONST: - shader_addline(buffer, "vpc"); + vkd3d_string_buffer_printf(buffer, "vpc"); break; case VKD3DSPR_TESSCOORD: - shader_addline(buffer, "vDomainLocation"); + vkd3d_string_buffer_printf(buffer, "vDomainLocation"); break; case VKD3DSPR_GROUPSHAREDMEM: - shader_addline(buffer, "g"); + vkd3d_string_buffer_printf(buffer, "g"); break; case VKD3DSPR_THREADID: - shader_addline(buffer, "vThreadID"); + vkd3d_string_buffer_printf(buffer, "vThreadID"); break; case VKD3DSPR_THREADGROUPID: - shader_addline(buffer, "vThreadGroupID"); + vkd3d_string_buffer_printf(buffer, "vThreadGroupID"); break; case VKD3DSPR_LOCALTHREADID: - shader_addline(buffer, "vThreadIDInGroup"); + vkd3d_string_buffer_printf(buffer, "vThreadIDInGroup"); break; case VKD3DSPR_LOCALTHREADINDEX: - shader_addline(buffer, "vThreadIDInGroupFlattened"); + vkd3d_string_buffer_printf(buffer, "vThreadIDInGroupFlattened"); break; case VKD3DSPR_IDXTEMP: - shader_addline(buffer, "x"); + 
vkd3d_string_buffer_printf(buffer, "x"); break; case VKD3DSPR_STREAM: - shader_addline(buffer, "m"); + vkd3d_string_buffer_printf(buffer, "m"); break; case VKD3DSPR_FUNCTIONBODY: - shader_addline(buffer, "fb"); + vkd3d_string_buffer_printf(buffer, "fb"); break; case VKD3DSPR_FUNCTIONPOINTER: - shader_addline(buffer, "fp"); + vkd3d_string_buffer_printf(buffer, "fp"); break; case VKD3DSPR_COVERAGE: - shader_addline(buffer, "vCoverage"); + vkd3d_string_buffer_printf(buffer, "vCoverage"); break; case VKD3DSPR_SAMPLEMASK: - shader_addline(buffer, "oMask"); + vkd3d_string_buffer_printf(buffer, "oMask"); break; case VKD3DSPR_GSINSTID: - shader_addline(buffer, "vGSInstanceID"); + vkd3d_string_buffer_printf(buffer, "vGSInstanceID"); break; case VKD3DSPR_OUTSTENCILREF: - shader_addline(buffer, "oStencilRef"); + vkd3d_string_buffer_printf(buffer, "oStencilRef"); break; case VKD3DSPR_UNDEF: - shader_addline(buffer, "undef"); + vkd3d_string_buffer_printf(buffer, "undef"); break; case VKD3DSPR_SSA: - shader_addline(buffer, "sr"); + vkd3d_string_buffer_printf(buffer, "sr"); + break; + + case VKD3DSPR_WAVELANECOUNT: + vkd3d_string_buffer_printf(buffer, "vWaveLaneCount"); + break; + + case VKD3DSPR_WAVELANEINDEX: + vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex"); break; default: - shader_addline(buffer, "", reg->type); + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->type, compiler->colours.reset); break; } @@ -1162,7 +1210,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const break; } - shader_addline(buffer, "%s(", compiler->colours.reset); + vkd3d_string_buffer_printf(buffer, "%s(", compiler->colours.reset); switch (reg->dimension) { case VSIR_DIMENSION_SCALAR: @@ -1183,7 +1231,8 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); break; default: - shader_addline(buffer, "", reg->data_type); + 
vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->data_type, compiler->colours.reset); break; } break; @@ -1222,20 +1271,22 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const shader_print_uint_literal(compiler, ", ", reg->u.immconst_u32[3], ""); break; default: - shader_addline(buffer, "", reg->data_type); + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->data_type, compiler->colours.reset); break; } break; default: - shader_addline(buffer, "", reg->dimension); + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->dimension, compiler->colours.reset); break; } - shader_addline(buffer, ")"); + vkd3d_string_buffer_printf(buffer, ")"); } else if (reg->type == VKD3DSPR_IMMCONST64) { - shader_addline(buffer, "%s(", compiler->colours.reset); + vkd3d_string_buffer_printf(buffer, "%s(", compiler->colours.reset); /* A double2 vector is treated as a float4 vector in enum vsir_dimension. */ if (reg->dimension == VSIR_DIMENSION_SCALAR || reg->dimension == VSIR_DIMENSION_VEC4) { @@ -1253,14 +1304,16 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const } else { - shader_addline(buffer, "", reg->data_type); + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->data_type, compiler->colours.reset); } } else { - shader_addline(buffer, "", reg->dimension); + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->dimension, compiler->colours.reset); } - shader_addline(buffer, ")"); + vkd3d_string_buffer_printf(buffer, ")"); } else if (reg->type != VKD3DSPR_RASTOUT && reg->type != VKD3DSPR_MISCTYPE @@ -1304,7 +1357,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const } else { - shader_addline(buffer, "%s", compiler->colours.reset); + vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); } if (reg->type == VKD3DSPR_FUNCTIONPOINTER) @@ -1312,8 +1365,9 @@ static void 
shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const } else { - shader_addline(buffer, "%s", compiler->colours.reset); + vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); } + vkd3d_string_buffer_printf(buffer, "%s", suffix); } static void shader_print_precision(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg) @@ -1357,8 +1411,8 @@ static void shader_print_non_uniform(struct vkd3d_d3d_asm_compiler *compiler, co compiler->colours.modifier, compiler->colours.reset); } -static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, - const struct vkd3d_shader_register *reg) +static void shader_print_reg_type(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, const struct vkd3d_shader_register *reg, const char *suffix) { static const char *dimensions[] = { @@ -1370,7 +1424,13 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, struct vkd3d_string_buffer *buffer = &compiler->buffer; const char *dimension; - if (compiler->dialect != VSIR_ASM_VSIR) + if (!(compiler->flags & VSIR_ASM_FLAG_DUMP_TYPES)) + { + vkd3d_string_buffer_printf(buffer, "%s%s", prefix, suffix); + return; + } + + if (reg->data_type == VKD3D_DATA_UNUSED) return; if (reg->dimension < ARRAY_SIZE(dimensions)) @@ -1378,83 +1438,114 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, else dimension = "??"; - shader_addline(buffer, " <%s", dimension); + vkd3d_string_buffer_printf(buffer, "%s <%s", prefix, dimension); shader_dump_data_type(compiler, reg->data_type); - shader_addline(buffer, ">"); + vkd3d_string_buffer_printf(buffer, ">%s", suffix); } -static void shader_dump_dst_param(struct vkd3d_d3d_asm_compiler *compiler, - const struct vkd3d_shader_dst_param *param, bool is_declaration) +static void shader_print_write_mask(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, uint32_t mask, const char *suffix) +{ + unsigned int i = 0; + char buffer[5]; + + if 
(mask == 0) + { + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s", prefix, suffix); + return; + } + + if (mask & VKD3DSP_WRITEMASK_0) + buffer[i++] = 'x'; + if (mask & VKD3DSP_WRITEMASK_1) + buffer[i++] = 'y'; + if (mask & VKD3DSP_WRITEMASK_2) + buffer[i++] = 'z'; + if (mask & VKD3DSP_WRITEMASK_3) + buffer[i++] = 'w'; + buffer[i++] = '\0'; + + vkd3d_string_buffer_printf(&compiler->buffer, "%s.%s%s%s%s", prefix, + compiler->colours.write_mask, buffer, compiler->colours.reset, suffix); +} + +static void shader_print_dst_param(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, const struct vkd3d_shader_dst_param *param, bool is_declaration, const char *suffix) { - struct vkd3d_string_buffer *buffer = &compiler->buffer; uint32_t write_mask = param->write_mask; - shader_dump_register(compiler, &param->reg, is_declaration); + shader_print_register(compiler, prefix, &param->reg, is_declaration, ""); if (write_mask && param->reg.dimension == VSIR_DIMENSION_VEC4) { - static const char write_mask_chars[] = "xyzw"; - if (data_type_is_64_bit(param->reg.data_type)) write_mask = vsir_write_mask_32_from_64(write_mask); - shader_addline(buffer, ".%s", compiler->colours.write_mask); - if (write_mask & VKD3DSP_WRITEMASK_0) - shader_addline(buffer, "%c", write_mask_chars[0]); - if (write_mask & VKD3DSP_WRITEMASK_1) - shader_addline(buffer, "%c", write_mask_chars[1]); - if (write_mask & VKD3DSP_WRITEMASK_2) - shader_addline(buffer, "%c", write_mask_chars[2]); - if (write_mask & VKD3DSP_WRITEMASK_3) - shader_addline(buffer, "%c", write_mask_chars[3]); - shader_addline(buffer, "%s", compiler->colours.reset); + shader_print_write_mask(compiler, "", write_mask, ""); } shader_print_precision(compiler, &param->reg); shader_print_non_uniform(compiler, &param->reg); - shader_dump_reg_type(compiler, &param->reg); + shader_print_reg_type(compiler, "", &param->reg, suffix); } -static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, - const struct vkd3d_shader_src_param *param) 
+static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, const struct vkd3d_shader_src_param *param, const char *suffix) { enum vkd3d_shader_src_modifier src_modifier = param->modifiers; struct vkd3d_string_buffer *buffer = &compiler->buffer; uint32_t swizzle = param->swizzle; + const char *modifier = ""; + bool is_abs = false; if (src_modifier == VKD3DSPSM_NEG || src_modifier == VKD3DSPSM_BIASNEG || src_modifier == VKD3DSPSM_SIGNNEG || src_modifier == VKD3DSPSM_X2NEG || src_modifier == VKD3DSPSM_ABSNEG) - shader_addline(buffer, "-"); + modifier = "-"; else if (src_modifier == VKD3DSPSM_COMP) - shader_addline(buffer, "1-"); + modifier = "1-"; else if (src_modifier == VKD3DSPSM_NOT) - shader_addline(buffer, "!"); + modifier = "!"; + vkd3d_string_buffer_printf(buffer, "%s%s", prefix, modifier); if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) - shader_addline(buffer, "|"); + is_abs = true; - shader_dump_register(compiler, &param->reg, false); + shader_print_register(compiler, is_abs ?
"|" : "", &param->reg, false, ""); switch (src_modifier) { - case VKD3DSPSM_NONE: break; - case VKD3DSPSM_NEG: break; - case VKD3DSPSM_NOT: break; - case VKD3DSPSM_BIAS: shader_addline(buffer, "_bias"); break; - case VKD3DSPSM_BIASNEG: shader_addline(buffer, "_bias"); break; - case VKD3DSPSM_SIGN: shader_addline(buffer, "_bx2"); break; - case VKD3DSPSM_SIGNNEG: shader_addline(buffer, "_bx2"); break; - case VKD3DSPSM_COMP: break; - case VKD3DSPSM_X2: shader_addline(buffer, "_x2"); break; - case VKD3DSPSM_X2NEG: shader_addline(buffer, "_x2"); break; - case VKD3DSPSM_DZ: shader_addline(buffer, "_dz"); break; - case VKD3DSPSM_DW: shader_addline(buffer, "_dw"); break; + case VKD3DSPSM_NONE: + case VKD3DSPSM_NEG: + case VKD3DSPSM_COMP: + case VKD3DSPSM_ABS: case VKD3DSPSM_ABSNEG: - case VKD3DSPSM_ABS: /* handled later */ break; - default: shader_addline(buffer, "_unknown_modifier(%#x)", src_modifier); + case VKD3DSPSM_NOT: + break; + case VKD3DSPSM_BIAS: + case VKD3DSPSM_BIASNEG: + vkd3d_string_buffer_printf(buffer, "_bias"); + break; + case VKD3DSPSM_SIGN: + case VKD3DSPSM_SIGNNEG: + vkd3d_string_buffer_printf(buffer, "_bx2"); + break; + case VKD3DSPSM_X2: + case VKD3DSPSM_X2NEG: + vkd3d_string_buffer_printf(buffer, "_x2"); + break; + case VKD3DSPSM_DZ: + vkd3d_string_buffer_printf(buffer, "_dz"); + break; + case VKD3DSPSM_DW: + vkd3d_string_buffer_printf(buffer, "_dw"); + break; + default: + vkd3d_string_buffer_printf(buffer, "_%s%s", + compiler->colours.error, src_modifier, compiler->colours.reset); + break; } if (param->reg.type != VKD3DSPR_IMMCONST && param->reg.type != VKD3DSPR_IMMCONST64 @@ -1472,26 +1563,21 @@ static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, swizzle_z = vsir_swizzle_get_component(swizzle, 2); swizzle_w = vsir_swizzle_get_component(swizzle, 3); - if (swizzle_x == swizzle_y - && swizzle_x == swizzle_z - && swizzle_x == swizzle_w) - { - shader_addline(buffer, ".%s%c%s", compiler->colours.swizzle, + if (swizzle_x == swizzle_y &&
swizzle_x == swizzle_z && swizzle_x == swizzle_w) + vkd3d_string_buffer_printf(buffer, ".%s%c%s", compiler->colours.swizzle, swizzle_chars[swizzle_x], compiler->colours.reset); - } else - { - shader_addline(buffer, ".%s%c%c%c%c%s", compiler->colours.swizzle, + vkd3d_string_buffer_printf(buffer, ".%s%c%c%c%c%s", compiler->colours.swizzle, swizzle_chars[swizzle_x], swizzle_chars[swizzle_y], swizzle_chars[swizzle_z], swizzle_chars[swizzle_w], compiler->colours.reset); - } } - if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) - shader_addline(buffer, "|"); + + if (is_abs) + vkd3d_string_buffer_printf(buffer, "|"); shader_print_precision(compiler, &param->reg); shader_print_non_uniform(compiler, &param->reg); - shader_dump_reg_type(compiler, &param->reg); + shader_print_reg_type(compiler, "", &param->reg, suffix); } static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, @@ -1502,105 +1588,129 @@ static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, switch (dst->shift) { - case 0: break; - case 13: shader_addline(buffer, "_d8"); break; - case 14: shader_addline(buffer, "_d4"); break; - case 15: shader_addline(buffer, "_d2"); break; - case 1: shader_addline(buffer, "_x2"); break; - case 2: shader_addline(buffer, "_x4"); break; - case 3: shader_addline(buffer, "_x8"); break; - default: shader_addline(buffer, "_unhandled_shift(%d)", dst->shift); break; + case 0: + break; + case 13: + vkd3d_string_buffer_printf(buffer, "_d8"); + break; + case 14: + vkd3d_string_buffer_printf(buffer, "_d4"); + break; + case 15: + vkd3d_string_buffer_printf(buffer, "_d2"); + break; + case 1: + vkd3d_string_buffer_printf(buffer, "_x2"); + break; + case 2: + vkd3d_string_buffer_printf(buffer, "_x4"); + break; + case 3: + vkd3d_string_buffer_printf(buffer, "_x8"); + break; + default: + vkd3d_string_buffer_printf(buffer, "_unhandled_shift(%d)", dst->shift); + break; } - if (mmask & VKD3DSPDM_SATURATE) shader_addline(buffer, "_sat"); - if (mmask
& VKD3DSPDM_PARTIALPRECISION) shader_addline(buffer, "_pp"); - if (mmask & VKD3DSPDM_MSAMPCENTROID) shader_addline(buffer, "_centroid"); + if (mmask & VKD3DSPDM_SATURATE) + vkd3d_string_buffer_printf(buffer, "_sat"); + if (mmask & VKD3DSPDM_PARTIALPRECISION) + vkd3d_string_buffer_printf(buffer, "_pp"); + if (mmask & VKD3DSPDM_MSAMPCENTROID) + vkd3d_string_buffer_printf(buffer, "_centroid"); mmask &= ~VKD3DSPDM_MASK; if (mmask) FIXME("Unrecognised modifier %#x.\n", mmask); } -static void shader_dump_primitive_type(struct vkd3d_d3d_asm_compiler *compiler, - const struct vkd3d_shader_primitive_type *primitive_type) +static void shader_print_primitive_type(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, const struct vkd3d_shader_primitive_type *p, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; + const char *primitive_type; - switch (primitive_type->type) + switch (p->type) { case VKD3D_PT_UNDEFINED: - shader_addline(buffer, "undefined"); + primitive_type = "undefined"; break; case VKD3D_PT_POINTLIST: - shader_addline(buffer, "pointlist"); + primitive_type = "pointlist"; break; case VKD3D_PT_LINELIST: - shader_addline(buffer, "linelist"); + primitive_type = "linelist"; break; case VKD3D_PT_LINESTRIP: - shader_addline(buffer, "linestrip"); + primitive_type = "linestrip"; break; case VKD3D_PT_TRIANGLELIST: - shader_addline(buffer, "trianglelist"); + primitive_type = "trianglelist"; break; case VKD3D_PT_TRIANGLESTRIP: - shader_addline(buffer, "trianglestrip"); + primitive_type = "trianglestrip"; break; case VKD3D_PT_TRIANGLEFAN: - shader_addline(buffer, "trianglefan"); + primitive_type = "trianglefan"; break; case VKD3D_PT_LINELIST_ADJ: - shader_addline(buffer, "linelist_adj"); + primitive_type = "linelist_adj"; break; case VKD3D_PT_LINESTRIP_ADJ: - shader_addline(buffer, "linestrip_adj"); + primitive_type = "linestrip_adj"; break; case VKD3D_PT_TRIANGLELIST_ADJ: - shader_addline(buffer, "trianglelist_adj"); + primitive_type = 
"trianglelist_adj"; break; case VKD3D_PT_TRIANGLESTRIP_ADJ: - shader_addline(buffer, "trianglestrip_adj"); + primitive_type = "trianglestrip_adj"; break; case VKD3D_PT_PATCH: - shader_addline(buffer, "patch%u", primitive_type->patch_vertex_count); - break; + vkd3d_string_buffer_printf(buffer, "%spatch%u%s", prefix, p->patch_vertex_count, suffix); + return; default: - shader_addline(buffer, "", primitive_type->type); - break; + vkd3d_string_buffer_printf(buffer, "%s%s%s%s", + prefix, compiler->colours.error, p->type, compiler->colours.reset, suffix); + return; } + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, primitive_type, suffix); } -static void shader_dump_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler, - enum vkd3d_shader_interpolation_mode interpolation_mode) +static void shader_print_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, enum vkd3d_shader_interpolation_mode m, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; + const char *mode; - switch (interpolation_mode) + switch (m) { case VKD3DSIM_CONSTANT: - shader_addline(buffer, "constant"); + mode = "constant"; break; case VKD3DSIM_LINEAR: - shader_addline(buffer, "linear"); + mode = "linear"; break; case VKD3DSIM_LINEAR_CENTROID: - shader_addline(buffer, "linear centroid"); + mode = "linear centroid"; break; case VKD3DSIM_LINEAR_NOPERSPECTIVE: - shader_addline(buffer, "linear noperspective"); + mode = "linear noperspective"; break; case VKD3DSIM_LINEAR_SAMPLE: - shader_addline(buffer, "linear sample"); + mode = "linear sample"; break; case VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID: - shader_addline(buffer, "linear noperspective centroid"); + mode = "linear noperspective centroid"; break; case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE: - shader_addline(buffer, "linear noperspective sample"); + mode = "linear noperspective sample"; break; default: - shader_addline(buffer, "", interpolation_mode); - break; + 
vkd3d_string_buffer_printf(buffer, "%s%s%s%s", + prefix, compiler->colours.error, m, compiler->colours.reset, suffix); + return; } + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, mode, suffix); } const char *shader_get_type_prefix(enum vkd3d_shader_type type) @@ -1654,9 +1764,15 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile case VKD3DSIH_RETP: switch (ins->flags) { - case VKD3D_SHADER_CONDITIONAL_OP_NZ: shader_addline(buffer, "_nz"); break; - case VKD3D_SHADER_CONDITIONAL_OP_Z: shader_addline(buffer, "_z"); break; - default: shader_addline(buffer, "_unrecognized(%#x)", ins->flags); break; + case VKD3D_SHADER_CONDITIONAL_OP_NZ: + vkd3d_string_buffer_printf(buffer, "_nz"); + break; + case VKD3D_SHADER_CONDITIONAL_OP_Z: + vkd3d_string_buffer_printf(buffer, "_z"); + break; + default: + vkd3d_string_buffer_printf(buffer, "_unrecognized(%#x)", ins->flags); + break; } break; @@ -1664,49 +1780,99 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile case VKD3DSIH_BREAKC: switch (ins->flags) { - case VKD3D_SHADER_REL_OP_GT: shader_addline(buffer, "_gt"); break; - case VKD3D_SHADER_REL_OP_EQ: shader_addline(buffer, "_eq"); break; - case VKD3D_SHADER_REL_OP_GE: shader_addline(buffer, "_ge"); break; - case VKD3D_SHADER_REL_OP_LT: shader_addline(buffer, "_lt"); break; - case VKD3D_SHADER_REL_OP_NE: shader_addline(buffer, "_ne"); break; - case VKD3D_SHADER_REL_OP_LE: shader_addline(buffer, "_le"); break; - default: shader_addline(buffer, "_(%u)", ins->flags); + case VKD3D_SHADER_REL_OP_GT: + vkd3d_string_buffer_printf(buffer, "_gt"); + break; + case VKD3D_SHADER_REL_OP_EQ: + vkd3d_string_buffer_printf(buffer, "_eq"); + break; + case VKD3D_SHADER_REL_OP_GE: + vkd3d_string_buffer_printf(buffer, "_ge"); + break; + case VKD3D_SHADER_REL_OP_LT: + vkd3d_string_buffer_printf(buffer, "_lt"); + break; + case VKD3D_SHADER_REL_OP_NE: + vkd3d_string_buffer_printf(buffer, "_ne"); + break; + case 
VKD3D_SHADER_REL_OP_LE: + vkd3d_string_buffer_printf(buffer, "_le"); + break; + default: + vkd3d_string_buffer_printf(buffer, "_(%u)", ins->flags); + break; } break; case VKD3DSIH_RESINFO: switch (ins->flags) { - case VKD3DSI_NONE: break; - case VKD3DSI_RESINFO_RCP_FLOAT: shader_addline(buffer, "_rcpFloat"); break; - case VKD3DSI_RESINFO_UINT: shader_addline(buffer, "_uint"); break; - default: shader_addline(buffer, "_unrecognized(%#x)", ins->flags); + case VKD3DSI_NONE: + break; + case VKD3DSI_RESINFO_RCP_FLOAT: + vkd3d_string_buffer_printf(buffer, "_rcpFloat"); + break; + case VKD3DSI_RESINFO_UINT: + vkd3d_string_buffer_printf(buffer, "_uint"); + break; + default: + vkd3d_string_buffer_printf(buffer, "_unrecognized(%#x)", ins->flags); + break; } break; case VKD3DSIH_SAMPLE_INFO: switch (ins->flags) { - case VKD3DSI_NONE: break; - case VKD3DSI_SAMPLE_INFO_UINT: shader_addline(buffer, "_uint"); break; - default: shader_addline(buffer, "_unrecognized(%#x)", ins->flags); + case VKD3DSI_NONE: + break; + case VKD3DSI_SAMPLE_INFO_UINT: + vkd3d_string_buffer_printf(buffer, "_uint"); + break; + default: + vkd3d_string_buffer_printf(buffer, "_unrecognized(%#x)", ins->flags); + break; } break; + case VKD3DSIH_IMM_ATOMIC_CMP_EXCH: + case VKD3DSIH_IMM_ATOMIC_IADD: + case VKD3DSIH_IMM_ATOMIC_AND: + case VKD3DSIH_IMM_ATOMIC_IMAX: + case VKD3DSIH_IMM_ATOMIC_IMIN: + case VKD3DSIH_IMM_ATOMIC_OR: + case VKD3DSIH_IMM_ATOMIC_UMAX: + case VKD3DSIH_IMM_ATOMIC_UMIN: + case VKD3DSIH_IMM_ATOMIC_EXCH: + case VKD3DSIH_IMM_ATOMIC_XOR: + shader_dump_atomic_op_flags(compiler, ins->flags); + break; + case VKD3DSIH_SYNC: shader_dump_sync_flags(compiler, ins->flags); break; case VKD3DSIH_TEX: if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0) && (ins->flags & VKD3DSI_TEXLD_PROJECT)) - shader_addline(buffer, "p"); + vkd3d_string_buffer_printf(buffer, "p"); + break; + + case VKD3DSIH_WAVE_OP_ADD: + case VKD3DSIH_WAVE_OP_IMAX: + case VKD3DSIH_WAVE_OP_IMIN: + case VKD3DSIH_WAVE_OP_MAX: + case 
VKD3DSIH_WAVE_OP_MIN: + case VKD3DSIH_WAVE_OP_MUL: + case VKD3DSIH_WAVE_OP_UMAX: + case VKD3DSIH_WAVE_OP_UMIN: + vkd3d_string_buffer_printf(&compiler->buffer, (ins->flags & VKD3DSI_WAVE_PREFIX) ? "_prefix" : "_active"); break; case VKD3DSIH_ISHL: case VKD3DSIH_ISHR: case VKD3DSIH_USHR: if (ins->flags & VKD3DSI_SHIFT_UNMASKED) - shader_addline(buffer, "_unmasked"); + vkd3d_string_buffer_printf(buffer, "_unmasked"); /* fall through */ default: shader_dump_precise_flags(compiler, ins->flags); @@ -1753,7 +1919,7 @@ static void shader_dump_icb(struct vkd3d_d3d_asm_compiler *compiler, shader_print_hex_literal(compiler, ", ", icb->data[4 * i + 3], "},\n"); } } - shader_addline(buffer, "}"); + vkd3d_string_buffer_printf(buffer, "}"); } static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, @@ -1765,11 +1931,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, compiler->current = ins; if (ins->predicate) - { - vkd3d_string_buffer_printf(buffer, "("); - shader_dump_src_param(compiler, ins->predicate); - vkd3d_string_buffer_printf(buffer, ") "); - } + shader_print_src_param(compiler, "(", ins->predicate, ") "); /* PixWin marks instructions with the coissue flag with a '+' */ if (ins->coissue) @@ -1782,21 +1944,20 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, case VKD3DSIH_DCL: case VKD3DSIH_DCL_UAV_TYPED: vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.opcode); - shader_dump_decl_usage(compiler, &ins->declaration.semantic, ins->flags); + shader_print_dcl_usage(compiler, "_", &ins->declaration.semantic, ins->flags, ""); shader_dump_ins_modifiers(compiler, &ins->declaration.semantic.resource.reg); - vkd3d_string_buffer_printf(buffer, "%s ", compiler->colours.reset); - shader_dump_register(compiler, &ins->declaration.semantic.resource.reg.reg, true); + vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); + shader_print_register(compiler, " ", 
&ins->declaration.semantic.resource.reg.reg, true, ""); shader_dump_register_space(compiler, ins->declaration.semantic.resource.range.space); break; case VKD3DSIH_DCL_CONSTANT_BUFFER: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_register(compiler, &ins->declaration.cb.src.reg, true); + shader_print_register(compiler, " ", &ins->declaration.cb.src.reg, true, ""); if (vkd3d_shader_ver_ge(&compiler->shader_version, 6, 0)) shader_print_subscript(compiler, ins->declaration.cb.size, NULL); else if (vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1)) shader_print_subscript(compiler, ins->declaration.cb.size / VKD3D_VEC4_SIZE / sizeof(float), NULL); - shader_addline(buffer, ", %s", + vkd3d_string_buffer_printf(buffer, ", %s", ins->flags & VKD3DSI_INDEXED_DYNAMIC ? "dynamicIndexed" : "immediateIndexed"); shader_dump_register_space(compiler, ins->declaration.cb.range.space); break; @@ -1823,8 +1984,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, break; case VKD3DSIH_DCL_INDEX_RANGE: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.index_range.dst, true); + shader_print_dst_param(compiler, " ", &ins->declaration.index_range.dst, true, ""); shader_print_uint_literal(compiler, " ", ins->declaration.index_range.register_count, ""); break; @@ -1840,41 +2000,32 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, break; case VKD3DSIH_DCL_INPUT_PS: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_interpolation_mode(compiler, ins->flags); - shader_addline(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.dst, true); + shader_print_interpolation_mode(compiler, " ", ins->flags, ""); + shader_print_dst_param(compiler, " ", &ins->declaration.dst, true, ""); break; case VKD3DSIH_DCL_INPUT_PS_SGV: case VKD3DSIH_DCL_INPUT_SGV: case VKD3DSIH_DCL_INPUT_SIV: case VKD3DSIH_DCL_OUTPUT_SIV: - vkd3d_string_buffer_printf(buffer, " "); - 
shader_dump_dst_param(compiler, &ins->declaration.register_semantic.reg, true); - shader_addline(buffer, ", "); - shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); + shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); + shader_print_input_sysval_semantic(compiler, ", ", ins->declaration.register_semantic.sysval_semantic, ""); break; case VKD3DSIH_DCL_INPUT_PS_SIV: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_interpolation_mode(compiler, ins->flags); - shader_addline(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.register_semantic.reg, true); - shader_addline(buffer, ", "); - shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); + shader_print_interpolation_mode(compiler, " ", ins->flags, ""); + shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); + shader_print_input_sysval_semantic(compiler, ", ", ins->declaration.register_semantic.sysval_semantic, ""); break; case VKD3DSIH_DCL_INPUT: case VKD3DSIH_DCL_OUTPUT: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.dst, true); + shader_print_dst_param(compiler, " ", &ins->declaration.dst, true, ""); break; case VKD3DSIH_DCL_INPUT_PRIMITIVE: case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_primitive_type(compiler, &ins->declaration.primitive_type); + shader_print_primitive_type(compiler, " ", &ins->declaration.primitive_type, ""); break; case VKD3DSIH_DCL_INTERFACE: @@ -1885,23 +2036,19 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, break; case VKD3DSIH_DCL_RESOURCE_RAW: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.raw_resource.resource.reg, true); + shader_print_dst_param(compiler, " ", &ins->declaration.raw_resource.resource.reg, true, ""); 
shader_dump_register_space(compiler, ins->declaration.raw_resource.resource.range.space); break; case VKD3DSIH_DCL_RESOURCE_STRUCTURED: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.structured_resource.resource.reg, true); + shader_print_dst_param(compiler, " ", &ins->declaration.structured_resource.resource.reg, true, ""); shader_print_uint_literal(compiler, ", ", ins->declaration.structured_resource.byte_stride, ""); shader_dump_register_space(compiler, ins->declaration.structured_resource.resource.range.space); break; case VKD3DSIH_DCL_SAMPLER: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_register(compiler, &ins->declaration.sampler.src.reg, true); - if (ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE) - shader_addline(buffer, ", comparisonMode"); + shader_print_register(compiler, " ", &ins->declaration.sampler.src.reg, true, + ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE ? ", comparisonMode" : ""); shader_dump_register_space(compiler, ins->declaration.sampler.range.space); break; @@ -1916,29 +2063,24 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, break; case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_tessellator_domain(compiler, ins->declaration.tessellator_domain); + shader_print_tessellator_domain(compiler, " ", ins->declaration.tessellator_domain, ""); break; case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_tessellator_output_primitive(compiler, ins->declaration.tessellator_output_primitive); + shader_print_tessellator_output_primitive(compiler, " ", ins->declaration.tessellator_output_primitive, ""); break; case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_tessellator_partitioning(compiler, ins->declaration.tessellator_partitioning); + shader_print_tessellator_partitioning(compiler, " ", 
ins->declaration.tessellator_partitioning, ""); break; case VKD3DSIH_DCL_TGSM_RAW: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.tgsm_raw.reg, true); + shader_print_dst_param(compiler, " ", &ins->declaration.tgsm_raw.reg, true, ""); shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_raw.byte_count, ""); break; case VKD3DSIH_DCL_TGSM_STRUCTURED: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.tgsm_structured.reg, true); + shader_print_dst_param(compiler, " ", &ins->declaration.tgsm_structured.reg, true, ""); shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_structured.byte_stride, ""); shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_structured.structure_count, ""); break; @@ -1951,15 +2093,13 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, case VKD3DSIH_DCL_UAV_RAW: shader_dump_uav_flags(compiler, ins->flags); - shader_addline(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.raw_resource.resource.reg, true); + shader_print_dst_param(compiler, " ", &ins->declaration.raw_resource.resource.reg, true, ""); shader_dump_register_space(compiler, ins->declaration.raw_resource.resource.range.space); break; case VKD3DSIH_DCL_UAV_STRUCTURED: shader_dump_uav_flags(compiler, ins->flags); - shader_addline(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.structured_resource.resource.reg, true); + shader_print_dst_param(compiler, " ", &ins->declaration.structured_resource.resource.reg, true, ""); shader_print_uint_literal(compiler, ", ", ins->declaration.structured_resource.byte_stride, ""); shader_dump_register_space(compiler, ins->declaration.structured_resource.resource.range.space); break; @@ -1994,7 +2134,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, if (ins->resource_type != VKD3D_SHADER_RESOURCE_NONE) { - shader_addline(buffer, 
"_indexable("); + vkd3d_string_buffer_printf(buffer, "_indexable("); if (ins->raw) vkd3d_string_buffer_printf(buffer, "raw_"); if (ins->structured) @@ -2002,7 +2142,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, shader_dump_resource_type(compiler, ins->resource_type); if (ins->resource_stride) shader_print_uint_literal(compiler, ", stride=", ins->resource_stride, ""); - shader_addline(buffer, ")"); + vkd3d_string_buffer_printf(buffer, ")"); } if (vkd3d_shader_instruction_has_texel_offset(ins)) @@ -2021,37 +2161,200 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, for (i = 0; i < ins->dst_count; ++i) { shader_dump_ins_modifiers(compiler, &ins->dst[i]); - shader_addline(buffer, !i ? " " : ", "); - shader_dump_dst_param(compiler, &ins->dst[i], false); + shader_print_dst_param(compiler, !i ? " " : ", ", &ins->dst[i], false, ""); } /* Other source tokens */ for (i = ins->dst_count; i < (ins->dst_count + ins->src_count); ++i) { - shader_addline(buffer, !i ? " " : ", "); - shader_dump_src_param(compiler, &ins->src[i - ins->dst_count]); + shader_print_src_param(compiler, !i ? 
" " : ", ", &ins->src[i - ins->dst_count], ""); } break; } - shader_addline(buffer, "\n"); + vkd3d_string_buffer_printf(buffer, "\n"); } -enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, +static const char *get_sysval_semantic_name(enum vkd3d_shader_sysval_semantic semantic) +{ + switch (semantic) + { + case VKD3D_SHADER_SV_NONE: return "NONE"; + case VKD3D_SHADER_SV_POSITION: return "POS"; + case VKD3D_SHADER_SV_CLIP_DISTANCE: return "CLIPDST"; + case VKD3D_SHADER_SV_CULL_DISTANCE: return "CULLDST"; + case VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX: return "RTINDEX"; + case VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX: return "VPINDEX"; + case VKD3D_SHADER_SV_VERTEX_ID: return "VERTID"; + case VKD3D_SHADER_SV_PRIMITIVE_ID: return "PRIMID"; + case VKD3D_SHADER_SV_INSTANCE_ID: return "INSTID"; + case VKD3D_SHADER_SV_IS_FRONT_FACE: return "FFACE"; + case VKD3D_SHADER_SV_SAMPLE_INDEX: return "SAMPLE"; + case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: return "QUADEDGE"; + case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: return "QUADINT"; + case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: return "TRIEDGE"; + case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: return "TRIINT"; + case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: return "LINEDET"; + case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: return "LINEDEN"; + case VKD3D_SHADER_SV_TARGET: return "TARGET"; + case VKD3D_SHADER_SV_DEPTH: return "DEPTH"; + case VKD3D_SHADER_SV_COVERAGE: return "COVERAGE"; + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "DEPTHGE"; + case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "DEPTHLE"; + case VKD3D_SHADER_SV_STENCIL_REF: return "STENCILREF"; + default: return "??"; + } +} + +static const char *get_component_type_name(enum vkd3d_shader_component_type type) +{ + switch (type) + { + case VKD3D_SHADER_COMPONENT_VOID: return "void"; + case VKD3D_SHADER_COMPONENT_UINT: return "uint"; + case VKD3D_SHADER_COMPONENT_INT: return "int"; + case VKD3D_SHADER_COMPONENT_FLOAT: return "float"; + case 
VKD3D_SHADER_COMPONENT_BOOL: return "bool"; + case VKD3D_SHADER_COMPONENT_DOUBLE: return "double"; + case VKD3D_SHADER_COMPONENT_UINT64: return "uint64"; + default: return "??"; + } +} + +static const char *get_minimum_precision_name(enum vkd3d_shader_minimum_precision prec) +{ + switch (prec) + { + case VKD3D_SHADER_MINIMUM_PRECISION_NONE: return "NONE"; + case VKD3D_SHADER_MINIMUM_PRECISION_FLOAT_16: return "FLOAT_16"; + case VKD3D_SHADER_MINIMUM_PRECISION_FIXED_8_2: return "FIXED_8_2"; + case VKD3D_SHADER_MINIMUM_PRECISION_INT_16: return "INT_16"; + case VKD3D_SHADER_MINIMUM_PRECISION_UINT_16: return "UINT_16"; + default: return "??"; + } +} + +static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic semantic) +{ + switch (semantic) + { + case VKD3D_SHADER_SV_DEPTH: return "oDepth"; + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "oDepthGE"; + case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "oDepthLE"; + /* SV_Coverage has name vCoverage when used as an input, + * but it doesn't appear in the signature in that case.
*/ + case VKD3D_SHADER_SV_COVERAGE: return "oMask"; + case VKD3D_SHADER_SV_STENCIL_REF: return "oStencilRef"; + default: return "??"; + } +} + +static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, + const char *name, const char *register_name, const struct shader_signature *signature) +{ + struct vkd3d_string_buffer *buffer = &compiler->buffer; + unsigned int i; + + if (signature->element_count == 0) + return VKD3D_OK; + + vkd3d_string_buffer_printf(buffer, "%s%s%s\n", + compiler->colours.opcode, name, compiler->colours.reset); + + for (i = 0; i < signature->element_count; ++i) + { + struct signature_element *element = &signature->elements[i]; + + vkd3d_string_buffer_printf(buffer, "%s.param%s %s", compiler->colours.opcode, + compiler->colours.reset, element->semantic_name); + + if (element->semantic_index != 0) + vkd3d_string_buffer_printf(buffer, "%u", element->semantic_index); + + if (element->register_index != -1) + { + shader_print_write_mask(compiler, "", element->mask, ""); + vkd3d_string_buffer_printf(buffer, ", %s%s%d%s", compiler->colours.reg, + register_name, element->register_index, compiler->colours.reset); + shader_print_write_mask(compiler, "", element->used_mask, ""); + } + else + { + vkd3d_string_buffer_printf(buffer, ", %s%s%s", compiler->colours.reg, + get_semantic_register_name(element->sysval_semantic), compiler->colours.reset); + } + + if (!element->component_type && !element->sysval_semantic + && !element->min_precision && !element->stream_index) + goto done; + + vkd3d_string_buffer_printf(buffer, ", %s", + get_component_type_name(element->component_type)); + + if (!element->sysval_semantic && !element->min_precision && !element->stream_index) + goto done; + + vkd3d_string_buffer_printf(buffer, ", %s", + get_sysval_semantic_name(element->sysval_semantic)); + + if (!element->min_precision && !element->stream_index) + goto done; + + vkd3d_string_buffer_printf(buffer, ", %s", + 
get_minimum_precision_name(element->min_precision)); + + if (!element->stream_index) + goto done; + + vkd3d_string_buffer_printf(buffer, ", m%u", + element->stream_index); + + done: + vkd3d_string_buffer_printf(buffer, "\n"); + } + + return VKD3D_OK; +} + +static enum vkd3d_result dump_signatures(struct vkd3d_d3d_asm_compiler *compiler, + const struct vsir_program *program) +{ + enum vkd3d_result ret; + + if ((ret = dump_signature(compiler, ".input", + program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vicp" : "v", + &program->input_signature)) < 0) + return ret; + + if ((ret = dump_signature(compiler, ".output", "o", + &program->output_signature)) < 0) + return ret; + + if ((ret = dump_signature(compiler, ".patch_constant", + program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vpc" : "o", + &program->patch_constant_signature)) < 0) + return ret; + + vkd3d_string_buffer_printf(&compiler->buffer, "%s.text%s\n", + compiler->colours.opcode, compiler->colours.reset); + + return VKD3D_OK; +} + +enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, enum vsir_asm_dialect dialect) + struct vkd3d_shader_code *out, enum vsir_asm_flags flags) { const struct vkd3d_shader_version *shader_version = &program->shader_version; enum vkd3d_shader_compile_option_formatting_flags formatting; struct vkd3d_d3d_asm_compiler compiler = { - .dialect = dialect, + .flags = flags, }; enum vkd3d_result result = VKD3D_OK; struct vkd3d_string_buffer *buffer; unsigned int indent, i, j; const char *indent_str; - void *code; static const struct vkd3d_d3d_asm_colours no_colours = { @@ -2064,6 +2367,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, .swizzle = "", .version = "", .write_mask = "", + .label = "", }; static const struct vkd3d_d3d_asm_colours colours = { @@ -2076,6 +2380,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct 
vsir_program *program, .swizzle = "\x1b[93m", .version = "\x1b[36m", .write_mask = "\x1b[93m", + .label = "\x1b[91m", }; formatting = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT @@ -2109,6 +2414,17 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, shader_get_type_prefix(shader_version->type), shader_version->major, shader_version->minor, compiler.colours.reset); + /* The signatures we emit only make sense for DXBC shaders. D3DBC + * doesn't even have an explicit concept of signature. */ + if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES && shader_version->major >= 4) + { + if ((result = dump_signatures(&compiler, program)) < 0) + { + vkd3d_string_buffer_cleanup(buffer); + return result; + } + } + indent = 0; for (i = 0; i < program->instructions.count; ++i) { @@ -2124,6 +2440,14 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, --indent; break; + case VKD3DSIH_LABEL: + case VKD3DSIH_HS_DECLS: + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + indent = 0; + break; + default: break; } @@ -2142,6 +2466,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, case VKD3DSIH_IFC: case VKD3DSIH_LOOP: case VKD3DSIH_SWITCH: + case VKD3DSIH_LABEL: ++indent; break; @@ -2150,18 +2475,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, } } - if ((code = vkd3d_malloc(buffer->content_size))) - { - memcpy(code, buffer->buffer, buffer->content_size); - out->size = buffer->content_size; - out->code = code; - } - else - { - result = VKD3D_ERROR_OUT_OF_MEMORY; - } - - vkd3d_string_buffer_cleanup(buffer); + vkd3d_shader_code_from_string_buffer(out, buffer); return result; } @@ -2171,7 +2485,7 @@ void vkd3d_shader_trace(const struct vsir_program *program) const char *p, *q, *end; struct vkd3d_shader_code code; - if (vkd3d_dxbc_binary_to_text(program, NULL, &code, VSIR_ASM_VSIR) != 
VKD3D_OK) + if (d3d_asm_compile(program, NULL, &code, VSIR_ASM_FLAG_DUMP_TYPES) != VKD3D_OK) return; end = (const char *)code.code + code.size; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index 27f5c810436..bfd5b52b436 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -215,8 +215,12 @@ struct vkd3d_shader_sm1_parser struct vkd3d_shader_parser p; + struct + { #define MAX_CONSTANT_COUNT 8192 - uint32_t constant_def_mask[3][MAX_CONSTANT_COUNT / 32]; + uint32_t def_mask[VKD3D_BITMAP_SIZE(MAX_CONSTANT_COUNT)]; + uint32_t count; + } constants[3]; }; /* This table is not order or position dependent. */ @@ -392,11 +396,6 @@ static const enum vkd3d_shader_resource_type resource_type_table[] = /* VKD3D_SM1_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, }; -static struct vkd3d_shader_sm1_parser *vkd3d_shader_sm1_parser(struct vkd3d_shader_parser *parser) -{ - return CONTAINING_RECORD(parser, struct vkd3d_shader_sm1_parser, p); -} - static uint32_t read_u32(const uint32_t **ptr) { return *(*ptr)++; @@ -414,7 +413,7 @@ static bool has_relative_address(uint32_t param) static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info( const struct vkd3d_shader_sm1_parser *sm1, enum vkd3d_sm1_opcode opcode) { - const struct vkd3d_shader_version *version = &sm1->p.program.shader_version; + const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; const struct vkd3d_sm1_opcode_info *info; unsigned int i = 0; @@ -537,13 +536,14 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, unsigned int register_index, bool is_dcl, unsigned int mask) { + struct vsir_program *program = sm1->p.program; struct shader_signature *signature; struct signature_element *element; if (output) - signature = &sm1->p.shader_desc.output_signature; + signature = 
&program->output_signature; else - signature = &sm1->p.shader_desc.input_signature; + signature = &program->input_signature; if ((element = find_signature_element(signature, name, index))) { @@ -559,7 +559,8 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp element = &signature->elements[signature->element_count++]; memset(element, 0, sizeof(*element)); - element->semantic_name = name; + if (!(element->semantic_name = vkd3d_strdup(name))) + return false; element->semantic_index = index; element->sysval_semantic = sysval; element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; @@ -568,7 +569,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp element->register_count = 1; element->mask = mask; element->used_mask = is_dcl ? 0 : mask; - if (sm1->p.program.shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) + if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) element->interpolation_mode = VKD3DSIM_LINEAR; return true; @@ -577,13 +578,14 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, unsigned int register_index, unsigned int mask) { + struct vsir_program *program = sm1->p.program; struct shader_signature *signature; struct signature_element *element; if (output) - signature = &sm1->p.shader_desc.output_signature; + signature = &program->output_signature; else - signature = &sm1->p.shader_desc.input_signature; + signature = &program->input_signature; if (!(element = find_signature_element_by_register_index(signature, register_index))) { @@ -598,7 +600,7 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) { - const struct vkd3d_shader_version *version = 
&sm1->p.program.shader_version; + const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; unsigned int register_index = reg->idx[0].offset; switch (reg->type) @@ -701,7 +703,7 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1, const struct vkd3d_shader_semantic *semantic) { - const struct vkd3d_shader_version *version = &sm1->p.program.shader_version; + const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; unsigned int mask = semantic->resource.reg.write_mask; @@ -750,22 +752,20 @@ static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser * static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, enum vkd3d_shader_d3dbc_constant_register set, uint32_t index, bool from_def) { - struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; - - desc->flat_constant_count[set].used = max(desc->flat_constant_count[set].used, index + 1); + sm1->constants[set].count = max(sm1->constants[set].count, index + 1); if (from_def) { /* d3d shaders have a maximum of 8192 constants; we should not overrun * this array. 
*/ - assert((index / 32) <= ARRAY_SIZE(sm1->constant_def_mask[set])); - bitmap_set(sm1->constant_def_mask[set], index); + assert((index / 32) <= ARRAY_SIZE(sm1->constants[set].def_mask)); + bitmap_set(sm1->constants[set].def_mask, index); } } static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, const struct vkd3d_shader_register *reg, unsigned int mask, bool from_def) { - struct vsir_program *program = &sm1->p.program; + struct vsir_program *program = sm1->p.program; uint32_t register_index = reg->idx[0].offset; switch (reg->type) @@ -826,7 +826,7 @@ static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, * VS >= 2.0 have relative addressing (with token) * VS >= 1.0 < 2.0 have relative addressing (without token) * The version check below should work in general. */ - if (sm1->p.program.shader_version.major < 2) + if (sm1->p.program->shader_version.major < 2) { *addr_token = (1u << 31) | ((VKD3DSPR_ADDR << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2) @@ -855,7 +855,7 @@ static void shader_sm1_skip_opcode(const struct vkd3d_shader_sm1_parser *sm1, co /* Version 2.0+ shaders may contain address tokens, but fortunately they * have a useful length mask - use it here. Version 1.x shaders contain no * such tokens. 
*/ - if (sm1->p.program.shader_version.major >= 2) + if (sm1->p.program->shader_version.major >= 2) { length = (opcode_token & VKD3D_SM1_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; *ptr += length; @@ -881,15 +881,6 @@ static void shader_sm1_skip_opcode(const struct vkd3d_shader_sm1_parser *sm1, co *ptr += (opcode_info->dst_count + opcode_info->src_count); } -static void shader_sm1_destroy(struct vkd3d_shader_parser *parser) -{ - struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); - - vsir_program_cleanup(&parser->program); - free_shader_desc(&sm1->p.shader_desc); - vkd3d_free(sm1); -} - static void shader_sm1_read_src_param(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, struct vkd3d_shader_src_param *src_param) { @@ -899,7 +890,7 @@ static void shader_sm1_read_src_param(struct vkd3d_shader_sm1_parser *sm1, const shader_sm1_read_param(sm1, ptr, &token, &addr_token); if (has_relative_address(token)) { - if (!(src_rel_addr = vsir_program_get_src_params(&sm1->p.program, 1))) + if (!(src_rel_addr = vsir_program_get_src_params(sm1->p.program, 1))) { vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); @@ -920,7 +911,7 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const shader_sm1_read_param(sm1, ptr, &token, &addr_token); if (has_relative_address(token)) { - if (!(dst_rel_addr = vsir_program_get_src_params(&sm1->p.program, 1))) + if (!(dst_rel_addr = vsir_program_get_src_params(sm1->p.program, 1))) { vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); @@ -1089,7 +1080,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str { struct vkd3d_shader_src_param *src_params, *predicate; const struct vkd3d_sm1_opcode_info *opcode_info; - struct vsir_program *program = &sm1->p.program; + struct vsir_program *program = sm1->p.program; struct vkd3d_shader_dst_param *dst_param; const 
uint32_t **ptr = &sm1->ptr; uint32_t opcode_token; @@ -1226,18 +1217,12 @@ static bool shader_sm1_is_end(struct vkd3d_shader_sm1_parser *sm1) return false; } -const struct vkd3d_shader_parser_ops shader_sm1_parser_ops = -{ - .parser_destroy = shader_sm1_destroy, -}; - -static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, +static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) { const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; const uint32_t *code = compile_info->source.code; size_t code_size = compile_info->source.size; - struct vkd3d_shader_desc *shader_desc; struct vkd3d_shader_version version; uint16_t shader_type; size_t token_count; @@ -1287,12 +1272,10 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, sm1->end = &code[token_count]; /* Estimate instruction count to avoid reallocation in most shaders. */ - if (!vkd3d_shader_parser_init(&sm1->p, message_context, compile_info->source_name, &version, &shader_sm1_parser_ops, - code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) + if (!vsir_program_init(program, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) return VKD3D_ERROR_OUT_OF_MEMORY; - shader_desc = &sm1->p.shader_desc; - shader_desc->byte_code = code; - shader_desc->byte_code_size = code_size; + + vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); sm1->ptr = sm1->start; return VKD3D_OK; @@ -1306,77 +1289,68 @@ static uint32_t get_external_constant_count(struct vkd3d_shader_sm1_parser *sm1, /* Find the highest constant index which is not written by a DEF * instruction. We can't (easily) use an FFZ function for this since it * needs to be limited by the highest used register index. 
*/ - for (j = sm1->p.shader_desc.flat_constant_count[set].used; j > 0; --j) + for (j = sm1->constants[set].count; j > 0; --j) { - if (!bitmap_is_set(sm1->constant_def_mask[set], j - 1)) + if (!bitmap_is_set(sm1->constants[set].def_mask, j - 1)) return j; } return 0; } -int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) +int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, + struct vkd3d_shader_message_context *message_context, struct vsir_program *program) { struct vkd3d_shader_instruction_array *instructions; + struct vkd3d_shader_sm1_parser sm1 = {0}; struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_sm1_parser *sm1; unsigned int i; int ret; - if (!(sm1 = vkd3d_calloc(1, sizeof(*sm1)))) - { - ERR("Failed to allocate parser.\n"); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - - if ((ret = shader_sm1_init(sm1, compile_info, message_context)) < 0) + if ((ret = shader_sm1_init(&sm1, program, compile_info, message_context)) < 0) { WARN("Failed to initialise shader parser, ret %d.\n", ret); - vkd3d_free(sm1); return ret; } - instructions = &sm1->p.program.instructions; - while (!shader_sm1_is_end(sm1)) + instructions = &program->instructions; + while (!shader_sm1_is_end(&sm1)) { if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) { ERR("Failed to allocate instructions.\n"); - vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); - shader_sm1_destroy(&sm1->p); + vkd3d_shader_parser_error(&sm1.p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); + vsir_program_cleanup(program); return VKD3D_ERROR_OUT_OF_MEMORY; } ins = &instructions->elements[instructions->count]; - shader_sm1_read_instruction(sm1, ins); + shader_sm1_read_instruction(&sm1, ins); if (ins->handler_idx == VKD3DSIH_INVALID) { WARN("Encountered unrecognized or 
invalid instruction.\n"); - shader_sm1_destroy(&sm1->p); + vsir_program_cleanup(program); return VKD3D_ERROR_INVALID_SHADER; } ++instructions->count; } - for (i = 0; i < ARRAY_SIZE(sm1->p.shader_desc.flat_constant_count); ++i) - sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); + for (i = 0; i < ARRAY_SIZE(program->flat_constant_count); ++i) + program->flat_constant_count[i] = get_external_constant_count(&sm1, i); - if (!sm1->p.failed) - ret = vsir_validate(&sm1->p); + if (!sm1.p.failed) + ret = vkd3d_shader_parser_validate(&sm1.p, config_flags); - if (sm1->p.failed && ret >= 0) + if (sm1.p.failed && ret >= 0) ret = VKD3D_ERROR_INVALID_SHADER; if (ret < 0) { WARN("Failed to parse shader.\n"); - shader_sm1_destroy(&sm1->p); + vsir_program_cleanup(program); return ret; } - *parser = &sm1->p; - return ret; } @@ -1499,47 +1473,74 @@ static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, uns return D3DPS_VERSION(major, minor); } -static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) +D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) { switch (type->class) { case HLSL_CLASS_ARRAY: - return sm1_class(type->e.array.type); + return hlsl_sm1_class(type->e.array.type); case HLSL_CLASS_MATRIX: assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) return D3DXPC_MATRIX_COLUMNS; else return D3DXPC_MATRIX_ROWS; - case HLSL_CLASS_OBJECT: - return D3DXPC_OBJECT; case HLSL_CLASS_SCALAR: return D3DXPC_SCALAR; case HLSL_CLASS_STRUCT: return D3DXPC_STRUCT; case HLSL_CLASS_VECTOR: return D3DXPC_VECTOR; - default: - ERR("Invalid class %#x.\n", type->class); - vkd3d_unreachable(); + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_VERTEX_SHADER: + return D3DXPC_OBJECT; + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case 
HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VOID: + break; } + + vkd3d_unreachable(); } -static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) +D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) { - switch (type->base_type) + switch (type->class) { - case HLSL_TYPE_BOOL: - return D3DXPT_BOOL; - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3DXPT_FLOAT; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - return D3DXPT_INT; - case HLSL_TYPE_PIXELSHADER: - return D3DXPT_PIXELSHADER; - case HLSL_TYPE_SAMPLER: + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + switch (type->e.numeric.type) + { + case HLSL_TYPE_BOOL: + return D3DXPT_BOOL; + /* Actually double behaves differently depending on DLL version: + * For <= 36, it maps to D3DXPT_FLOAT. + * For 37-40, it maps to zero (D3DXPT_VOID). + * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_* + * values are mostly compatible with D3DXPT_*). + * However, the latter two cases look like bugs, and a reasonable + * application certainly wouldn't know what to do with them. + * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. 
*/ + case HLSL_TYPE_DOUBLE: + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3DXPT_FLOAT; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return D3DXPT_INT; + default: + vkd3d_unreachable(); + } + + case HLSL_CLASS_SAMPLER: switch (type->sampler_dim) { case HLSL_SAMPLER_DIM_1D: @@ -1557,9 +1558,8 @@ static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) vkd3d_unreachable(); } break; - case HLSL_TYPE_STRING: - return D3DXPT_STRING; - case HLSL_TYPE_TEXTURE: + + case HLSL_CLASS_TEXTURE: switch (type->sampler_dim) { case HLSL_SAMPLER_DIM_1D: @@ -1577,13 +1577,33 @@ static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) vkd3d_unreachable(); } break; - case HLSL_TYPE_VERTEXSHADER: - return D3DXPT_VERTEXSHADER; - case HLSL_TYPE_VOID: + + case HLSL_CLASS_ARRAY: + return hlsl_sm1_base_type(type->e.array.type); + + case HLSL_CLASS_STRUCT: return D3DXPT_VOID; - default: - vkd3d_unreachable(); + + case HLSL_CLASS_STRING: + return D3DXPT_STRING; + + case HLSL_CLASS_PIXEL_SHADER: + return D3DXPT_PIXELSHADER; + + case HLSL_CLASS_VERTEX_SHADER: + return D3DXPT_VERTEXSHADER; + + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VOID: + break; } + + vkd3d_unreachable(); } static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) @@ -1620,7 +1640,7 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_typ } } - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type))); put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); put_u32(buffer, vkd3d_make_u32(array_size, field_count)); put_u32(buffer, fields_offset); @@ -1670,7 +1690,7 @@ static void 
write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe for (r = 0; r <= HLSL_REGSET_LAST; ++r) { - if (var->semantic.name || !var->regs[r].allocated) + if (var->semantic.name || !var->regs[r].allocated || !var->last_read) continue; ++uniform_count; @@ -1708,14 +1728,14 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe for (r = 0; r <= HLSL_REGSET_LAST; ++r) { - if (var->semantic.name || !var->regs[r].allocated) + if (var->semantic.name || !var->regs[r].allocated || !var->last_read) continue; put_u32(buffer, 0); /* name */ if (r == HLSL_REGSET_NUMERIC) { put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); - put_u32(buffer, var->data_type->reg_size[r] / 4); + put_u32(buffer, var->bind_count[r]); } else { @@ -1737,7 +1757,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe { size_t var_offset, name_offset; - if (var->semantic.name || !var->regs[r].allocated) + if (var->semantic.name || !var->regs[r].allocated || !var->last_read) continue; var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); @@ -1767,6 +1787,7 @@ static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) struct sm1_instruction { D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; + unsigned int flags; struct sm1_dst_register { @@ -1806,6 +1827,8 @@ static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu uint32_t token = instr->opcode; unsigned int i; + token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); + if (ctx->profile->major_version > 1) token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; put_u32(buffer, token); @@ -1969,24 +1992,21 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b /* Narrowing casts were already lowered. 
*/ assert(src_type->dimx == dst_type->dimx); - switch (dst_type->base_type) + switch (dst_type->e.numeric.type) { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: - switch (src_type->base_type) + switch (src_type->e.numeric.type) { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - /* Integers are internally represented as floats, so no change is necessary.*/ + case HLSL_TYPE_BOOL: + /* Integrals are internally represented as floats, so no change is necessary.*/ case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); break; - case HLSL_TYPE_BOOL: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to float."); - break; - case HLSL_TYPE_DOUBLE: hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to float."); break; @@ -1998,11 +2018,14 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - switch(src_type->base_type) + switch(src_type->e.numeric.type) { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: - /* A compilation pass applies a FLOOR operation to casts to int, so no change is necessary. */ + /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not + * reach this case unless we are missing something. */ + hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer."); + break; case HLSL_TYPE_INT: case HLSL_TYPE_UINT: write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); @@ -2028,7 +2051,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b case HLSL_TYPE_BOOL: /* Casts to bool should have already been lowered. 
*/ default: - hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.\n", + hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.", debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type)); break; } @@ -2067,6 +2090,9 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b D3DDECLUSAGE usage; bool ret; + if ((!output && !var->last_read) || (output && !var->first_write)) + return; + if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) { usage = 0; @@ -2242,13 +2268,19 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b assert(instr->reg.allocated); + if (expr->op == HLSL_OP1_REINTERPRET) + { + write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + return; + } + if (expr->op == HLSL_OP1_CAST) { write_sm1_cast(ctx, buffer, instr); return; } - if (instr->data_type->base_type != HLSL_TYPE_FLOAT) + if (instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT) { /* These need to be lowered. 
*/ hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); @@ -2329,7 +2361,23 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } break; + case HLSL_OP2_LOGIC_AND: + write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_LOGIC_OR: + write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_SLT: + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); + write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); + break; + case HLSL_OP3_CMP: + if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) + hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); break; @@ -2343,6 +2391,49 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } } +static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_block *block); + +static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_if *iff = hlsl_ir_if(instr); + const struct hlsl_ir_node *condition; + struct sm1_instruction sm1_ifc, sm1_else, sm1_endif; + + condition = iff->condition.node; + assert(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); + + sm1_ifc = (struct sm1_instruction) + { + .opcode = D3DSIO_IFC, + .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. 
*/ + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), + .srcs[0].reg = condition->reg.id, + .srcs[0].mod = 0, + + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), + .srcs[1].reg = condition->reg.id, + .srcs[1].mod = D3DSPSM_NEG, + + .src_count = 2, + }; + write_sm1_instruction(ctx, buffer, &sm1_ifc); + write_sm1_block(ctx, buffer, &iff->then_block); + + if (!list_empty(&iff->else_block.instrs)) + { + sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; + write_sm1_instruction(ctx, buffer, &sm1_else); + write_sm1_block(ctx, buffer, &iff->else_block); + } + + sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; + write_sm1_instruction(ctx, buffer, &sm1_endif); +} + static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); @@ -2368,7 +2459,7 @@ static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } default: - hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); + hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); } } @@ -2456,7 +2547,7 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ break; default: - hlsl_fixme(ctx, &instr->loc, "Resource load type %u\n", load->load_type); + hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); return; } @@ -2488,7 +2579,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) { - FIXME("Matrix writemasks need to be lowered.\n"); + hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks."); return; } @@ -2543,28 +2634,20 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer write_sm1_instruction(ctx, buffer, 
&sm1_instr); } -static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_function_decl *entry_func) +static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_block *block) { const struct hlsl_ir_node *instr; - LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { if (instr->data_type) { - if (instr->data_type->class == HLSL_CLASS_MATRIX) + if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) { - /* These need to be lowered. */ - hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); - continue; - } - else if (instr->data_type->class == HLSL_CLASS_OBJECT) - { - hlsl_fixme(ctx, &instr->loc, "Object copy."); + hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); break; } - - assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); } switch (instr->type) @@ -2580,6 +2663,13 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b write_sm1_expr(ctx, buffer, instr); break; + case HLSL_IR_IF: + if (hlsl_version_ge(ctx, 2, 1)) + write_sm1_if(ctx, buffer, instr); + else + hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); + break; + case HLSL_IR_JUMP: write_sm1_jump(ctx, buffer, instr); break; @@ -2617,7 +2707,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun write_sm1_constant_defs(ctx, &buffer); write_sm1_semantic_dcls(ctx, &buffer); write_sm1_sampler_dcls(ctx, &buffer); - write_sm1_instructions(ctx, &buffer, entry_func); + write_sm1_block(ctx, &buffer, &entry_func->body); put_u32(&buffer, D3DSIO_END); diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index 37ebc73c099..4b9f67235aa 100644 --- 
a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -150,7 +150,7 @@ static const char *shader_get_string(const char *data, size_t data_size, size_t } static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, - const char *source_name, struct vkd3d_shader_dxbc_desc *desc) + const char *source_name, uint32_t flags, struct vkd3d_shader_dxbc_desc *desc) { const struct vkd3d_shader_location location = {.source_name = source_name}; struct vkd3d_shader_dxbc_section_desc *sections, *section; @@ -186,17 +186,20 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ checksum[1] = read_u32(&ptr); checksum[2] = read_u32(&ptr); checksum[3] = read_u32(&ptr); - vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); - if (memcmp(checksum, calculated_checksum, sizeof(checksum))) + if (!(flags & VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM)) { - WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " - "calculated checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x}.\n", - checksum[0], checksum[1], checksum[2], checksum[3], - calculated_checksum[0], calculated_checksum[1], - calculated_checksum[2], calculated_checksum[3]); - vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHECKSUM, - "Invalid DXBC checksum."); - return VKD3D_ERROR_INVALID_ARGUMENT; + vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); + if (memcmp(checksum, calculated_checksum, sizeof(checksum))) + { + WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " + "calculated checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x}.\n", + checksum[0], checksum[1], checksum[2], checksum[3], + calculated_checksum[0], calculated_checksum[1], + calculated_checksum[2], calculated_checksum[3]); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHECKSUM, + "Invalid DXBC checksum."); + return VKD3D_ERROR_INVALID_ARGUMENT; + } } version = 
read_u32(&ptr); @@ -287,7 +290,7 @@ static int for_each_dxbc_section(const struct vkd3d_shader_code *dxbc, unsigned int i; int ret; - if ((ret = parse_dxbc(dxbc, message_context, source_name, &desc)) < 0) + if ((ret = parse_dxbc(dxbc, message_context, source_name, 0, &desc)) < 0) return ret; for (i = 0; i < desc.section_count; ++i) @@ -313,7 +316,7 @@ int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxbc, *messages = NULL; vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); - ret = parse_dxbc(dxbc, &message_context, NULL, desc); + ret = parse_dxbc(dxbc, &message_context, NULL, flags, desc); vkd3d_shader_message_context_trace_messages(&message_context); if (!vkd3d_shader_message_context_copy_messages(&message_context, messages) && ret >= 0) @@ -357,7 +360,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s uint32_t count, header_size; struct signature_element *e; const char *ptr = data; - unsigned int i; + unsigned int i, j; if (!require_space(0, 2, sizeof(uint32_t), section->data.size)) { @@ -400,6 +403,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s for (i = 0; i < count; ++i) { size_t name_offset; + const char *name; uint32_t mask; e[i].sort_index = i; @@ -410,9 +414,14 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s e[i].stream_index = 0; name_offset = read_u32(&ptr); - if (!(e[i].semantic_name = shader_get_string(data, section->data.size, name_offset))) + if (!(name = shader_get_string(data, section->data.size, name_offset)) + || !(e[i].semantic_name = vkd3d_strdup(name))) { WARN("Invalid name offset %#zx (data size %#zx).\n", name_offset, section->data.size); + for (j = 0; j < i; ++j) + { + vkd3d_free((void *)e[j].semantic_name); + } vkd3d_free(e); return VKD3D_ERROR_INVALID_ARGUMENT; } @@ -485,7 +494,7 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, static int shdr_handler(const struct 
vkd3d_shader_dxbc_section_desc *section, struct vkd3d_shader_message_context *message_context, void *context) { - struct vkd3d_shader_desc *desc = context; + struct dxbc_shader_desc *desc = context; int ret; switch (section->tag) @@ -550,7 +559,7 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, return VKD3D_OK; } -void free_shader_desc(struct vkd3d_shader_desc *desc) +void free_dxbc_shader_desc(struct dxbc_shader_desc *desc) { shader_signature_cleanup(&desc->input_signature); shader_signature_cleanup(&desc->output_signature); @@ -558,7 +567,7 @@ void free_shader_desc(struct vkd3d_shader_desc *desc) } int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc) + struct vkd3d_shader_message_context *message_context, const char *source_name, struct dxbc_shader_desc *desc) { int ret; @@ -569,7 +578,7 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, if (ret < 0) { WARN("Failed to parse shader, vkd3d result %d.\n", ret); - free_shader_desc(desc); + free_dxbc_shader_desc(desc); } return ret; diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index 26a8a5c1cc3..73a8d8687c5 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -31,12 +31,16 @@ static const uint64_t GLOBALVAR_FLAG_EXPLICIT_TYPE = 2; static const unsigned int GLOBALVAR_ADDRESS_SPACE_SHIFT = 2; static const uint64_t ALLOCA_FLAG_IN_ALLOCA = 0x20; static const uint64_t ALLOCA_FLAG_EXPLICIT_TYPE = 0x40; -static const uint64_t ALLOCA_ALIGNMENT_MASK = ALLOCA_FLAG_IN_ALLOCA - 1; +static const uint64_t ALLOCA_ALIGNMENT_MASK = 0x1f; static const unsigned int SHADER_DESCRIPTOR_TYPE_COUNT = 4; static const size_t MAX_IR_INSTRUCTIONS_PER_DXIL_INSTRUCTION = 11; static const unsigned int dx_max_thread_group_size[3] = {1024, 1024, 64}; +static const unsigned int 
MAX_GS_INSTANCE_COUNT = 32; /* kMaxGSInstanceCount */ +static const unsigned int MAX_GS_OUTPUT_TOTAL_SCALARS = 1024; /* kMaxGSOutputTotalScalars */ +static const unsigned int MAX_GS_OUTPUT_STREAMS = 4; + #define VKD3D_SHADER_SWIZZLE_64_MASK \ (VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(0) \ | VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(1)) @@ -103,6 +107,7 @@ enum bitcode_constant_code CST_CODE_INTEGER = 4, CST_CODE_FLOAT = 6, CST_CODE_STRING = 8, + CST_CODE_CE_CAST = 11, CST_CODE_CE_GEP = 12, CST_CODE_CE_INBOUNDS_GEP = 20, CST_CODE_DATA = 22, @@ -282,6 +287,18 @@ enum dxil_element_additional_tag ADDITIONAL_TAG_USED_MASK = 3, }; +enum dxil_input_primitive +{ + INPUT_PRIMITIVE_UNDEFINED = 0, + INPUT_PRIMITIVE_POINT = 1, + INPUT_PRIMITIVE_LINE = 2, + INPUT_PRIMITIVE_TRIANGLE = 3, + INPUT_PRIMITIVE_LINEWITHADJACENCY = 6, + INPUT_PRIMITIVE_TRIANGLEWITHADJACENY = 7, + INPUT_PRIMITIVE_PATCH1 = 8, + INPUT_PRIMITIVE_PATCH32 = 39, +}; + enum dxil_shader_properties_tag { SHADER_PROPERTIES_FLAGS = 0, @@ -342,6 +359,8 @@ enum dx_intrinsic_opcode { DX_LOAD_INPUT = 4, DX_STORE_OUTPUT = 5, + DX_FABS = 6, + DX_SATURATE = 7, DX_ISNAN = 8, DX_ISINF = 9, DX_ISFINITE = 10, @@ -374,8 +393,15 @@ enum dx_intrinsic_opcode DX_IMIN = 38, DX_UMAX = 39, DX_UMIN = 40, + DX_FMAD = 46, + DX_FMA = 47, + DX_IMAD = 48, + DX_UMAD = 49, DX_IBFE = 51, DX_UBFE = 52, + DX_DOT2 = 54, + DX_DOT3 = 55, + DX_DOT4 = 56, DX_CREATE_HANDLE = 57, DX_CBUFFER_LOAD_LEGACY = 59, DX_SAMPLE = 60, @@ -388,16 +414,54 @@ enum dx_intrinsic_opcode DX_TEXTURE_STORE = 67, DX_BUFFER_LOAD = 68, DX_BUFFER_STORE = 69, + DX_BUFFER_UPDATE_COUNTER = 70, DX_GET_DIMENSIONS = 72, + DX_TEXTURE_GATHER = 73, + DX_TEXTURE_GATHER_CMP = 74, + DX_TEX2DMS_GET_SAMPLE_POS = 75, + DX_RT_GET_SAMPLE_POS = 76, + DX_RT_GET_SAMPLE_COUNT = 77, DX_ATOMIC_BINOP = 78, DX_ATOMIC_CMP_XCHG = 79, + DX_BARRIER = 80, + DX_CALCULATE_LOD = 81, + DX_DISCARD = 82, DX_DERIV_COARSEX = 83, DX_DERIV_COARSEY = 84, DX_DERIV_FINEX = 85, 
DX_DERIV_FINEY = 86, + DX_COVERAGE = 91, + DX_THREAD_ID = 93, + DX_GROUP_ID = 94, + DX_THREAD_ID_IN_GROUP = 95, + DX_FLATTENED_THREAD_ID_IN_GROUP = 96, + DX_EMIT_STREAM = 97, + DX_CUT_STREAM = 98, + DX_EMIT_THEN_CUT_STREAM = 99, + DX_MAKE_DOUBLE = 101, DX_SPLIT_DOUBLE = 102, + DX_LOAD_OUTPUT_CONTROL_POINT = 103, + DX_LOAD_PATCH_CONSTANT = 104, + DX_DOMAIN_LOCATION = 105, + DX_STORE_PATCH_CONSTANT = 106, + DX_OUTPUT_CONTROL_POINT_ID = 107, + DX_PRIMITIVE_ID = 108, + DX_WAVE_IS_FIRST_LANE = 110, + DX_WAVE_GET_LANE_INDEX = 111, + DX_WAVE_GET_LANE_COUNT = 112, + DX_WAVE_ANY_TRUE = 113, + DX_WAVE_ALL_TRUE = 114, + DX_WAVE_ACTIVE_ALL_EQUAL = 115, + DX_WAVE_ACTIVE_BALLOT = 116, + DX_WAVE_READ_LANE_AT = 117, + DX_WAVE_READ_LANE_FIRST = 118, + DX_WAVE_ACTIVE_OP = 119, + DX_WAVE_ACTIVE_BIT = 120, + DX_WAVE_PREFIX_OP = 121, DX_LEGACY_F32TOF16 = 130, DX_LEGACY_F16TOF32 = 131, + DX_WAVE_ALL_BIT_COUNT = 135, + DX_WAVE_PREFIX_BIT_COUNT = 136, DX_RAW_BUFFER_LOAD = 139, DX_RAW_BUFFER_STORE = 140, }; @@ -449,6 +513,32 @@ enum dxil_predicate ICMP_SLE = 41, }; +enum dxil_rmw_code +{ + RMW_XCHG = 0, + RMW_ADD = 1, + RMW_SUB = 2, + RMW_AND = 3, + RMW_NAND = 4, + RMW_OR = 5, + RMW_XOR = 6, + RMW_MAX = 7, + RMW_MIN = 8, + RMW_UMAX = 9, + RMW_UMIN = 10, +}; + +enum dxil_atomic_ordering +{ + ORDERING_NOTATOMIC = 0, + ORDERING_UNORDERED = 1, + ORDERING_MONOTONIC = 2, + ORDERING_ACQUIRE = 3, + ORDERING_RELEASE = 4, + ORDERING_ACQREL = 5, + ORDERING_SEQCST = 6, +}; + enum dxil_atomic_binop_code { ATOMIC_BINOP_ADD, @@ -463,6 +553,29 @@ enum dxil_atomic_binop_code ATOMIC_BINOP_INVALID, }; +enum dxil_sync_flags +{ + SYNC_THREAD_GROUP = 0x1, + SYNC_GLOBAL_UAV = 0x2, + SYNC_THREAD_GROUP_UAV = 0x4, + SYNC_GROUP_SHARED_MEMORY = 0x8, +}; + +enum dxil_wave_bit_op_kind +{ + WAVE_BIT_OP_AND = 0, + WAVE_BIT_OP_OR = 1, + WAVE_BIT_OP_XOR = 2, +}; + +enum dxil_wave_op_kind +{ + WAVE_OP_ADD = 0, + WAVE_OP_MUL = 1, + WAVE_OP_MIN = 2, + WAVE_OP_MAX = 3, +}; + struct sm6_pointer_info { const struct sm6_type 
*type; @@ -541,7 +654,9 @@ struct sm6_value { const struct sm6_type *type; enum sm6_value_type value_type; + unsigned int structure_stride; bool is_undefined; + bool is_back_ref; union { struct sm6_function_data function; @@ -736,9 +851,12 @@ struct sm6_parser size_t global_symbol_count; const char *entry_point; + const char *patch_constant_function; struct vkd3d_shader_dst_param *output_params; struct vkd3d_shader_dst_param *input_params; + struct vkd3d_shader_dst_param *patch_constant_params; + uint32_t io_regs_declared[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)]; struct sm6_function *functions; size_t function_count; @@ -753,6 +871,7 @@ struct sm6_parser unsigned int indexable_temp_count; unsigned int icb_count; + unsigned int tgsm_count; struct sm6_value *values; size_t value_count; @@ -790,11 +909,6 @@ static size_t size_add_with_overflow_check(size_t a, size_t b) return (i < a) ? SIZE_MAX : i; } -static struct sm6_parser *sm6_parser(struct vkd3d_shader_parser *parser) -{ - return CONTAINING_RECORD(parser, struct sm6_parser, p); -} - static bool sm6_parser_is_end(struct sm6_parser *sm6) { return sm6->ptr == sm6->end; @@ -1876,6 +1990,25 @@ static const struct sm6_type *sm6_type_get_pointer_to_type(const struct sm6_type return NULL; } +static const struct sm6_type *sm6_type_get_cmpxchg_result_struct(struct sm6_parser *sm6) +{ + const struct sm6_type *type; + unsigned int i; + + for (i = 0; i < sm6->type_count; ++i) + { + type = &sm6->types[i]; + if (sm6_type_is_struct(type) && type->u.struc->elem_count == 2 + && sm6_type_is_i32(type->u.struc->elem_types[0]) + && sm6_type_is_bool(type->u.struc->elem_types[1])) + { + return type; + } + } + + return NULL; +} + /* Call for aggregate types only. 
*/ static const struct sm6_type *sm6_type_get_element_type_at_index(const struct sm6_type *type, uint64_t elem_idx) { @@ -2110,6 +2243,15 @@ static inline bool sm6_value_is_undef(const struct sm6_value *value) return sm6_value_is_register(value) && value->u.reg.type == VKD3DSPR_UNDEF; } +static bool sm6_value_vector_is_constant_or_undef(const struct sm6_value **values, unsigned int count) +{ + unsigned int i; + for (i = 0; i < count; ++i) + if (!sm6_value_is_constant(values[i]) && !sm6_value_is_undef(values[i])) + return false; + return true; +} + static bool sm6_value_is_icb(const struct sm6_value *value) { return value->value_type == VALUE_TYPE_ICB; @@ -2120,6 +2262,11 @@ static bool sm6_value_is_ssa(const struct sm6_value *value) return sm6_value_is_register(value) && register_is_ssa(&value->u.reg); } +static bool sm6_value_is_numeric_array(const struct sm6_value *value) +{ + return sm6_value_is_register(value) && register_is_numeric_array(&value->u.reg); +} + static inline unsigned int sm6_value_get_constant_uint(const struct sm6_value *value) { if (!sm6_value_is_constant(value)) @@ -2153,7 +2300,7 @@ static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_ { struct vkd3d_shader_src_param *params; - if (!(params = vsir_program_get_src_params(&sm6->p.program, count))) + if (!(params = vsir_program_get_src_params(sm6->p.program, count))) { ERR("Failed to allocate src params.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, @@ -2170,7 +2317,7 @@ static struct vkd3d_shader_dst_param *instruction_dst_params_alloc(struct vkd3d_ { struct vkd3d_shader_dst_param *params; - if (!(params = vsir_program_get_dst_params(&sm6->p.program, count))) + if (!(params = vsir_program_get_dst_params(sm6->p.program, count))) { ERR("Failed to allocate dst params.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, @@ -2199,6 +2346,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct 
sm6_type return VKD3D_DATA_BOOL; case 8: return VKD3D_DATA_UINT8; + case 16: + return VKD3D_DATA_UINT16; case 32: return VKD3D_DATA_UINT; case 64: @@ -2212,6 +2361,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type { switch (type->u.width) { + case 16: + return VKD3D_DATA_HALF; case 32: return VKD3D_DATA_FLOAT; case 64: @@ -2252,6 +2403,12 @@ static void register_init_ssa_scalar(struct vkd3d_shader_register *reg, const st register_init_ssa_vector(reg, sm6_type_get_scalar_type(type, 0), 1, value, sm6); } +static void register_make_constant_uint(struct vkd3d_shader_register *reg, unsigned int value) +{ + vsir_register_init(reg, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); + reg->u.immconst_u32[0] = value; +} + static void dst_param_init(struct vkd3d_shader_dst_param *param) { param->write_mask = VKD3DSP_WRITEMASK_0; @@ -2301,6 +2458,12 @@ static void src_param_init_scalar(struct vkd3d_shader_src_param *param, unsigned param->modifiers = VKD3DSPSM_NONE; } +static void src_param_init_vector(struct vkd3d_shader_src_param *param, unsigned int component_count) +{ + param->swizzle = VKD3D_SHADER_NO_SWIZZLE & ((1ull << VKD3D_SHADER_SWIZZLE_SHIFT(component_count)) - 1); + param->modifiers = VKD3DSPSM_NONE; +} + static void src_param_init_from_value(struct vkd3d_shader_src_param *param, const struct sm6_value *src) { src_param_init(param); @@ -2315,20 +2478,28 @@ static void src_param_init_vector_from_reg(struct vkd3d_shader_src_param *param, param->reg = *reg; } +static void src_param_make_constant_uint(struct vkd3d_shader_src_param *param, unsigned int value) +{ + src_param_init(param); + register_make_constant_uint(¶m->reg, value); +} + static void register_index_address_init(struct vkd3d_shader_register_index *idx, const struct sm6_value *address, struct sm6_parser *sm6) { if (sm6_value_is_constant(address)) { idx->offset = sm6_value_get_constant_uint(address); + idx->rel_addr = NULL; } else if (sm6_value_is_undef(address)) { idx->offset = 0; 
+ idx->rel_addr = NULL; } else { - struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(&sm6->p.program, 1); + struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(sm6->p.program, 1); if (rel_addr) src_param_init_from_value(rel_addr, address); idx->offset = 0; @@ -2336,14 +2507,18 @@ static void register_index_address_init(struct vkd3d_shader_register_index *idx, } } -static void instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instruction *ins, struct sm6_parser *sm6) +static bool instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instruction *ins, struct sm6_parser *sm6) { - struct vkd3d_shader_dst_param *param = instruction_dst_params_alloc(ins, 1, sm6); struct sm6_value *dst = sm6_parser_get_current_value(sm6); + struct vkd3d_shader_dst_param *param; + + if (!(param = instruction_dst_params_alloc(ins, 1, sm6))) + return false; dst_param_init_ssa_scalar(param, dst->type, dst, sm6); param->write_mask = VKD3DSP_WRITEMASK_0; dst->u.reg = param->reg; + return true; } static void instruction_dst_param_init_ssa_vector(struct vkd3d_shader_instruction *ins, @@ -2399,7 +2574,7 @@ static size_t sm6_parser_compute_max_value_count(struct sm6_parser *sm6, * overestimate the value count somewhat, but this should be no problem. */ value_count = size_add_with_overflow_check(value_count, max(block->record_count, 1u) - 1); sm6->value_capacity = max(sm6->value_capacity, value_count); - sm6->functions[sm6->function_count].value_count = value_count; + sm6->functions[sm6->function_count++].value_count = value_count; /* The value count returns to its previous value after handling a function. 
*/ if (value_count < SIZE_MAX) value_count = old_value_count; @@ -2482,6 +2657,26 @@ static bool sm6_value_validate_is_texture_handle(const struct sm6_value *value, return true; } +static bool sm6_value_validate_is_texture_2dms_handle(const struct sm6_value *value, enum dx_intrinsic_opcode op, + struct sm6_parser *sm6) +{ + enum dxil_resource_kind kind; + + if (!sm6_value_validate_is_handle(value, sm6)) + return false; + + kind = value->u.handle.d->kind; + if (!resource_kind_is_multisampled(kind)) + { + WARN("Resource kind %u for op %u is not a 2DMS texture.\n", kind, op); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCE_HANDLE, + "Resource kind %u for texture operation %u is not a 2DMS texture.", kind, op); + return false; + } + + return true; +} + static bool sm6_value_validate_is_sampler_handle(const struct sm6_value *value, enum dx_intrinsic_opcode op, struct sm6_parser *sm6) { @@ -2514,6 +2709,18 @@ static bool sm6_value_validate_is_pointer(const struct sm6_value *value, struct return true; } +static bool sm6_value_validate_is_backward_ref(const struct sm6_value *value, struct sm6_parser *sm6) +{ + if (!value->is_back_ref) + { + FIXME("Forward-referenced pointers are not supported.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Forward-referenced pointer declarations are not supported."); + return false; + } + return true; +} + static bool sm6_value_validate_is_numeric(const struct sm6_value *value, struct sm6_parser *sm6) { if (!sm6_type_is_numeric(value->type)) @@ -2539,6 +2746,30 @@ static bool sm6_value_validate_is_bool(const struct sm6_value *value, struct sm6 return true; } +static bool sm6_value_validate_is_pointer_to_i32(const struct sm6_value *value, struct sm6_parser *sm6) +{ + if (!sm6_type_is_pointer(value->type) || !sm6_type_is_i32(value->type->u.pointer.type)) + { + WARN("Operand result type %u is not a pointer to i32.\n", value->type->class); + vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "An int32 pointer operand passed to a DXIL instruction is not an int32 pointer."); + return false; + } + return true; +} + +static bool sm6_value_validate_is_i32(const struct sm6_value *value, struct sm6_parser *sm6) +{ + if (!sm6_type_is_i32(value->type)) + { + WARN("Operand result type %u is not i32.\n", value->type->class); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "An int32 operand passed to a DXIL instruction is not an int32."); + return false; + } + return true; +} + static const struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsigned int idx) { if (idx < sm6->value_count) @@ -2686,7 +2917,7 @@ static inline uint64_t decode_rotated_signed_value(uint64_t value) return value << 63; } -static inline float bitcast_uint64_to_float(uint64_t value) +static float bitcast_uint_to_float(unsigned int value) { union { @@ -2710,6 +2941,23 @@ static inline double bitcast_uint64_to_double(uint64_t value) return u.double_value; } +static float register_get_float_value(const struct vkd3d_shader_register *reg) +{ + if (!register_is_constant(reg) || !data_type_is_floating_point(reg->data_type)) + return 0.0; + + if (reg->dimension == VSIR_DIMENSION_VEC4) + WARN("Returning vec4.x.\n"); + + if (reg->type == VKD3DSPR_IMMCONST64) + { + WARN("Truncating double to float.\n"); + return bitcast_uint64_to_double(reg->u.immconst_u64[0]); + } + + return bitcast_uint_to_float(reg->u.immconst_u32[0]); +} + static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, const struct sm6_type *type, const uint64_t *operands, struct sm6_parser *sm6) { @@ -2745,7 +2993,7 @@ static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, co "Out of memory allocating an immediate constant buffer of count %u.", count); return VKD3D_ERROR_OUT_OF_MEMORY; } - if (!shader_instruction_array_add_icb(&sm6->p.program.instructions, icb)) + if 
(!shader_instruction_array_add_icb(&sm6->p.program->instructions, icb)) { ERR("Failed to store icb object.\n"); vkd3d_free(icb); @@ -2782,18 +3030,135 @@ static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, co return VKD3D_OK; } +static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, const struct dxil_record *record, + struct sm6_value *dst) +{ + const struct sm6_type *elem_type, *pointee_type, *gep_type, *ptr_type; + struct sm6_value *operands[3]; + unsigned int i, j, offset; + uint64_t value; + + i = 0; + pointee_type = (record->operand_count & 1) ? sm6_parser_get_type(sm6, record->operands[i++]) : NULL; + + if (!dxil_record_validate_operand_count(record, i + 6, i + 6, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + for (j = 0; i < record->operand_count; i += 2, ++j) + { + if (!(elem_type = sm6_parser_get_type(sm6, record->operands[i]))) + return VKD3D_ERROR_INVALID_SHADER; + + if ((value = record->operands[i + 1]) >= sm6->cur_max_value) + { + WARN("Invalid value index %"PRIu64".\n", value); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Invalid value index %"PRIu64".", value); + return VKD3D_ERROR_INVALID_SHADER; + } + else if (value == sm6->value_count) + { + WARN("Invalid value self-reference at %"PRIu64".\n", value); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Invalid value self-reference for a constexpr GEP."); + return VKD3D_ERROR_INVALID_SHADER; + } + + operands[j] = &sm6->values[value]; + if (value > sm6->value_count) + { + operands[j]->type = elem_type; + } + else if (operands[j]->type != elem_type) + { + WARN("Type mismatch.\n"); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, + "Type mismatch in constexpr GEP elements."); + } + } + + if (operands[0]->u.reg.idx_count > 1) + { + WARN("Unsupported stacked GEP.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "A GEP 
instruction on the result of a previous GEP is unsupported."); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (!sm6_value_is_constant_zero(operands[1])) + { + WARN("Expected constant zero.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "The pointer dereference index for a constexpr GEP instruction is not constant zero."); + return VKD3D_ERROR_INVALID_SHADER; + } + if (!sm6_value_is_constant(operands[2]) || !sm6_type_is_integer(operands[2]->type)) + { + WARN("Element index is not constant int.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "A constexpr GEP element index is not a constant integer."); + return VKD3D_ERROR_INVALID_SHADER; + } + + dst->structure_stride = operands[0]->structure_stride; + + ptr_type = operands[0]->type; + if (!sm6_type_is_pointer(ptr_type)) + { + WARN("Constexpr GEP base value is not a pointer.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "A constexpr GEP base value is not a pointer."); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (!pointee_type) + { + pointee_type = ptr_type->u.pointer.type; + } + else if (pointee_type != ptr_type->u.pointer.type) + { + WARN("Explicit pointee type mismatch.\n"); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, + "Explicit pointee type for constexpr GEP does not match the element type."); + } + + offset = sm6_value_get_constant_uint(operands[2]); + if (!(gep_type = sm6_type_get_element_type_at_index(pointee_type, offset))) + { + WARN("Failed to get element type.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Failed to get the element type of a constexpr GEP."); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (!(dst->type = sm6_type_get_pointer_to_type(gep_type, ptr_type->u.pointer.addr_space, sm6))) + { + WARN("Failed to get pointer type for type %u.\n", gep_type->class); + vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, + "Module does not define a pointer type for a constexpr GEP result."); + return VKD3D_ERROR_INVALID_SHADER; + } + dst->u.reg = operands[0]->u.reg; + dst->u.reg.idx[1].offset = offset; + dst->u.reg.idx[1].is_in_bounds = record->code == CST_CODE_CE_INBOUNDS_GEP; + dst->u.reg.idx_count = 2; + + return VKD3D_OK; +} + static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) { enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; - const struct sm6_type *type, *elem_type; + const struct sm6_type *type, *elem_type, *ptr_type; + size_t i, base_value_idx, value_idx; enum vkd3d_data_type reg_data_type; const struct dxil_record *record; + const struct sm6_value *src; enum vkd3d_result ret; struct sm6_value *dst; - size_t i, value_idx; uint64_t value; - for (i = 0, type = NULL; i < block->record_count; ++i) + for (i = 0, type = NULL, base_value_idx = sm6->value_count; i < block->record_count; ++i) { sm6->p.location.column = i; record = block->records[i]; @@ -2834,6 +3199,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const dst = sm6_parser_get_current_value(sm6); dst->type = type; dst->value_type = VALUE_TYPE_REG; + dst->is_back_ref = true; vsir_register_init(&dst->u.reg, reg_type, reg_data_type, 0); switch (record->code) @@ -2876,9 +3242,9 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const } if (type->u.width == 16) - FIXME("Half float type is not supported yet.\n"); + dst->u.reg.u.immconst_u32[0] = record->operands[0]; else if (type->u.width == 32) - dst->u.reg.u.immconst_f32[0] = bitcast_uint64_to_float(record->operands[0]); + dst->u.reg.u.immconst_f32[0] = bitcast_uint_to_float(record->operands[0]); else if (type->u.width == 64) dst->u.reg.u.immconst_f64[0] = bitcast_uint64_to_double(record->operands[0]); else @@ -2902,6 +3268,54 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const 
break; + case CST_CODE_CE_GEP: + case CST_CODE_CE_INBOUNDS_GEP: + if ((ret = sm6_parser_init_constexpr_gep(sm6, record, dst)) < 0) + return ret; + break; + + case CST_CODE_CE_CAST: + if (!dxil_record_validate_operand_count(record, 3, 3, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + if ((value = record->operands[0]) != CAST_BITCAST) + { + WARN("Unhandled constexpr cast op %"PRIu64".\n", value); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Constexpr cast op %"PRIu64" is unhandled.", value); + return VKD3D_ERROR_INVALID_SHADER; + } + + ptr_type = sm6_parser_get_type(sm6, record->operands[1]); + if (!sm6_type_is_pointer(ptr_type)) + { + WARN("Constexpr cast at constant idx %zu is not a pointer.\n", value_idx); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Constexpr cast source operand is not a pointer."); + return VKD3D_ERROR_INVALID_SHADER; + } + + if ((value = record->operands[2]) >= sm6->cur_max_value) + { + WARN("Invalid value index %"PRIu64".\n", value); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Invalid value index %"PRIu64".", value); + return VKD3D_ERROR_INVALID_SHADER; + } + else if (value == value_idx) + { + WARN("Invalid value self-reference at %"PRIu64".\n", value); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Invalid value self-reference for a constexpr cast."); + return VKD3D_ERROR_INVALID_SHADER; + } + + /* Resolve later in case forward refs exist. 
*/ + dst->type = type; + dst->u.reg.type = VKD3DSPR_COUNT; + dst->u.reg.idx[0].offset = value; + break; + case CST_CODE_UNDEF: dxil_record_validate_operand_max_count(record, 0, sm6); dst->u.reg.type = VKD3DSPR_UNDEF; @@ -2911,6 +3325,8 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const default: FIXME("Unhandled constant code %u.\n", record->code); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Constant code %u is unhandled.", record->code); dst->u.reg.type = VKD3DSPR_UNDEF; break; } @@ -2925,6 +3341,29 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const ++sm6->value_count; } + /* Resolve cast forward refs. */ + for (i = base_value_idx; i < sm6->value_count; ++i) + { + dst = &sm6->values[i]; + if (dst->u.reg.type != VKD3DSPR_COUNT) + continue; + + type = dst->type; + + src = &sm6->values[dst->u.reg.idx[0].offset]; + if (!sm6_value_is_numeric_array(src)) + { + WARN("Value is not an array.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Constexpr cast source value is not a global array element."); + return VKD3D_ERROR_INVALID_SHADER; + } + + *dst = *src; + dst->type = type; + dst->u.reg.data_type = vkd3d_data_type_from_sm6_type(type->u.pointer.type); + } + return VKD3D_OK; } @@ -2941,12 +3380,14 @@ static bool bitcode_parse_alignment(uint64_t encoded_alignment, unsigned int *al static struct vkd3d_shader_instruction *sm6_parser_require_space(struct sm6_parser *sm6, size_t extra) { - if (!shader_instruction_array_reserve(&sm6->p.program.instructions, sm6->p.program.instructions.count + extra)) + struct vkd3d_shader_instruction_array *instructions = &sm6->p.program->instructions; + + if (!shader_instruction_array_reserve(instructions, instructions->count + extra)) { ERR("Failed to allocate instruction.\n"); return NULL; } - return &sm6->p.program.instructions.elements[sm6->p.program.instructions.count]; + return 
&instructions->elements[instructions->count]; } /* Space should be reserved before calling this. It is intended to require no checking of the returned pointer. */ @@ -2956,7 +3397,7 @@ static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_pa struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); assert(ins); vsir_instruction_init(ins, &sm6->p.location, handler_idx); - ++sm6->p.program.instructions.count; + ++sm6->p.program->instructions.count; return ins; } @@ -2994,6 +3435,58 @@ static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru register_init_with_id(&dst->u.reg, VKD3DSPR_IDXTEMP, data_type, ins->declaration.indexable_temp.register_idx); } +static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6_type *elem_type, + unsigned int alignment, unsigned int init, struct sm6_value *dst) +{ + enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); + struct vkd3d_shader_instruction *ins; + unsigned int byte_count; + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_RAW); + dst_param_init(&ins->declaration.tgsm_raw.reg); + register_init_with_id(&ins->declaration.tgsm_raw.reg.reg, VKD3DSPR_GROUPSHAREDMEM, data_type, sm6->tgsm_count++); + dst->u.reg = ins->declaration.tgsm_raw.reg.reg; + dst->structure_stride = 0; + ins->declaration.tgsm_raw.alignment = alignment; + byte_count = elem_type->u.width / 8u; + if (byte_count != 4) + { + FIXME("Unsupported byte count %u.\n", byte_count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Raw TGSM byte count %u is not supported.", byte_count); + } + ins->declaration.tgsm_raw.byte_count = byte_count; + /* The initialiser value index will be resolved later when forward references can be handled. 
*/ + ins->flags = init; +} + +static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const struct sm6_type *elem_type, + unsigned int count, unsigned int alignment, unsigned int init, struct sm6_value *dst) +{ + enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); + struct vkd3d_shader_instruction *ins; + unsigned int structure_stride; + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_STRUCTURED); + dst_param_init(&ins->declaration.tgsm_structured.reg); + register_init_with_id(&ins->declaration.tgsm_structured.reg.reg, VKD3DSPR_GROUPSHAREDMEM, + data_type, sm6->tgsm_count++); + dst->u.reg = ins->declaration.tgsm_structured.reg.reg; + structure_stride = elem_type->u.width / 8u; + if (structure_stride != 4) + { + FIXME("Unsupported structure stride %u.\n", structure_stride); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Structured TGSM byte stride %u is not supported.", structure_stride); + } + dst->structure_stride = structure_stride; + ins->declaration.tgsm_structured.alignment = alignment; + ins->declaration.tgsm_structured.byte_stride = structure_stride; + ins->declaration.tgsm_structured.structure_count = count; + /* The initialiser value index will be resolved later when forward references can be handled. 
*/ + ins->flags = init; +} + static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_record *record) { const struct sm6_type *type, *scalar_type; @@ -3101,6 +3594,7 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ dst = sm6_parser_get_current_value(sm6); dst->type = type; dst->value_type = VALUE_TYPE_REG; + dst->is_back_ref = true; if (is_constant && !init) { @@ -3119,10 +3613,17 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ } else if (address_space == ADDRESS_SPACE_GROUPSHARED) { - FIXME("Unsupported TGSM.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, - "TGSM global variables are not supported."); - return false; + if (!sm6_type_is_numeric(scalar_type)) + { + WARN("Unsupported type class %u.\n", scalar_type->class); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "TGSM variables of type class %u are not supported.", scalar_type->class); + return false; + } + if (count == 1) + sm6_parser_declare_tgsm_raw(sm6, scalar_type, alignment, init, dst); + else + sm6_parser_declare_tgsm_structured(sm6, scalar_type, count, alignment, init, dst); } else { @@ -3158,6 +3659,38 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init return NULL; } +static bool resolve_forward_zero_initialiser(size_t index, struct sm6_parser *sm6) +{ + const struct sm6_value *value; + + if (!index) + return false; + + --index; + if (!(value = sm6_parser_get_value_safe(sm6, index)) + || (!sm6_value_is_icb(value) && !sm6_value_is_constant(value) && !sm6_value_is_undef(value))) + { + WARN("Invalid initialiser index %zu.\n", index); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "TGSM initialiser value index %zu is invalid.", index); + return false; + } + else if ((sm6_value_is_icb(value) && value->u.icb->is_null) || sm6_value_is_constant_zero(value)) + { + return true; + } + 
else if (sm6_value_is_undef(value)) + { + /* In VSIR, initialisation with undefined values of objects is implied, not explicit. */ + return false; + } + + FIXME("Non-zero initialisers are not supported.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Non-zero TGSM initialisers are not supported."); + return false; +} + static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) { size_t i, count, base_value_idx = sm6->value_count; @@ -3219,9 +3752,9 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) } /* Resolve initialiser forward references. */ - for (i = 0; i < sm6->p.program.instructions.count; ++i) + for (i = 0; i < sm6->p.program->instructions.count; ++i) { - ins = &sm6->p.program.instructions.elements[i]; + ins = &sm6->p.program->instructions.elements[i]; if (ins->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) { ins->declaration.indexable_temp.initialiser = resolve_forward_initialiser( @@ -3231,6 +3764,16 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) { ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); } + else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) + { + ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); + ins->flags = 0; + } + else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) + { + ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); + ins->flags = 0; + } } for (i = base_value_idx; i < sm6->value_count; ++i) { @@ -3270,22 +3813,80 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par src_param_init_from_value(&src_params[i], operands[i]); } -static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shader_signature *s, - enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) +static enum 
vkd3d_shader_register_type register_type_from_dxil_semantic_kind( + enum vkd3d_shader_sysval_semantic sysval_semantic) { + switch (sysval_semantic) + { + case VKD3D_SHADER_SV_COVERAGE: + return VKD3DSPR_COVERAGE; + case VKD3D_SHADER_SV_DEPTH: + return VKD3DSPR_DEPTHOUT; + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: + return VKD3DSPR_DEPTHOUTGE; + case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: + return VKD3DSPR_DEPTHOUTLE; + default: + return VKD3DSPR_INVALID; + } +} + +static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shader_signature *s, + bool is_input, enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) +{ + enum vkd3d_shader_type shader_type = sm6->p.program->shader_version.type; + bool is_patch_constant, is_control_point; struct vkd3d_shader_dst_param *param; const struct signature_element *e; unsigned int i, count; + is_patch_constant = reg_type == VKD3DSPR_PATCHCONST; + + is_control_point = false; + if (!is_patch_constant) + { + switch (shader_type) + { + case VKD3D_SHADER_TYPE_DOMAIN: + case VKD3D_SHADER_TYPE_GEOMETRY: + is_control_point = is_input; + break; + + case VKD3D_SHADER_TYPE_HULL: + is_control_point = true; + break; + + default: + break; + } + } + for (i = 0; i < s->element_count; ++i) { e = &s->elements[i]; param = ¶ms[i]; + + if (e->register_index == UINT_MAX) + { + dst_param_io_init(param, e, register_type_from_dxil_semantic_kind(e->sysval_semantic)); + continue; + } + dst_param_io_init(param, e, reg_type); count = 0; - if (e->register_count > 1) + + if (is_control_point) + { + if (reg_type == VKD3DSPR_OUTPUT) + param->reg.idx[count].rel_addr = instruction_array_create_outpointid_param(&sm6->p.program->instructions); param->reg.idx[count++].offset = 0; + } + + if (e->register_count > 1 || (is_patch_constant && vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) + param->reg.idx[count++].offset = 0; + + assert(count < ARRAY_SIZE(param->reg.idx)); param->reg.idx[count++].offset = i; 
param->reg.idx_count = count; } @@ -3293,12 +3894,21 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade static void sm6_parser_init_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) { - sm6_parser_init_signature(sm6, output_signature, VKD3DSPR_OUTPUT, sm6->output_params); + sm6_parser_init_signature(sm6, output_signature, false, VKD3DSPR_OUTPUT, sm6->output_params); } static void sm6_parser_init_input_signature(struct sm6_parser *sm6, const struct shader_signature *input_signature) { - sm6_parser_init_signature(sm6, input_signature, VKD3DSPR_INPUT, sm6->input_params); + sm6_parser_init_signature(sm6, input_signature, true, VKD3DSPR_INPUT, sm6->input_params); +} + +static void sm6_parser_init_patch_constant_signature(struct sm6_parser *sm6, + const struct shader_signature *patch_constant_signature) +{ + bool is_input = sm6->p.program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN; + + sm6_parser_init_signature(sm6, patch_constant_signature, is_input, VKD3DSPR_PATCHCONST, + sm6->patch_constant_params); } static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) @@ -3350,6 +3960,9 @@ struct function_emission_state unsigned int temp_idx; }; +static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, + unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg); + static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_record *record, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { @@ -3425,6 +4038,130 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_rec sm6_parser_declare_indexable_temp(sm6, elem_type, type[0]->u.array.count, alignment, true, 0, ins, dst); } +static enum vkd3d_shader_opcode map_dx_atomicrmw_op(uint64_t code) +{ + switch (code) + { + case RMW_ADD: + return 
VKD3DSIH_IMM_ATOMIC_IADD; + case RMW_AND: + return VKD3DSIH_IMM_ATOMIC_AND; + case RMW_MAX: + return VKD3DSIH_IMM_ATOMIC_IMAX; + case RMW_MIN: + return VKD3DSIH_IMM_ATOMIC_IMIN; + case RMW_OR: + return VKD3DSIH_IMM_ATOMIC_OR; + case RMW_UMAX: + return VKD3DSIH_IMM_ATOMIC_UMAX; + case RMW_UMIN: + return VKD3DSIH_IMM_ATOMIC_UMIN; + case RMW_XCHG: + return VKD3DSIH_IMM_ATOMIC_EXCH; + case RMW_XOR: + return VKD3DSIH_IMM_ATOMIC_XOR; + default: + /* DXIL currently doesn't use SUB and NAND. */ + return VKD3DSIH_INVALID; + } +} + +static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_record *record, + struct function_emission_state *state, struct sm6_value *dst) +{ + struct vkd3d_shader_register coord, const_offset, const_zero; + const struct vkd3d_shader_register *regs[2]; + struct vkd3d_shader_dst_param *dst_params; + struct vkd3d_shader_src_param *src_params; + struct vkd3d_shader_instruction *ins; + const struct sm6_value *ptr, *src; + enum vkd3d_shader_opcode op; + unsigned int i = 0; + bool is_volatile; + uint64_t code; + + if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) + || !sm6_value_validate_is_pointer_to_i32(ptr, sm6) + || !sm6_value_validate_is_backward_ref(ptr, sm6)) + return; + + if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) + { + WARN("Register is not groupshared.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "The destination register for an atomicrmw instruction is not groupshared memory."); + return; + } + + dst->type = ptr->type->u.pointer.type; + + if (!(src = sm6_parser_get_value_by_ref(sm6, record, dst->type, &i))) + return; + + if (!dxil_record_validate_operand_count(record, i + 4, i + 4, sm6)) + return; + + if ((op = map_dx_atomicrmw_op(code = record->operands[i++])) == VKD3DSIH_INVALID) + { + FIXME("Unhandled atomicrmw op %"PRIu64".\n", code); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Operation %"PRIu64" for an atomicrmw 
instruction is unhandled.", code); + return; + } + + is_volatile = record->operands[i++]; + + /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. */ + if ((code = record->operands[i++]) != ORDERING_SEQCST) + FIXME("Unhandled atomic ordering %"PRIu64".\n", code); + + if ((code = record->operands[i]) != 1) + WARN("Ignoring synchronisation scope %"PRIu64".\n", code); + + if (ptr->structure_stride) + { + if (ptr->u.reg.idx[1].rel_addr) + { + regs[0] = &ptr->u.reg.idx[1].rel_addr->reg; + } + else + { + register_make_constant_uint(&const_offset, ptr->u.reg.idx[1].offset); + regs[0] = &const_offset; + } + register_make_constant_uint(&const_zero, 0); + regs[1] = &const_zero; + if (!sm6_parser_emit_reg_composite_construct(sm6, regs, 2, state, &coord)) + return; + } + + ins = state->ins; + vsir_instruction_init(ins, &sm6->p.location, op); + ins->flags = is_volatile ? VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST; + + if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) + return; + if (ptr->structure_stride) + src_param_init_vector_from_reg(&src_params[0], &coord); + else + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[1], src); + + dst_params = instruction_dst_params_alloc(ins, 2, sm6); + register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6); + dst_param_init(&dst_params[0]); + + dst_params[1].reg = ptr->u.reg; + /* The groupshared register has data type UAV when accessed. 
*/ + dst_params[1].reg.data_type = VKD3D_DATA_UAV; + dst_params[1].reg.idx[1].rel_addr = NULL; + dst_params[1].reg.idx[1].offset = ~0u; + dst_params[1].reg.idx_count = 1; + dst_param_init(&dst_params[1]); + + dst->u.reg = dst_params[0].reg; +} + static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a, const struct sm6_type *type_b, struct sm6_parser *sm6) { @@ -3756,6 +4493,25 @@ static bool sm6_parser_emit_coordinate_construct(struct sm6_parser *sm6, const s return sm6_parser_emit_reg_composite_construct(sm6, operand_regs, component_count, state, reg); } +static enum vkd3d_shader_opcode sm6_dx_map_void_op(enum dx_intrinsic_opcode op) +{ + switch (op) + { + case DX_WAVE_IS_FIRST_LANE: + return VKD3DSIH_WAVE_IS_FIRST_LANE; + default: + vkd3d_unreachable(); + } +} + +static void sm6_parser_emit_dx_void(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + vsir_instruction_init(ins, &sm6->p.location, sm6_dx_map_void_op(op)); + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + static enum vkd3d_shader_opcode map_dx_unary_op(enum dx_intrinsic_opcode op) { switch (op) @@ -3820,6 +4576,18 @@ static enum vkd3d_shader_opcode map_dx_unary_op(enum dx_intrinsic_opcode op) return VKD3DSIH_F32TOF16; case DX_LEGACY_F16TOF32: return VKD3DSIH_F16TOF32; + case DX_WAVE_ACTIVE_ALL_EQUAL: + return VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL; + case DX_WAVE_ALL_BIT_COUNT: + return VKD3DSIH_WAVE_ALL_BIT_COUNT; + case DX_WAVE_ALL_TRUE: + return VKD3DSIH_WAVE_ALL_TRUE; + case DX_WAVE_ANY_TRUE: + return VKD3DSIH_WAVE_ANY_TRUE; + case DX_WAVE_PREFIX_BIT_COUNT: + return VKD3DSIH_WAVE_PREFIX_BIT_COUNT; + case DX_WAVE_READ_LANE_FIRST: + return VKD3DSIH_WAVE_READ_LANE_FIRST; default: vkd3d_unreachable(); } @@ -3855,6 +4623,8 @@ static enum vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, co return VKD3DSIH_UMAX; case 
DX_UMIN: return VKD3DSIH_UMIN; + case DX_WAVE_READ_LANE_AT: + return VKD3DSIH_WAVE_READ_LANE_AT; default: vkd3d_unreachable(); } @@ -3974,6 +4744,98 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr dst->u.reg = dst_params[0].reg; } +static void sm6_parser_emit_dx_barrier(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + enum dxil_sync_flags flags; + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SYNC); + flags = sm6_value_get_constant_uint(operands[0]); + ins->flags = flags & (SYNC_THREAD_GROUP | SYNC_THREAD_GROUP_UAV); + if (flags & SYNC_GLOBAL_UAV) + ins->flags |= VKD3DSSF_GLOBAL_UAV; + if (flags & SYNC_GROUP_SHARED_MEMORY) + ins->flags |= VKD3DSSF_GROUP_SHARED_MEMORY; + if (flags &= ~(SYNC_THREAD_GROUP | SYNC_GLOBAL_UAV | SYNC_THREAD_GROUP_UAV | SYNC_GROUP_SHARED_MEMORY)) + { + FIXME("Unhandled flags %#x.\n", flags); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, + "Barrier flags %#x are unhandled.", flags); + } +} + +static void sm6_parser_emit_dx_buffer_update_counter(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_params; + const struct sm6_value *resource; + unsigned int i; + int8_t inc; + + resource = operands[0]; + if (!sm6_value_validate_is_handle(resource, sm6)) + return; + + if (!sm6_value_is_constant(operands[1])) + { + FIXME("Unsupported dynamic update operand.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "A dynamic update value for a UAV counter operation is not supported."); + return; + } + i = sm6_value_get_constant_uint(operands[1]); + if (i != 1 && i != 255) + { + WARN("Unexpected update value %#x.\n", i); + 
vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, + "Update value %#x for a UAV counter operation is not supported.", i); + } + inc = i; + + vsir_instruction_init(ins, &sm6->p.location, (inc < 0) ? VKD3DSIH_IMM_ATOMIC_CONSUME : VKD3DSIH_IMM_ATOMIC_ALLOC); + if (!(src_params = instruction_src_params_alloc(ins, 1, sm6))) + return; + src_param_init_vector_from_reg(&src_params[0], &resource->u.handle.reg); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + +static void sm6_parser_emit_dx_calculate_lod(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + const struct sm6_value *resource, *sampler; + struct vkd3d_shader_src_param *src_params; + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_register coord; + unsigned int clamp; + + resource = operands[0]; + sampler = operands[1]; + if (!sm6_value_validate_is_texture_handle(resource, op, sm6) + || !sm6_value_validate_is_sampler_handle(sampler, op, sm6)) + { + return; + } + + if (!sm6_parser_emit_coordinate_construct(sm6, &operands[2], 3, NULL, state, &coord)) + return; + + clamp = sm6_value_get_constant_uint(operands[5]); + + ins = state->ins; + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LOD); + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) + return; + src_param_init_vector_from_reg(&src_params[0], &coord); + src_params[1].reg = resource->u.handle.reg; + src_param_init_scalar(&src_params[1], !clamp); + src_param_init_vector_from_reg(&src_params[2], &sampler->u.handle.reg); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4004,6 +4866,44 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr instruction_dst_param_init_ssa_vector(ins, 
sm6_type_max_vector_size(type), sm6); } +static void sm6_parser_dcl_register_builtin(struct sm6_parser *sm6, enum vkd3d_shader_opcode handler_idx, + enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type, unsigned int component_count) +{ + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + + if (!bitmap_is_set(sm6->io_regs_declared, reg_type)) + { + bitmap_set(sm6->io_regs_declared, reg_type); + ins = sm6_parser_add_instruction(sm6, handler_idx); + dst_param = &ins->declaration.dst; + vsir_register_init(&dst_param->reg, reg_type, data_type, 0); + dst_param_init_vector(dst_param, component_count); + } +} + +static void sm6_parser_emit_dx_input_register_mov(struct sm6_parser *sm6, + struct vkd3d_shader_instruction *ins, enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type) +{ + struct vkd3d_shader_src_param *src_param; + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + sm6_parser_dcl_register_builtin(sm6, VKD3DSIH_DCL_INPUT, reg_type, data_type, 1); + vsir_register_init(&src_param->reg, reg_type, data_type, 0); + src_param_init(src_param); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + +static void sm6_parser_emit_dx_coverage(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_COVERAGE, VKD3D_DATA_UINT); +} + static const struct sm6_descriptor_info *sm6_parser_get_descriptor(struct sm6_parser *sm6, enum vkd3d_shader_descriptor_type type, unsigned int id, const struct sm6_value *address) { @@ -4065,6 +4965,208 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int ins->handler_idx = VKD3DSIH_NOP; } +static void sm6_parser_emit_dx_stream(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, 
struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + unsigned int i; + + vsir_instruction_init(ins, &sm6->p.location, (op == DX_CUT_STREAM) ? VKD3DSIH_CUT_STREAM : VKD3DSIH_EMIT_STREAM); + + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + + i = sm6_value_get_constant_uint(operands[0]); + if (i >= MAX_GS_OUTPUT_STREAMS) + { + WARN("Invalid stream index %u.\n", i); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Output stream index %u is invalid.", i); + } + + /* VKD3D_DATA_UNUSED would be more reasonable, but TPF uses data type 0 here. */ + register_init_with_id(&src_param->reg, VKD3DSPR_STREAM, 0, i); + src_param_init(src_param); + + if (op == DX_EMIT_THEN_CUT_STREAM) + { + ++state->ins; + ++state->code_block->instruction_count; + sm6_parser_emit_dx_stream(sm6, DX_CUT_STREAM, operands, state); + } +} + +static void sm6_parser_emit_dx_discard(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_DISCARD); + + if ((src_param = instruction_src_params_alloc(ins, 1, sm6))) + src_param_init_from_value(src_param, operands[0]); +} + +static void sm6_parser_emit_dx_domain_location(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + unsigned int component_idx; + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + + if ((component_idx = sm6_value_get_constant_uint(operands[0])) >= 3) + { + WARN("Invalid component index %u.\n", component_idx); + vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Invalid domain location component index %u.", component_idx); + component_idx = 0; + } + + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + sm6_parser_dcl_register_builtin(sm6, VKD3DSIH_DCL_INPUT, VKD3DSPR_TESSCOORD, VKD3D_DATA_FLOAT, 3); + vsir_register_init(&src_param->reg, VKD3DSPR_TESSCOORD, VKD3D_DATA_FLOAT, 0); + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param_init_scalar(src_param, component_idx); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + +static void sm6_parser_emit_dx_dot(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_src_param *src_params; + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_register regs[2]; + enum vkd3d_shader_opcode handler_idx; + unsigned int component_count; + + switch (op) + { + case DX_DOT2: + handler_idx = VKD3DSIH_DP2; + component_count = 2; + break; + case DX_DOT3: + handler_idx = VKD3DSIH_DP3; + component_count = 3; + break; + case DX_DOT4: + handler_idx = VKD3DSIH_DP4; + component_count = 4; + break; + default: + vkd3d_unreachable(); + } + + if (!sm6_parser_emit_composite_construct(sm6, &operands[0], component_count, state, &regs[0])) + return; + if (!sm6_parser_emit_composite_construct(sm6, &operands[component_count], component_count, state, &regs[1])) + return; + + ins = state->ins; + vsir_instruction_init(ins, &sm6->p.location, handler_idx); + if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) + return; + src_param_init_vector_from_reg(&src_params[0], &regs[0]); + src_param_init_vector_from_reg(&src_params[1], &regs[1]); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + +static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct
vkd3d_shader_src_param *src_param; + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + src_param_init_from_value(src_param, operands[0]); + src_param->modifiers = VKD3DSPSM_ABS; + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + +static void sm6_parser_emit_dx_compute_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + unsigned int component_count = 3, component_idx = 0; + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + enum vkd3d_shader_register_type reg_type; + + switch (op) + { + case DX_THREAD_ID: + reg_type = VKD3DSPR_THREADID; + break; + case DX_GROUP_ID: + reg_type = VKD3DSPR_THREADGROUPID; + break; + case DX_THREAD_ID_IN_GROUP: + reg_type = VKD3DSPR_LOCALTHREADID; + break; + case DX_FLATTENED_THREAD_ID_IN_GROUP: + reg_type = VKD3DSPR_LOCALTHREADINDEX; + component_count = 1; + break; + default: + vkd3d_unreachable(); + } + + sm6_parser_dcl_register_builtin(sm6, VKD3DSIH_DCL_INPUT, reg_type, VKD3D_DATA_UINT, component_count); + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + vsir_register_init(&src_param->reg, reg_type, VKD3D_DATA_UINT, 0); + if (component_count > 1) + { + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + component_idx = sm6_value_get_constant_uint(operands[0]); + } + src_param_init_scalar(src_param, component_idx); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + +static enum vkd3d_shader_opcode sm6_dx_map_ma_op(enum dx_intrinsic_opcode op, const struct sm6_type *type) +{ + switch (op) + { + case DX_FMA: + return VKD3DSIH_DFMA; + case DX_FMAD: + return VKD3DSIH_MAD; + case DX_IMAD: + case DX_UMAD: + return VKD3DSIH_IMAD; + default: + vkd3d_unreachable(); + } +} + +static void sm6_parser_emit_dx_ma(struct sm6_parser 
*sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_params; + unsigned int i; + + vsir_instruction_init(ins, &sm6->p.location, sm6_dx_map_ma_op(op, operands[0]->type)); + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) + return; + for (i = 0; i < 3; ++i) + src_param_init_from_value(&src_params[i], operands[i]); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4171,18 +5273,44 @@ static void sm6_parser_emit_dx_tertiary(struct sm6_parser *sm6, enum dx_intrinsi static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { + bool is_control_point = op == DX_LOAD_OUTPUT_CONTROL_POINT; + bool is_patch_constant = op == DX_LOAD_PATCH_CONSTANT; struct vkd3d_shader_instruction *ins = state->ins; + struct vsir_program *program = sm6->p.program; + unsigned int count, row_index, column_index; + const struct vkd3d_shader_dst_param *params; struct vkd3d_shader_src_param *src_param; const struct shader_signature *signature; - unsigned int row_index, column_index; const struct signature_element *e; row_index = sm6_value_get_constant_uint(operands[0]); column_index = sm6_value_get_constant_uint(operands[2]); + if (is_control_point && operands[3]->is_undefined) + { + /* dxcompiler will compile source which does this, so let it pass. 
*/ + WARN("Control point id is undefined.\n"); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNDEFINED_OPERAND, + "The index for a control point load is undefined."); + } + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); - signature = &sm6->p.shader_desc.input_signature; + if (is_patch_constant) + { + signature = &program->patch_constant_signature; + params = sm6->patch_constant_params; + } + else if (is_control_point) + { + signature = &program->output_signature; + params = sm6->output_params; + } + else + { + signature = &program->input_signature; + params = sm6->input_params; + } if (row_index >= signature->element_count) { WARN("Invalid row index %u.\n", row_index); @@ -4194,14 +5322,54 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) return; - src_param->reg = sm6->input_params[row_index].reg; + src_param->reg = params[row_index].reg; src_param_init_scalar(src_param, column_index); + count = 0; + if (e->register_count > 1) - register_index_address_init(&src_param->reg.idx[0], operands[1], sm6); + register_index_address_init(&src_param->reg.idx[count++], operands[1], sm6); + + if (!is_patch_constant && !operands[3]->is_undefined) + { + assert(src_param->reg.idx_count > count); + register_index_address_init(&src_param->reg.idx[count], operands[3], sm6); + } instruction_dst_param_init_ssa_scalar(ins, sm6); } +static void sm6_parser_emit_dx_make_double(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_src_param *src_params; + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_register reg; + + if (!sm6_parser_emit_composite_construct(sm6, &operands[0], 2, state, &reg)) + return; + + ins = state->ins; + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + if (!(src_params = instruction_src_params_alloc(ins, 1, sm6))) +
return; + src_params[0].reg = reg; + src_param_init_vector(&src_params[0], 2); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + +static void sm6_parser_emit_dx_output_control_point_id(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT); +} + +static void sm6_parser_emit_dx_primitive_id(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_PRIMID, VKD3D_DATA_UINT); +} + static void sm6_parser_emit_dx_raw_buffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4424,6 +5592,59 @@ static void sm6_parser_emit_dx_buffer_store(struct sm6_parser *sm6, enum dx_intr dst_param->reg = resource->u.handle.reg; } +static void sm6_parser_emit_dx_get_sample_count(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SAMPLE_INFO); + ins->flags = VKD3DSI_SAMPLE_INFO_UINT; + + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + vsir_register_init(&src_param->reg, VKD3DSPR_RASTERIZER, VKD3D_DATA_FLOAT, 0); + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param_init(src_param); + + instruction_dst_param_init_ssa_scalar(ins, sm6); + ins->dst->reg.data_type = VKD3D_DATA_FLOAT; +} + +static void sm6_parser_emit_dx_get_sample_pos(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct 
vkd3d_shader_src_param *src_params; + const struct sm6_value *resource = NULL; + + if (op == DX_TEX2DMS_GET_SAMPLE_POS) + { + resource = operands[0]; + if (!sm6_value_validate_is_texture_2dms_handle(resource, op, sm6)) + return; + } + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SAMPLE_POS); + + if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) + return; + if (op == DX_TEX2DMS_GET_SAMPLE_POS) + { + src_param_init_vector_from_reg(&src_params[0], &resource->u.handle.reg); + src_param_init_from_value(&src_params[1], operands[1]); + } + else + { + src_param_init_vector(&src_params[0], 2); + vsir_register_init(&src_params[0].reg, VKD3DSPR_RASTERIZER, VKD3D_DATA_FLOAT, 0); + src_params[0].reg.dimension = VSIR_DIMENSION_VEC4; + src_param_init_from_value(&src_params[1], operands[0]); + } + + instruction_dst_param_init_ssa_vector(ins, 2, sm6); +} + static unsigned int sm6_value_get_texel_offset(const struct sm6_value *value) { return sm6_value_is_undef(value) ? 0 : sm6_value_get_constant_uint(value); @@ -4521,6 +5742,21 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_ instruction_dst_param_init_ssa_vector(ins, component_count, sm6); } +static void sm6_parser_emit_dx_saturate(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + src_param_init_from_value(src_param, operands[0]); + + if (instruction_dst_param_init_ssa_scalar(ins, sm6)) + ins->dst->modifiers = VKD3DSPDM_SATURATE; +} + static void sm6_parser_emit_dx_sincos(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4561,7 +5797,9 @@ static void 
sm6_parser_emit_dx_split_double(struct sm6_parser *sm6, enum dx_intr static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { + bool is_patch_constant = op == DX_STORE_PATCH_CONSTANT; struct vkd3d_shader_instruction *ins = state->ins; + struct vsir_program *program = sm6->p.program; struct vkd3d_shader_src_param *src_param; struct vkd3d_shader_dst_param *dst_param; const struct shader_signature *signature; @@ -4572,7 +5810,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr row_index = sm6_value_get_constant_uint(operands[0]); column_index = sm6_value_get_constant_uint(operands[2]); - signature = &sm6->p.shader_desc.output_signature; + signature = is_patch_constant ? &program->patch_constant_signature : &program->output_signature; if (row_index >= signature->element_count) { WARN("Invalid row index %u.\n", row_index); @@ -4604,14 +5842,82 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr if (!(dst_param = instruction_dst_params_alloc(ins, 1, sm6))) return; dst_param_init_scalar(dst_param, column_index); - dst_param->reg = sm6->output_params[row_index].reg; + dst_param->reg = is_patch_constant ? 
sm6->patch_constant_params[row_index].reg : sm6->output_params[row_index].reg; if (e->register_count > 1) register_index_address_init(&dst_param->reg.idx[0], operands[1], sm6); + if (e->register_index == UINT_MAX) + { + sm6_parser_dcl_register_builtin(sm6, VKD3DSIH_DCL_OUTPUT, dst_param->reg.type, + dst_param->reg.data_type, vsir_write_mask_component_count(e->mask)); + } + if ((src_param = instruction_src_params_alloc(ins, 1, sm6))) src_param_init_from_value(src_param, value); } +static void sm6_parser_emit_dx_texture_gather(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_register coord, offset; + const struct sm6_value *resource, *sampler; + struct vkd3d_shader_src_param *src_params; + struct vkd3d_shader_instruction *ins; + unsigned int swizzle; + bool extended_offset; + + resource = operands[0]; + sampler = operands[1]; + if (!sm6_value_validate_is_texture_handle(resource, op, sm6) + || !sm6_value_validate_is_sampler_handle(sampler, op, sm6)) + { + return; + } + + if (!sm6_parser_emit_coordinate_construct(sm6, &operands[2], VKD3D_VEC4_SIZE, NULL, state, &coord)) + return; + + if ((extended_offset = !sm6_value_vector_is_constant_or_undef(&operands[6], 2)) + && !sm6_parser_emit_coordinate_construct(sm6, &operands[6], 2, NULL, state, &offset)) + { + return; + } + + ins = state->ins; + if (op == DX_TEXTURE_GATHER) + { + instruction_init_with_resource(ins, extended_offset ? VKD3DSIH_GATHER4_PO : VKD3DSIH_GATHER4, resource, sm6); + if (!(src_params = instruction_src_params_alloc(ins, 3 + extended_offset, sm6))) + return; + } + else + { + instruction_init_with_resource(ins, extended_offset ? 
VKD3DSIH_GATHER4_PO_C : VKD3DSIH_GATHER4_C, resource, sm6); + if (!(src_params = instruction_src_params_alloc(ins, 4 + extended_offset, sm6))) + return; + src_param_init_from_value(&src_params[3 + extended_offset], operands[9]); + } + + src_param_init_vector_from_reg(&src_params[0], &coord); + if (extended_offset) + src_param_init_vector_from_reg(&src_params[1], &offset); + else + instruction_set_texel_offset(ins, &operands[6], sm6); + src_param_init_vector_from_reg(&src_params[1 + extended_offset], &resource->u.handle.reg); + src_param_init_vector_from_reg(&src_params[2 + extended_offset], &sampler->u.handle.reg); + /* Swizzle stored in the sampler parameter is the scalar component index to be gathered. */ + swizzle = sm6_value_get_constant_uint(operands[8]); + if (swizzle >= VKD3D_VEC4_SIZE) + { + WARN("Invalid swizzle %#x.\n", swizzle); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Swizzle %#x for a texture gather operation is invalid.", swizzle); + } + src_params[2 + extended_offset].swizzle = swizzle; + + instruction_dst_param_init_ssa_vector(ins, VKD3D_VEC4_SIZE, sm6); +} + static void sm6_parser_emit_dx_texture_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4707,6 +6013,131 @@ static void sm6_parser_emit_dx_texture_store(struct sm6_parser *sm6, enum dx_int dst_param_init_with_mask(dst_param, write_mask); } +static void sm6_parser_emit_dx_wave_active_ballot(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_WAVE_ACTIVE_BALLOT); + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + src_param_init_from_value(src_param, operands[0]); + + instruction_dst_param_init_ssa_vector(ins, 
VKD3D_VEC4_SIZE, sm6); +} + +static enum vkd3d_shader_opcode sm6_dx_map_wave_bit_op(enum dxil_wave_bit_op_kind op, + struct sm6_parser *sm6) +{ + switch (op) + { + case WAVE_BIT_OP_AND: + return VKD3DSIH_WAVE_ACTIVE_BIT_AND; + case WAVE_BIT_OP_OR: + return VKD3DSIH_WAVE_ACTIVE_BIT_OR; + case WAVE_BIT_OP_XOR: + return VKD3DSIH_WAVE_ACTIVE_BIT_XOR; + default: + FIXME("Unhandled wave bit op %u.\n", op); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, + "Wave bit operation %u is unhandled.\n", op); + return VKD3DSIH_INVALID; + } +} + +static void sm6_parser_emit_dx_wave_active_bit(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + enum dxil_wave_bit_op_kind wave_op; + enum vkd3d_shader_opcode opcode; + + wave_op = sm6_value_get_constant_uint(operands[1]); + + if ((opcode = sm6_dx_map_wave_bit_op(wave_op, sm6)) == VKD3DSIH_INVALID) + return; + vsir_instruction_init(ins, &sm6->p.location, opcode); + + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + src_param_init_from_value(src_param, operands[0]); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + +static enum vkd3d_shader_opcode sm6_dx_map_wave_op(enum dxil_wave_op_kind op, bool is_signed, bool is_float, + struct sm6_parser *sm6) +{ + switch (op) + { + case WAVE_OP_ADD: + return VKD3DSIH_WAVE_OP_ADD; + case WAVE_OP_MUL: + return VKD3DSIH_WAVE_OP_MUL; + case WAVE_OP_MIN: + if (is_float) + return VKD3DSIH_WAVE_OP_MIN; + return is_signed ? VKD3DSIH_WAVE_OP_IMIN : VKD3DSIH_WAVE_OP_UMIN; + case WAVE_OP_MAX: + if (is_float) + return VKD3DSIH_WAVE_OP_MAX; + return is_signed ? 
VKD3DSIH_WAVE_OP_IMAX : VKD3DSIH_WAVE_OP_UMAX; + default: + FIXME("Unhandled wave op %u.\n", op); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, + "Wave operation %u is unhandled.\n", op); + return VKD3DSIH_INVALID; + } +} + +static void sm6_parser_emit_dx_wave_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + enum vkd3d_shader_opcode opcode; + enum dxil_wave_op_kind wave_op; + bool is_signed; + + wave_op = sm6_value_get_constant_uint(operands[1]); + is_signed = !sm6_value_get_constant_uint(operands[2]); + opcode = sm6_dx_map_wave_op(wave_op, is_signed, sm6_type_is_floating_point(operands[0]->type), sm6); + + if (opcode == VKD3DSIH_INVALID) + return; + + vsir_instruction_init(ins, &sm6->p.location, opcode); + ins->flags = (op == DX_WAVE_PREFIX_OP) ? VKD3DSI_WAVE_PREFIX : 0; + + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + src_param_init_from_value(src_param, operands[0]); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + +static void sm6_parser_emit_dx_wave_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + enum vkd3d_shader_register_type type; + + switch (op) + { + case DX_WAVE_GET_LANE_COUNT: + type = VKD3DSPR_WAVELANECOUNT; + break; + case DX_WAVE_GET_LANE_INDEX: + type = VKD3DSPR_WAVELANEINDEX; + break; + default: + vkd3d_unreachable(); + } + + sm6_parser_emit_dx_input_register_mov(sm6, state->ins, type, VKD3D_DATA_UINT); +} + struct sm6_dx_opcode_info { const char *ret_type; @@ -4723,6 +6154,7 @@ struct sm6_dx_opcode_info C -> constant or undefined int8/16/32 i -> int32 m -> int16/32/64 + n -> any numeric f -> float d -> double e -> half/float @@ -4730,6 +6162,7 @@ struct sm6_dx_opcode_info H -> handle D -> Dimensions S 
-> splitdouble + V -> 4 x i32 v -> void o -> overloaded R -> matches the return type @@ -4741,29 +6174,47 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_ATAN ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_ATOMIC_BINOP ] = {"o", "HciiiR", sm6_parser_emit_dx_atomic_binop}, [DX_ATOMIC_CMP_XCHG ] = {"o", "HiiiRR", sm6_parser_emit_dx_atomic_binop}, + [DX_BARRIER ] = {"v", "c", sm6_parser_emit_dx_barrier}, [DX_BFREV ] = {"m", "R", sm6_parser_emit_dx_unary}, [DX_BUFFER_LOAD ] = {"o", "Hii", sm6_parser_emit_dx_buffer_load}, [DX_BUFFER_STORE ] = {"v", "Hiiooooc", sm6_parser_emit_dx_buffer_store}, + [DX_BUFFER_UPDATE_COUNTER ] = {"i", "H8", sm6_parser_emit_dx_buffer_update_counter}, + [DX_CALCULATE_LOD ] = {"f", "HHfffb", sm6_parser_emit_dx_calculate_lod}, [DX_CBUFFER_LOAD_LEGACY ] = {"o", "Hi", sm6_parser_emit_dx_cbuffer_load}, [DX_COS ] = {"g", "R", sm6_parser_emit_dx_sincos}, [DX_COUNT_BITS ] = {"i", "m", sm6_parser_emit_dx_unary}, + [DX_COVERAGE ] = {"i", "", sm6_parser_emit_dx_coverage}, [DX_CREATE_HANDLE ] = {"H", "ccib", sm6_parser_emit_dx_create_handle}, + [DX_CUT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, [DX_DERIV_COARSEX ] = {"e", "R", sm6_parser_emit_dx_unary}, [DX_DERIV_COARSEY ] = {"e", "R", sm6_parser_emit_dx_unary}, [DX_DERIV_FINEX ] = {"e", "R", sm6_parser_emit_dx_unary}, [DX_DERIV_FINEY ] = {"e", "R", sm6_parser_emit_dx_unary}, + [DX_DISCARD ] = {"v", "1", sm6_parser_emit_dx_discard}, + [DX_DOMAIN_LOCATION ] = {"f", "c", sm6_parser_emit_dx_domain_location}, + [DX_DOT2 ] = {"g", "RRRR", sm6_parser_emit_dx_dot}, + [DX_DOT3 ] = {"g", "RRRRRR", sm6_parser_emit_dx_dot}, + [DX_DOT4 ] = {"g", "RRRRRRRR", sm6_parser_emit_dx_dot}, + [DX_EMIT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, + [DX_EMIT_THEN_CUT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, [DX_EXP ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_FABS ] = {"g", "R", sm6_parser_emit_dx_fabs}, [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, [DX_FIRST_BIT_LO ] = 
{"i", "m", sm6_parser_emit_dx_unary}, [DX_FIRST_BIT_SHI ] = {"i", "m", sm6_parser_emit_dx_unary}, + [DX_FLATTENED_THREAD_ID_IN_GROUP ] = {"i", "", sm6_parser_emit_dx_compute_builtin}, + [DX_FMA ] = {"g", "RRR", sm6_parser_emit_dx_ma}, + [DX_FMAD ] = {"g", "RRR", sm6_parser_emit_dx_ma}, [DX_FMAX ] = {"g", "RR", sm6_parser_emit_dx_binary}, [DX_FMIN ] = {"g", "RR", sm6_parser_emit_dx_binary}, [DX_FRC ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_GET_DIMENSIONS ] = {"D", "Hi", sm6_parser_emit_dx_get_dimensions}, + [DX_GROUP_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, [DX_IBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, [DX_HCOS ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_HSIN ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_HTAN ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_IMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, [DX_IMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, [DX_IMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, [DX_ISFINITE ] = {"1", "g", sm6_parser_emit_dx_unary}, @@ -4772,7 +6223,12 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_LEGACY_F16TOF32 ] = {"f", "i", sm6_parser_emit_dx_unary}, [DX_LEGACY_F32TOF16 ] = {"i", "f", sm6_parser_emit_dx_unary}, [DX_LOAD_INPUT ] = {"o", "ii8i", sm6_parser_emit_dx_load_input}, + [DX_LOAD_OUTPUT_CONTROL_POINT ] = {"o", "ii8i", sm6_parser_emit_dx_load_input}, + [DX_LOAD_PATCH_CONSTANT ] = {"o", "ii8", sm6_parser_emit_dx_load_input}, [DX_LOG ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, + [DX_OUTPUT_CONTROL_POINT_ID ] = {"i", "", sm6_parser_emit_dx_output_control_point_id}, + [DX_PRIMITIVE_ID ] = {"i", "", sm6_parser_emit_dx_primitive_id}, [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", sm6_parser_emit_dx_raw_buffer_store}, [DX_ROUND_NE ] = {"g", "R", sm6_parser_emit_dx_unary}, @@ -4780,22 +6236,46 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = 
[DX_ROUND_PI ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_ROUND_Z ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_RSQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_RT_GET_SAMPLE_COUNT ] = {"i", "", sm6_parser_emit_dx_get_sample_count}, + [DX_RT_GET_SAMPLE_POS ] = {"o", "i", sm6_parser_emit_dx_get_sample_pos}, [DX_SAMPLE ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, [DX_SAMPLE_B ] = {"o", "HHffffiiiff", sm6_parser_emit_dx_sample}, [DX_SAMPLE_C ] = {"o", "HHffffiiiff", sm6_parser_emit_dx_sample}, [DX_SAMPLE_C_LZ ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, [DX_SAMPLE_GRAD ] = {"o", "HHffffiiifffffff", sm6_parser_emit_dx_sample}, [DX_SAMPLE_LOD ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, + [DX_SATURATE ] = {"g", "R", sm6_parser_emit_dx_saturate}, [DX_SIN ] = {"g", "R", sm6_parser_emit_dx_sincos}, [DX_SPLIT_DOUBLE ] = {"S", "d", sm6_parser_emit_dx_split_double}, [DX_SQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_STORE_OUTPUT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, + [DX_STORE_PATCH_CONSTANT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, [DX_TAN ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_TEX2DMS_GET_SAMPLE_POS ] = {"o", "Hi", sm6_parser_emit_dx_get_sample_pos}, + [DX_TEXTURE_GATHER ] = {"o", "HHffffiic", sm6_parser_emit_dx_texture_gather}, + [DX_TEXTURE_GATHER_CMP ] = {"o", "HHffffiicf", sm6_parser_emit_dx_texture_gather}, [DX_TEXTURE_LOAD ] = {"o", "HiiiiCCC", sm6_parser_emit_dx_texture_load}, [DX_TEXTURE_STORE ] = {"v", "Hiiiooooc", sm6_parser_emit_dx_texture_store}, + [DX_THREAD_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, + [DX_THREAD_ID_IN_GROUP ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, [DX_UBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, + [DX_UMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, [DX_UMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, [DX_UMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, + [DX_WAVE_ACTIVE_ALL_EQUAL ] = {"1", "n", sm6_parser_emit_dx_unary}, + [DX_WAVE_ACTIVE_BALLOT ] = 
{"V", "1", sm6_parser_emit_dx_wave_active_ballot}, + [DX_WAVE_ACTIVE_BIT ] = {"m", "Rc", sm6_parser_emit_dx_wave_active_bit}, + [DX_WAVE_ACTIVE_OP ] = {"n", "Rcc", sm6_parser_emit_dx_wave_op}, + [DX_WAVE_ALL_BIT_COUNT ] = {"i", "1", sm6_parser_emit_dx_unary}, + [DX_WAVE_ALL_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, + [DX_WAVE_ANY_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, + [DX_WAVE_GET_LANE_COUNT ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, + [DX_WAVE_GET_LANE_INDEX ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, + [DX_WAVE_IS_FIRST_LANE ] = {"1", "", sm6_parser_emit_dx_void}, + [DX_WAVE_PREFIX_BIT_COUNT ] = {"i", "1", sm6_parser_emit_dx_unary}, + [DX_WAVE_PREFIX_OP ] = {"n", "Rcc", sm6_parser_emit_dx_wave_op}, + [DX_WAVE_READ_LANE_AT ] = {"n", "Ri", sm6_parser_emit_dx_binary}, + [DX_WAVE_READ_LANE_FIRST ] = {"n", "R", sm6_parser_emit_dx_unary}, }; static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struct sm6_value *value, char info_type, @@ -4827,6 +6307,8 @@ static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struc return sm6_type_is_i32(type); case 'm': return sm6_type_is_i16_i32_i64(type); + case 'n': + return sm6_type_is_numeric(type); case 'f': return sm6_type_is_float(type); case 'd': @@ -4841,6 +6323,8 @@ static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struc return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.Dimensions"); case 'S': return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.splitdouble"); + case 'V': + return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.fouri32"); case 'v': return !type; case 'o': @@ -5055,7 +6539,10 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ break; case CAST_ZEXT: case CAST_SEXT: - /* nop or min precision. TODO: native 16-bit */ + /* nop or min precision. TODO: native 16-bit. 
+ * Extension instructions could be emitted for min precision, but in Windows + * the AMD RX 580 simply drops such instructions, which makes sense as no + * assumptions should be made about any behaviour which depends on bit width. */ if (to->u.width == from->u.width || (to->u.width == 32 && from->u.width == 16)) { op = VKD3DSIH_NOP; @@ -5187,8 +6674,8 @@ static const struct sm6_cmp_info *sm6_map_cmp2_op(uint64_t code) [FCMP_OLT] = {VKD3DSIH_LTO}, [FCMP_OLE] = {VKD3DSIH_GEO, true}, [FCMP_ONE] = {VKD3DSIH_NEO}, - [FCMP_ORD] = {VKD3DSIH_INVALID}, - [FCMP_UNO] = {VKD3DSIH_INVALID}, + [FCMP_ORD] = {VKD3DSIH_ORD}, + [FCMP_UNO] = {VKD3DSIH_UNO}, [FCMP_UEQ] = {VKD3DSIH_EQU}, [FCMP_UGT] = {VKD3DSIH_LTU, true}, [FCMP_UGE] = {VKD3DSIH_GEU}, @@ -5248,6 +6735,15 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor code = record->operands[i++]; + /* dxcompiler occasionally emits bool not-equal-to-false, which is a no-op. Bool comparisons + * do not otherwise occur, so deleting these avoids the need for backend support. 
*/ + if (sm6_type_is_bool(type_a) && code == ICMP_NE && sm6_value_is_constant_zero(b)) + { + ins->handler_idx = VKD3DSIH_NOP; + *dst = *a; + return; + } + if ((!is_int && !is_fp) || is_int != (code >= ICMP_EQ)) { FIXME("Invalid operation %"PRIu64" on type class %u.\n", code, type_a->class); @@ -5304,6 +6800,88 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor instruction_dst_param_init_ssa_scalar(ins, sm6); } +static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_record *record, + struct vkd3d_shader_instruction *ins, struct sm6_value *dst) +{ + uint64_t success_ordering, failure_ordering; + struct vkd3d_shader_dst_param *dst_params; + struct vkd3d_shader_src_param *src_params; + const struct sm6_value *ptr, *cmp, *new; + const struct sm6_type *type; + unsigned int i = 0; + bool is_volatile; + uint64_t code; + + if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) + || !sm6_value_validate_is_pointer_to_i32(ptr, sm6) + || !sm6_value_validate_is_backward_ref(ptr, sm6)) + return; + + if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) + { + WARN("Register is not groupshared.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "The destination register for a cmpxchg instruction is not groupshared memory."); + return; + } + + if (!(dst->type = sm6_type_get_cmpxchg_result_struct(sm6))) + { + WARN("Failed to find result struct.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, + "Module does not define a result struct type for a cmpxchg instruction."); + return; + } + + type = ptr->type->u.pointer.type; + cmp = sm6_parser_get_value_by_ref(sm6, record, type, &i); + new = sm6_parser_get_value_by_ref(sm6, record, type, &i); + if (!cmp || !new) + return; + + if (!sm6_value_validate_is_i32(cmp, sm6) + || !sm6_value_validate_is_i32(new, sm6) + || !dxil_record_validate_operand_count(record, i + 3, i + 5, sm6)) + { + return; + } + + is_volatile = 
record->operands[i++]; + success_ordering = record->operands[i++]; + + if ((code = record->operands[i++]) != 1) + FIXME("Ignoring synchronisation scope %"PRIu64".\n", code); + + failure_ordering = (record->operand_count > i) ? record->operands[i++] : success_ordering; + + /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. */ + if (success_ordering != ORDERING_SEQCST) + FIXME("Unhandled success ordering %"PRIu64".\n", success_ordering); + if (success_ordering != failure_ordering) + FIXME("Unhandled failure ordering %"PRIu64".\n", failure_ordering); + + if (record->operand_count > i && record->operands[i]) + FIXME("Ignoring weak cmpxchg.\n"); + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_IMM_ATOMIC_CMP_EXCH); + ins->flags = is_volatile ? VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST; + + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) + return; + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[1], cmp); + src_param_init_from_value(&src_params[2], new); + + if (!(dst_params = instruction_dst_params_alloc(ins, 2, sm6))) + return; + register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6); + dst_param_init(&dst_params[0]); + dst_params[1].reg = ptr->u.reg; + dst_param_init(&dst_params[1]); + + dst->u.reg = dst_params[0].reg; +} + static void sm6_parser_emit_extractval(struct sm6_parser *sm6, const struct dxil_record *record, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { @@ -5459,6 +7037,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record register_index_address_init(®->idx[1], elem_value, sm6); reg->idx[1].is_in_bounds = is_in_bounds; reg->idx_count = 2; + dst->structure_stride = src->structure_stride; ins->handler_idx = VKD3DSIH_NOP; } @@ -5467,8 +7046,8 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor struct vkd3d_shader_instruction *ins, struct 
sm6_value *dst) { const struct sm6_type *elem_type = NULL, *pointee_type; - struct vkd3d_shader_src_param *src_param; - unsigned int alignment, i = 0; + unsigned int alignment, operand_count, i = 0; + struct vkd3d_shader_src_param *src_params; const struct sm6_value *ptr; uint64_t alignment_code; @@ -5476,6 +7055,7 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor return; if (!sm6_value_validate_is_register(ptr, sm6) || !sm6_value_validate_is_pointer(ptr, sm6) + || !sm6_value_validate_is_backward_ref(ptr, sm6) || !dxil_record_validate_operand_count(record, i + 2, i + 3, sm6)) return; @@ -5505,12 +7085,34 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor if (record->operands[i]) WARN("Ignoring volatile modifier.\n"); - vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + if (ptr->structure_stride) + { + assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED); - if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) - return; - src_param_init_from_value(&src_param[0], ptr); - src_param->reg.alignment = alignment; + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) + return; + if (ptr->u.reg.idx[1].rel_addr) + src_params[0] = *ptr->u.reg.idx[1].rel_addr; + else + src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); + /* Struct offset is always zero as there is no struct, just an array. */ + src_param_make_constant_uint(&src_params[1], 0); + src_param_init_from_value(&src_params[2], ptr); + src_params[2].reg.alignment = alignment; + } + else + { + operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? 
VKD3DSIH_LD_RAW : VKD3DSIH_MOV); + + if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) + return; + if (operand_count > 1) + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[operand_count - 1], ptr); + src_params[operand_count - 1].reg.alignment = alignment; + } instruction_dst_param_init_ssa_scalar(ins, sm6); } @@ -5628,16 +7230,17 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { - struct vkd3d_shader_src_param *src_param; + unsigned int i = 0, alignment, operand_count; + struct vkd3d_shader_src_param *src_params; struct vkd3d_shader_dst_param *dst_param; const struct sm6_type *pointee_type; const struct sm6_value *ptr, *src; - unsigned int i = 0, alignment; uint64_t alignment_code; if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) || !sm6_value_validate_is_register(ptr, sm6) - || !sm6_value_validate_is_pointer(ptr, sm6)) + || !sm6_value_validate_is_pointer(ptr, sm6) + || !sm6_value_validate_is_backward_ref(ptr, sm6)) { return; } @@ -5665,16 +7268,40 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco if (record->operands[i]) WARN("Ignoring volatile modifier.\n"); - vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + if (ptr->structure_stride) + { + assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED); - if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) - return; - src_param_init_from_value(&src_param[0], src); + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) + return; + if (ptr->u.reg.idx[1].rel_addr) + src_params[0] = *ptr->u.reg.idx[1].rel_addr; + else + src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); + /* Struct offset is always zero 
as there is no struct, just an array. */ + src_param_make_constant_uint(&src_params[1], 0); + src_param_init_from_value(&src_params[2], src); + } + else + { + operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV); + + if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) + return; + if (operand_count > 1) + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[operand_count - 1], src); + } dst_param = instruction_dst_params_alloc(ins, 1, sm6); dst_param_init(dst_param); dst_param->reg = ptr->u.reg; dst_param->reg.alignment = alignment; + /* Groupshared stores contain the address in the src params. */ + if (dst_param->reg.type != VKD3DSPR_IDXTEMP) + dst_param->reg.idx_count = 1; } static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_record *record, @@ -5855,6 +7482,25 @@ static bool sm6_metadata_get_uint64_value(const struct sm6_parser *sm6, return true; } +static bool sm6_metadata_get_float_value(const struct sm6_parser *sm6, + const struct sm6_metadata_value *m, float *f) +{ + const struct sm6_value *value; + + if (!m || m->type != VKD3D_METADATA_VALUE) + return false; + + value = m->u.value; + if (!sm6_value_is_constant(value)) + return false; + if (!sm6_type_is_floating_point(value->type)) + return false; + + *f = register_get_float_value(&value->u.reg); + + return true; +} + static void sm6_parser_metadata_attachment_block_init(struct sm6_parser *sm6, const struct dxil_block *target_block, const struct dxil_block *block) { @@ -6124,6 +7770,7 @@ static enum vkd3d_result sm6_function_resolve_phi_incomings(const struct sm6_fun static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const struct dxil_block *block, struct sm6_function *function) { + struct vsir_program *program = sm6->p.program; struct vkd3d_shader_instruction *ins; size_t i, block_idx, 
block_count; const struct dxil_record *record; @@ -6132,11 +7779,6 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const struct sm6_block *code_block; struct sm6_value *dst; - if (sm6->function_count) - { - FIXME("Multiple functions are not supported yet.\n"); - return VKD3D_ERROR_INVALID_SHADER; - } if (!(function->declaration = sm6_parser_next_function_definition(sm6))) { WARN("Failed to find definition to match function body.\n"); @@ -6207,6 +7849,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const fwd_type = dst->type; dst->type = NULL; dst->value_type = VALUE_TYPE_REG; + dst->is_back_ref = true; is_terminator = false; record = block->records[i]; @@ -6215,6 +7858,13 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const case FUNC_CODE_INST_ALLOCA: sm6_parser_emit_alloca(sm6, record, ins, dst); break; + case FUNC_CODE_INST_ATOMICRMW: + { + struct function_emission_state state = {code_block, ins}; + sm6_parser_emit_atomicrmw(sm6, record, &state, dst); + program->temp_count = max(program->temp_count, state.temp_idx); + break; + } case FUNC_CODE_INST_BINOP: sm6_parser_emit_binop(sm6, record, ins, dst); break; @@ -6226,7 +7876,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const { struct function_emission_state state = {code_block, ins}; sm6_parser_emit_call(sm6, record, &state, dst); - sm6->p.program.temp_count = max(sm6->p.program.temp_count, state.temp_idx); + program->temp_count = max(program->temp_count, state.temp_idx); break; } case FUNC_CODE_INST_CAST: @@ -6235,6 +7885,9 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const case FUNC_CODE_INST_CMP2: sm6_parser_emit_cmp2(sm6, record, ins, dst); break; + case FUNC_CODE_INST_CMPXCHG: + sm6_parser_emit_cmpxchg(sm6, record, ins, dst); + break; case FUNC_CODE_INST_EXTRACTVAL: sm6_parser_emit_extractval(sm6, record, ins, dst); break; @@ -6497,9 +8150,10 @@ static 
void sm6_parser_emit_label(struct sm6_parser *sm6, unsigned int label_id) static enum vkd3d_result sm6_function_emit_blocks(const struct sm6_function *function, struct sm6_parser *sm6) { + struct vsir_program *program = sm6->p.program; unsigned int i; - sm6->p.program.block_count = function->block_count; + program->block_count = function->block_count; for (i = 0; i < function->block_count; ++i) { @@ -6515,9 +8169,9 @@ static enum vkd3d_result sm6_function_emit_blocks(const struct sm6_function *fun sm6_parser_emit_label(sm6, block->id); sm6_block_emit_phi(block, sm6); - memcpy(&sm6->p.program.instructions.elements[sm6->p.program.instructions.count], block->instructions, + memcpy(&program->instructions.elements[program->instructions.count], block->instructions, block->instruction_count * sizeof(*block->instructions)); - sm6->p.program.instructions.count += block->instruction_count; + program->instructions.count += block->instruction_count; sm6_block_emit_terminator(block, sm6); } @@ -6800,14 +8454,53 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = { [SEMANTIC_KIND_ARBITRARY] = VKD3D_SHADER_SV_NONE, [SEMANTIC_KIND_VERTEXID] = VKD3D_SHADER_SV_VERTEX_ID, + [SEMANTIC_KIND_INSTANCEID] = VKD3D_SHADER_SV_INSTANCE_ID, [SEMANTIC_KIND_POSITION] = VKD3D_SHADER_SV_POSITION, + [SEMANTIC_KIND_CLIPDISTANCE] = VKD3D_SHADER_SV_CLIP_DISTANCE, + [SEMANTIC_KIND_CULLDISTANCE] = VKD3D_SHADER_SV_CULL_DISTANCE, + [SEMANTIC_KIND_PRIMITIVEID] = VKD3D_SHADER_SV_PRIMITIVE_ID, [SEMANTIC_KIND_ISFRONTFACE] = VKD3D_SHADER_SV_IS_FRONT_FACE, + [SEMANTIC_KIND_COVERAGE] = VKD3D_SHADER_SV_COVERAGE, [SEMANTIC_KIND_TARGET] = VKD3D_SHADER_SV_TARGET, + [SEMANTIC_KIND_DEPTH] = VKD3D_SHADER_SV_DEPTH, + [SEMANTIC_KIND_DEPTHLESSEQUAL] = VKD3D_SHADER_SV_DEPTH_LESS_EQUAL, + [SEMANTIC_KIND_DEPTHGREATEREQUAL] = VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL, }; -static enum vkd3d_shader_sysval_semantic sysval_semantic_from_dxil_semantic_kind(enum dxil_semantic_kind kind) +static enum 
vkd3d_shader_sysval_semantic sysval_semantic_from_dxil_semantic_kind(enum dxil_semantic_kind kind, + enum vkd3d_tessellator_domain domain) { - if (kind < ARRAY_SIZE(sysval_semantic_table)) + if (kind == SEMANTIC_KIND_TESSFACTOR) + { + switch (domain) + { + case VKD3D_TESSELLATOR_DOMAIN_LINE: + return VKD3D_SHADER_SV_TESS_FACTOR_LINEDET; + case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: + return VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; + case VKD3D_TESSELLATOR_DOMAIN_QUAD: + return VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE; + default: + /* Error is handled during parsing. */ + return VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; + } + } + else if (kind == SEMANTIC_KIND_INSIDETESSFACTOR) + { + switch (domain) + { + case VKD3D_TESSELLATOR_DOMAIN_LINE: + return VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN; + case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: + return VKD3D_SHADER_SV_TESS_FACTOR_TRIINT; + case VKD3D_TESSELLATOR_DOMAIN_QUAD: + return VKD3D_SHADER_SV_TESS_FACTOR_QUADINT; + default: + /* Error is handled during parsing. */ + return VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; + } + } + else if (kind < ARRAY_SIZE(sysval_semantic_table)) { return sysval_semantic_table[kind]; } @@ -7448,7 +9141,7 @@ static enum vkd3d_result sm6_parser_descriptor_type_init(struct sm6_parser *sm6, } ++sm6->descriptor_count; - ++sm6->p.program.instructions.count; + ++sm6->p.program->instructions.count; } return VKD3D_OK; @@ -7563,12 +9256,13 @@ static void signature_element_read_additional_element_values(struct signature_el } static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const struct sm6_metadata_value *m, - struct shader_signature *s) + struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain) { unsigned int i, j, column_count, operand_count, index; const struct sm6_metadata_node *node, *element_node; struct signature_element *elements, *e; unsigned int values[10]; + bool is_register; if (!m) return VKD3D_OK; @@ -7656,7 +9350,7 @@ static enum vkd3d_result 
sm6_parser_read_signature(struct sm6_parser *sm6, const e->min_precision = minimum_precision_from_dxil_component_type(values[2]); j = values[3]; - e->sysval_semantic = sysval_semantic_from_dxil_semantic_kind(j); + e->sysval_semantic = sysval_semantic_from_dxil_semantic_kind(j, tessellator_domain); if (j != SEMANTIC_KIND_ARBITRARY && j != SEMANTIC_KIND_TARGET && e->sysval_semantic == VKD3D_SHADER_SV_NONE) { WARN("Unhandled semantic kind %u.\n", j); @@ -7677,7 +9371,18 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const column_count = values[7]; e->register_index = values[8]; e->target_location = e->register_index; - if (e->register_index > MAX_REG_OUTPUT || e->register_count > MAX_REG_OUTPUT - e->register_index) + + if ((is_register = e->register_index == UINT_MAX)) + { + if (register_type_from_dxil_semantic_kind(e->sysval_semantic) == VKD3DSPR_INVALID) + { + WARN("Unhandled I/O register semantic kind %u.\n", j); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, + "DXIL semantic kind %u is unhandled for an I/O register.", j); + return VKD3D_ERROR_INVALID_SHADER; + } + } + else if (e->register_index > MAX_REG_OUTPUT || e->register_count > MAX_REG_OUTPUT - e->register_index) { WARN("Invalid row start %u with row count %u.\n", e->register_index, e->register_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, @@ -7685,8 +9390,9 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const e->register_index, e->register_count); return VKD3D_ERROR_INVALID_SHADER; } + index = values[9]; - if (index >= VKD3D_VEC4_SIZE || column_count > VKD3D_VEC4_SIZE - index) + if (index != UINT8_MAX && (index >= VKD3D_VEC4_SIZE || column_count > VKD3D_VEC4_SIZE - index)) { WARN("Invalid column start %u with count %u.\n", index, column_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, @@ -7696,10 +9402,17 @@ static enum vkd3d_result 
sm6_parser_read_signature(struct sm6_parser *sm6, const e->mask = vkd3d_write_mask_from_component_count(column_count); e->used_mask = e->mask; - e->mask <<= index; - signature_element_read_additional_element_values(e, element_node, sm6); - e->used_mask <<= index; + + if (index != UINT8_MAX) + { + e->mask <<= index; + e->used_mask <<= index; + } + + /* DXIL reads/writes uint for bool I/O. */ + if (e->component_type == VKD3D_SHADER_COMPONENT_BOOL) + e->component_type = VKD3D_SHADER_COMPONENT_UINT; m = element_node->operands[4]; if (!sm6_metadata_value_is_node(m)) @@ -7732,15 +9445,32 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const } } - vkd3d_free(s->elements); + for (i = 0; i < operand_count; ++i) + { + if ((elements[i].semantic_name = vkd3d_strdup(elements[i].semantic_name))) + continue; + + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Failed to allocate signature element semantic name."); + for (j = 0; j < i; ++j) + { + vkd3d_free((void *)elements[j].semantic_name); + } + vkd3d_free(elements); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + shader_signature_cleanup(s); s->elements = elements; s->element_count = operand_count; return VKD3D_OK; } -static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m) +static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m, + enum vkd3d_tessellator_domain tessellator_domain) { + struct vsir_program *program = sm6->p.program; enum vkd3d_result ret; if (!sm6_metadata_value_is_node(m)) @@ -7752,19 +9482,24 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons } if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], - &sm6->p.shader_desc.input_signature)) < 0) + &program->input_signature, tessellator_domain)) < 0) { return ret; } if (m->u.node->operand_count > 1 && (ret = 
sm6_parser_read_signature(sm6, m->u.node->operands[1], - &sm6->p.shader_desc.output_signature)) < 0) + &program->output_signature, tessellator_domain)) < 0) + { + return ret; + } + if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[2], + &program->patch_constant_signature, tessellator_domain)) < 0) { return ret; } - /* TODO: patch constant signature in operand 2. */ - sm6_parser_init_input_signature(sm6, &sm6->p.shader_desc.input_signature); - sm6_parser_init_output_signature(sm6, &sm6->p.shader_desc.output_signature); + sm6_parser_init_input_signature(sm6, &program->input_signature); + sm6_parser_init_output_signature(sm6, &program->output_signature); + sm6_parser_init_patch_constant_signature(sm6, &program->patch_constant_signature); return VKD3D_OK; } @@ -7793,14 +9528,15 @@ static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, const struct sm6_metadata_value *m) { + struct vkd3d_shader_version *version = &sm6->p.program->shader_version; const struct sm6_metadata_node *node; struct vkd3d_shader_instruction *ins; unsigned int group_sizes[3]; unsigned int i; - if (sm6->p.program.shader_version.type != VKD3D_SHADER_TYPE_COMPUTE) + if (version->type != VKD3D_SHADER_TYPE_COMPUTE) { - WARN("Shader of type %#x has thread group dimensions.\n", sm6->p.program.shader_version.type); + WARN("Shader of type %#x has thread group dimensions.\n", version->type); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Shader has thread group dimensions but is not a compute shader."); return VKD3D_ERROR_INVALID_SHADER; @@ -7850,10 +9586,350 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co return VKD3D_OK; } +static void sm6_parser_emit_dcl_count(struct sm6_parser *sm6, enum vkd3d_shader_opcode handler_idx, unsigned int count) +{ + struct vkd3d_shader_instruction *ins; + + ins 
= sm6_parser_add_instruction(sm6, handler_idx); + ins->declaration.count = count; +} + +static void sm6_parser_emit_dcl_primitive_topology(struct sm6_parser *sm6, + enum vkd3d_shader_opcode handler_idx, enum vkd3d_primitive_type primitive_type, + unsigned int patch_vertex_count) +{ + struct vkd3d_shader_instruction *ins; + + ins = sm6_parser_add_instruction(sm6, handler_idx); + ins->declaration.primitive_type.type = primitive_type; + ins->declaration.primitive_type.patch_vertex_count = patch_vertex_count; +} + +static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, + enum vkd3d_tessellator_domain tessellator_domain) +{ + struct vkd3d_shader_instruction *ins; + + if (tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID || tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) + { + WARN("Unhandled domain %u.\n", tessellator_domain); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Domain shader tessellator domain %u is unhandled.", tessellator_domain); + } + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN); + ins->declaration.tessellator_domain = tessellator_domain; +} + +static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count, + const char *type) +{ + if (!count || count > 32) + { + WARN("%s control point count %u invalid.\n", type, count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "%s control point count %u is invalid.", type, count); + } +} + +static void sm6_parser_emit_dcl_tessellator_partitioning(struct sm6_parser *sm6, + enum vkd3d_shader_tessellator_partitioning tessellator_partitioning) +{ + struct vkd3d_shader_instruction *ins; + + if (!tessellator_partitioning || tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) + { + WARN("Unhandled partitioning %u.\n", tessellator_partitioning); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + 
"Hull shader tessellator partitioning %u is unhandled.", tessellator_partitioning); + } + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING); + ins->declaration.tessellator_partitioning = tessellator_partitioning; +} + +static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser *sm6, + enum vkd3d_shader_tessellator_output_primitive primitive) +{ + struct vkd3d_shader_instruction *ins; + + if (!primitive || primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) + { + WARN("Unhandled output primitive %u.\n", primitive); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Hull shader tessellator output primitive %u is unhandled.", primitive); + } + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE); + ins->declaration.tessellator_output_primitive = primitive; +} + +static void sm6_parser_emit_dcl_max_tessellation_factor(struct sm6_parser *sm6, struct sm6_metadata_value *m) +{ + struct vkd3d_shader_instruction *ins; + float max_tessellation_factor; + + if (!sm6_metadata_get_float_value(sm6, m, &max_tessellation_factor)) + { + WARN("Max tess factor property is not a float value.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Hull shader max tessellation factor property operand is not a float."); + return; + } + + /* Exclude non-finite values. 
*/ + if (!(max_tessellation_factor >= 1.0f && max_tessellation_factor <= 64.0f)) + { + WARN("Invalid max tess factor %f.\n", max_tessellation_factor); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Hull shader max tessellation factor %f is invalid.", max_tessellation_factor); + } + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_HS_MAX_TESSFACTOR); + ins->declaration.max_tessellation_factor = max_tessellation_factor; +} + +static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m) +{ + enum vkd3d_primitive_type input_primitive = VKD3D_PT_TRIANGLELIST, output_primitive; + unsigned int i, input_control_point_count = 1, patch_vertex_count = 0; + const struct sm6_metadata_node *node; + unsigned int operands[5] = {0}; + + if (!m || !sm6_metadata_value_is_node(m)) + { + WARN("Missing or invalid GS properties.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Geometry shader properties node is missing or invalid."); + return; + } + + node = m->u.node; + if (node->operand_count < ARRAY_SIZE(operands)) + { + WARN("Invalid operand count %u.\n", node->operand_count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, + "Geometry shader properties operand count %u is invalid.", node->operand_count); + return; + } + if (node->operand_count > ARRAY_SIZE(operands)) + { + WARN("Ignoring %zu extra operands.\n", node->operand_count - ARRAY_SIZE(operands)); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, + "Ignoring %zu extra operands for geometry shader properties.", + node->operand_count - ARRAY_SIZE(operands)); + } + + for (i = 0; i < node->operand_count; ++i) + { + if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) + { + WARN("GS property at index %u is not a uint value.\n", i); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Geometry 
shader properties operand at index %u is not an integer.", i); + } + } + + switch (i = operands[0]) + { + case INPUT_PRIMITIVE_POINT: + input_primitive = VKD3D_PT_POINTLIST; + input_control_point_count = 1; + break; + + case INPUT_PRIMITIVE_LINE: + input_primitive = VKD3D_PT_LINELIST; + input_control_point_count = 2; + break; + + case INPUT_PRIMITIVE_TRIANGLE: + input_primitive = VKD3D_PT_TRIANGLELIST; + input_control_point_count = 3; + break; + + case INPUT_PRIMITIVE_LINEWITHADJACENCY: + input_primitive = VKD3D_PT_LINELIST_ADJ; + input_control_point_count = 4; + break; + + case INPUT_PRIMITIVE_TRIANGLEWITHADJACENY: + input_primitive = VKD3D_PT_TRIANGLELIST_ADJ; + input_control_point_count = 6; + break; + + default: + if (i >= INPUT_PRIMITIVE_PATCH1 && i <= INPUT_PRIMITIVE_PATCH32) + { + input_primitive = VKD3D_PT_PATCH; + patch_vertex_count = i - INPUT_PRIMITIVE_PATCH1 + 1; + break; + } + + WARN("Unhandled input primitive %u.\n", i); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Geometry shader input primitive %u is unhandled.", i); + break; + } + + sm6_parser_emit_dcl_primitive_topology(sm6, VKD3DSIH_DCL_INPUT_PRIMITIVE, input_primitive, patch_vertex_count); + sm6->p.program->input_control_point_count = input_control_point_count; + + i = operands[1]; + /* Max total scalar count sets an upper limit. We would need to scan outputs to be more precise. 
*/ + if (i > MAX_GS_OUTPUT_TOTAL_SCALARS) + { + WARN("GS output vertex count %u invalid.\n", i); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Geometry shader output vertex count %u is invalid.", i); + } + sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_VERTICES_OUT, i); + + if (operands[2] > 1) + { + FIXME("Unhandled stream mask %#x.\n", operands[2]); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Geometry shader stream mask %#x is unhandled.", operands[2]); + } + + output_primitive = operands[3]; + if (output_primitive == VKD3D_PT_UNDEFINED || output_primitive >= VKD3D_PT_COUNT) + { + WARN("Unhandled output primitive %u.\n", output_primitive); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Geometry shader output primitive %u is unhandled.", output_primitive); + output_primitive = VKD3D_PT_TRIANGLELIST; + } + sm6_parser_emit_dcl_primitive_topology(sm6, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, output_primitive, 0); + + i = operands[4]; + if (!i || i > MAX_GS_INSTANCE_COUNT) + { + WARN("GS instance count %u invalid.\n", i); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Geometry shader instance count %u is invalid.", i); + } + sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_GS_INSTANCES, i); +} + +static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_parser *sm6, + const struct sm6_metadata_value *m) +{ + const struct sm6_metadata_node *node; + unsigned int operands[2] = {0}; + unsigned int i; + + if (!m || !sm6_metadata_value_is_node(m)) + { + WARN("Missing or invalid DS properties.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Domain shader properties node is missing or invalid."); + return 0; + } + + node = m->u.node; + if (node->operand_count < ARRAY_SIZE(operands)) + { + WARN("Invalid operand count %u.\n", node->operand_count); + vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, + "Domain shader properties operand count %u is invalid.", node->operand_count); + return 0; + } + if (node->operand_count > ARRAY_SIZE(operands)) + { + WARN("Ignoring %zu extra operands.\n", node->operand_count - ARRAY_SIZE(operands)); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, + "Ignoring %zu extra operands for domain shader properties.", + node->operand_count - ARRAY_SIZE(operands)); + } + + for (i = 0; i < node->operand_count; ++i) + { + if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) + { + WARN("DS property at index %u is not a uint value.\n", i); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Domain shader properties operand at index %u is not an integer.", i); + } + } + + sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); + sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input"); + sm6->p.program->input_control_point_count = operands[1]; + + return operands[0]; +} + +static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_parser *sm6, + const struct sm6_metadata_value *m) +{ + struct vsir_program *program = sm6->p.program; + const struct sm6_metadata_node *node; + unsigned int operands[6] = {0}; + unsigned int i; + + if (!m || !sm6_metadata_value_is_node(m)) + { + WARN("Missing or invalid HS properties.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Hull shader properties node is missing or invalid."); + return 0; + } + + node = m->u.node; + if (node->operand_count < 7) + { + WARN("Invalid operand count %u.\n", node->operand_count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, + "Hull shader properties operand count %u is invalid.", node->operand_count); + return 0; + } + if (node->operand_count > 7) + { + WARN("Ignoring %u extra operands.\n", node->operand_count - 7); + 
vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, + "Ignoring %u extra operands for hull shader properties.", node->operand_count - 7); + } + + m = node->operands[0]; + if (!sm6_metadata_value_is_value(m) || !sm6_value_is_function_dcl(m->u.value)) + { + WARN("Patch constant function node is not a function value.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Hull shader patch constant function node is not a function value."); + } + else + { + sm6->patch_constant_function = m->u.value->u.function.name; + } + + for (i = 1; i < min(node->operand_count, ARRAY_SIZE(operands)); ++i) + { + if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) + { + WARN("HS property at index %u is not a uint value.\n", i); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Hull shader properties operand at index %u is not an integer.", i); + } + } + + sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input"); + program->input_control_point_count = operands[1]; + sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output"); + sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]); + program->output_control_point_count = operands[2]; + sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]); + sm6_parser_emit_dcl_tessellator_partitioning(sm6, operands[4]); + sm6_parser_emit_dcl_tessellator_output_primitive(sm6, operands[5]); + sm6_parser_emit_dcl_max_tessellation_factor(sm6, node->operands[6]); + + return operands[3]; +} + static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) { const struct sm6_metadata_value *m = sm6_parser_find_named_metadata(sm6, "dx.entryPoints"); const struct sm6_metadata_node *node, *entry_node = m ? 
m->u.node : NULL; + enum vkd3d_tessellator_domain tessellator_domain = 0; unsigned int i, operand_count, tag; const struct sm6_value *value; enum vkd3d_result ret; @@ -7892,12 +9968,6 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) "Entry point function name %s does not match the name in metadata.", sm6->entry_point); } - if (entry_node->operand_count >= 3 && (m = entry_node->operands[2]) - && (ret = sm6_parser_signatures_init(sm6, m)) < 0) - { - return ret; - } - if (entry_node->operand_count >= 5 && (m = entry_node->operands[4])) { if (!sm6_metadata_value_is_node(m)) @@ -7932,6 +10002,15 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) case SHADER_PROPERTIES_FLAGS: sm6_parser_emit_global_flags(sm6, node->operands[i + 1]); break; + case SHADER_PROPERTIES_GEOMETRY: + sm6_parser_gs_properties_init(sm6, node->operands[i + 1]); + break; + case SHADER_PROPERTIES_DOMAIN: + tessellator_domain = sm6_parser_ds_properties_init(sm6, node->operands[i + 1]); + break; + case SHADER_PROPERTIES_HULL: + tessellator_domain = sm6_parser_hs_properties_init(sm6, node->operands[i + 1]); + break; case SHADER_PROPERTIES_COMPUTE: if ((ret = sm6_parser_emit_thread_group(sm6, node->operands[i + 1])) < 0) return ret; @@ -7945,6 +10024,12 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) } } + if (entry_node->operand_count >= 3 && (m = entry_node->operands[2]) + && (ret = sm6_parser_signatures_init(sm6, m, tessellator_domain)) < 0) + { + return ret; + } + return VKD3D_OK; } @@ -8049,28 +10134,18 @@ static void sm6_functions_cleanup(struct sm6_function *functions, size_t count) vkd3d_free(functions); } -static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) +static void sm6_parser_cleanup(struct sm6_parser *sm6) { - struct sm6_parser *sm6 = sm6_parser(parser); - dxil_block_destroy(&sm6->root_block); dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); - 
vsir_program_cleanup(&parser->program); sm6_type_table_cleanup(sm6->types, sm6->type_count); sm6_symtab_cleanup(sm6->global_symbols, sm6->global_symbol_count); sm6_functions_cleanup(sm6->functions, sm6->function_count); sm6_parser_metadata_cleanup(sm6); vkd3d_free(sm6->descriptors); vkd3d_free(sm6->values); - free_shader_desc(&parser->shader_desc); - vkd3d_free(sm6); } -static const struct vkd3d_shader_parser_ops sm6_parser_ops = -{ - .parser_destroy = sm6_parser_destroy, -}; - static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6, const char *name) { size_t i; @@ -8080,15 +10155,15 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 return NULL; } -static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, - const char *source_name, struct vkd3d_shader_message_context *message_context) +static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_program *program, const char *source_name, + struct vkd3d_shader_message_context *message_context, struct dxbc_shader_desc *dxbc_desc) { - const struct shader_signature *output_signature = &sm6->p.shader_desc.output_signature; - const struct shader_signature *input_signature = &sm6->p.shader_desc.input_signature; + size_t count, length, function_count, expected_function_count, byte_code_size = dxbc_desc->byte_code_size; + struct shader_signature *patch_constant_signature, *output_signature, *input_signature; const struct vkd3d_shader_location location = {.source_name = source_name}; uint32_t version_token, dxil_version, token_count, magic; + const uint32_t *byte_code = dxbc_desc->byte_code; unsigned int chunk_offset, chunk_size; - size_t count, length, function_count; enum bitcode_block_abbreviation abbr; struct vkd3d_shader_version version; struct dxil_block *block; @@ -8176,11 +10251,20 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t /* Estimate instruction 
count to avoid reallocation in most shaders. */ count = max(token_count, 400) - 400; - vkd3d_shader_parser_init(&sm6->p, message_context, source_name, &version, &sm6_parser_ops, - (count + (count >> 2)) / 2u + 10); + if (!vsir_program_init(program, &version, (count + (count >> 2)) / 2u + 10)) + return VKD3D_ERROR_OUT_OF_MEMORY; + vkd3d_shader_parser_init(&sm6->p, program, message_context, source_name); sm6->ptr = &sm6->start[1]; sm6->bitpos = 2; + input_signature = &program->input_signature; + output_signature = &program->output_signature; + patch_constant_signature = &program->patch_constant_signature; + *input_signature = dxbc_desc->input_signature; + *output_signature = dxbc_desc->output_signature; + *patch_constant_signature = dxbc_desc->patch_constant_signature; + memset(dxbc_desc, 0, sizeof(*dxbc_desc)); + block = &sm6->root_block; if ((ret = dxil_block_init(block, NULL, sm6)) < 0) { @@ -8192,7 +10276,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t "DXIL bitcode chunk has invalid bitcode."); else vkd3d_unreachable(); - return ret; + goto fail; } dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); @@ -8225,7 +10309,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t "DXIL type table is invalid."); else vkd3d_unreachable(); - return ret; + goto fail; } if ((ret = sm6_parser_symtab_init(sm6)) < 0) @@ -8238,16 +10322,19 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t "DXIL value symbol table is invalid."); else vkd3d_unreachable(); - return ret; + goto fail; } - if (!(sm6->output_params = vsir_program_get_dst_params(&sm6->p.program, output_signature->element_count)) - || !(sm6->input_params = vsir_program_get_dst_params(&sm6->p.program, input_signature->element_count))) + if (!(sm6->output_params = vsir_program_get_dst_params(program, output_signature->element_count)) + || !(sm6->input_params = vsir_program_get_dst_params(program, 
input_signature->element_count)) + || !(sm6->patch_constant_params = vsir_program_get_dst_params(program, + patch_constant_signature->element_count))) { ERR("Failed to allocate input/output parameters.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, "Out of memory allocating input/output parameters."); - return VKD3D_ERROR_OUT_OF_MEMORY; + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; } function_count = dxil_block_compute_function_count(&sm6->root_block); @@ -8256,7 +10343,8 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t ERR("Failed to allocate function array.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, "Out of memory allocating DXIL function array."); - return VKD3D_ERROR_OUT_OF_MEMORY; + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; } if (sm6_parser_compute_max_value_count(sm6, &sm6->root_block, 0) == SIZE_MAX) @@ -8264,27 +10352,31 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t WARN("Value array count overflowed.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, "Overflow occurred in the DXIL module value count."); - return VKD3D_ERROR_INVALID_SHADER; + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; } if (!(sm6->values = vkd3d_calloc(sm6->value_capacity, sizeof(*sm6->values)))) { ERR("Failed to allocate value array.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, "Out of memory allocating DXIL value array."); - return VKD3D_ERROR_OUT_OF_MEMORY; + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; } + sm6->function_count = 0; sm6->ssa_next_id = 1; if ((ret = sm6_parser_globals_init(sm6)) < 0) { WARN("Failed to load global declarations.\n"); - return ret; + goto fail; } if (!sm6_parser_allocate_named_metadata(sm6)) { ERR("Failed to allocate named metadata array.\n"); - return VKD3D_ERROR_OUT_OF_MEMORY; + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; } for (i = 0, j = 0; i < 
sm6->root_block.child_block_count; ++i) @@ -8298,18 +10390,19 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t FIXME("Too many metadata tables.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_METADATA, "A metadata table count greater than %zu is unsupported.", ARRAY_SIZE(sm6->metadata_tables)); - return VKD3D_ERROR_INVALID_SHADER; + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; } if ((ret = sm6_parser_metadata_init(sm6, block, &sm6->metadata_tables[j++])) < 0) - return ret; + goto fail; } if ((ret = sm6_parser_entry_point_init(sm6)) < 0) - return ret; + goto fail; if ((ret = sm6_parser_resources_init(sm6)) < 0) - return ret; + goto fail; if ((ret = sm6_parser_module_init(sm6, &sm6->root_block, 0)) < 0) { @@ -8319,92 +10412,124 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t else if (ret == VKD3D_ERROR_INVALID_SHADER) vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, "DXIL module is invalid."); - return ret; + goto fail; } - if (!sm6_parser_require_space(sm6, output_signature->element_count + input_signature->element_count)) + if (!sm6_parser_require_space(sm6, output_signature->element_count + input_signature->element_count + + patch_constant_signature->element_count)) { vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, "Out of memory emitting shader signature declarations."); - return VKD3D_ERROR_OUT_OF_MEMORY; + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; } - sm6->p.program.ssa_count = sm6->ssa_next_id; + program->ssa_count = sm6->ssa_next_id; if (!(fn = sm6_parser_get_function(sm6, sm6->entry_point))) { WARN("Failed to find entry point %s.\n", sm6->entry_point); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_ENTRY_POINT, "The definition of the entry point function '%s' was not found.", sm6->entry_point); - return VKD3D_ERROR_INVALID_SHADER; + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; } - 
assert(sm6->function_count == 1); - if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) - return ret; + if (version.type == VKD3D_SHADER_TYPE_HULL) + { + sm6_parser_add_instruction(sm6, VKD3DSIH_HS_CONTROL_POINT_PHASE); + + if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) + goto fail; + + if (!(fn = sm6_parser_get_function(sm6, sm6->patch_constant_function))) + { + WARN("Failed to find patch constant function '%s'.\n", sm6->patch_constant_function); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, + "Failed to find the patch constant function '%s' for a hull shader.", + sm6->patch_constant_function); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + + sm6_parser_add_instruction(sm6, VKD3DSIH_HS_FORK_PHASE); + if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) + goto fail; + + expected_function_count = 2; + } + else + { + if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) + goto fail; + expected_function_count = 1; + } + + if (sm6->function_count > expected_function_count) + { + FIXME("%zu unhandled functions.\n", sm6->function_count - expected_function_count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, + "%zu functions were not emitted.", sm6->function_count - expected_function_count); + } dxil_block_destroy(&sm6->root_block); return VKD3D_OK; + +fail: + vsir_program_cleanup(program); + return ret; } -int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) +int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, + struct vkd3d_shader_message_context *message_context, struct vsir_program *program) { - struct vkd3d_shader_desc *shader_desc; + struct dxbc_shader_desc dxbc_desc = {0}; + struct sm6_parser sm6 = {0}; uint32_t *byte_code = NULL; - struct sm6_parser *sm6; int ret; ERR("Creating a DXIL parser. 
This is unsupported; you get to keep all the pieces if it breaks.\n"); - if (!(sm6 = vkd3d_calloc(1, sizeof(*sm6)))) - { - ERR("Failed to allocate parser.\n"); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - - shader_desc = &sm6->p.shader_desc; - shader_desc->is_dxil = true; + dxbc_desc.is_dxil = true; if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, - shader_desc)) < 0) + &dxbc_desc)) < 0) { WARN("Failed to extract shader, vkd3d result %d.\n", ret); - vkd3d_free(sm6); return ret; } - sm6->p.shader_desc = *shader_desc; - shader_desc = &sm6->p.shader_desc; - - if (((uintptr_t)shader_desc->byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) + if (((uintptr_t)dxbc_desc.byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) { /* LLVM bitcode should be 32-bit aligned, but before dxc v1.7.2207 this was not always the case in the DXBC * container due to missing padding after signature names. Get an aligned copy to prevent unaligned access. */ - if (!(byte_code = vkd3d_malloc(align(shader_desc->byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) - ERR("Failed to allocate aligned chunk. Unaligned access will occur.\n"); - else - memcpy(byte_code, shader_desc->byte_code, shader_desc->byte_code_size); + if (!(byte_code = vkd3d_malloc(align(dxbc_desc.byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) + { + ERR("Failed to allocate aligned chunk.\n"); + free_dxbc_shader_desc(&dxbc_desc); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + memcpy(byte_code, dxbc_desc.byte_code, dxbc_desc.byte_code_size); + dxbc_desc.byte_code = byte_code; } - ret = sm6_parser_init(sm6, byte_code ? 
byte_code : shader_desc->byte_code, shader_desc->byte_code_size, - compile_info->source_name, message_context); + ret = sm6_parser_init(&sm6, program, compile_info->source_name, message_context, &dxbc_desc); + free_dxbc_shader_desc(&dxbc_desc); vkd3d_free(byte_code); - if (!sm6->p.failed && ret >= 0) - ret = vsir_validate(&sm6->p); + if (!sm6.p.failed && ret >= 0) + ret = vkd3d_shader_parser_validate(&sm6.p, config_flags); - if (sm6->p.failed && ret >= 0) + if (sm6.p.failed && ret >= 0) ret = VKD3D_ERROR_INVALID_SHADER; + sm6_parser_cleanup(&sm6); if (ret < 0) { - WARN("Failed to initialise shader parser.\n"); - sm6_parser_destroy(&sm6->p); + WARN("Failed to parse shader.\n"); return ret; } - *parser = &sm6->p; - return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index bc70d5220fd..57b4ac24212 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -61,9 +61,9 @@ struct fx_write_context; struct fx_write_context_ops { uint32_t (*write_string)(const char *string, struct fx_write_context *fx); - uint32_t (*write_type)(const struct hlsl_type *type, struct fx_write_context *fx); void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); + bool are_child_effects_supported; }; struct fx_write_context @@ -82,10 +82,23 @@ struct fx_write_context uint32_t technique_count; uint32_t group_count; uint32_t buffer_count; + uint32_t shared_buffer_count; uint32_t numeric_variable_count; + uint32_t shared_numeric_variable_count; uint32_t object_variable_count; + uint32_t shared_object_count; + uint32_t shader_count; + uint32_t parameter_count; + uint32_t dsv_count; + uint32_t rtv_count; + uint32_t texture_count; + uint32_t uav_count; + uint32_t sampler_state_count; int status; + bool child_effect; + bool include_empty_buffers; + const struct fx_write_context_ops *ops; }; @@ -97,6 +110,11 @@ static void 
set_status(struct fx_write_context *fx, int status) fx->status = status; } +static bool has_annotations(const struct hlsl_ir_var *var) +{ + return var->annotations && !list_empty(&var->annotations->vars); +} + static uint32_t write_string(const char *string, struct fx_write_context *fx) { return fx->ops->write_string(string, fx); @@ -104,26 +122,37 @@ static uint32_t write_string(const char *string, struct fx_write_context *fx) static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) { + if (var->state_block_count) + hlsl_fixme(fx->ctx, &var->loc, "Write state block assignments."); + fx->ops->write_pass(var, fx); } +static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); +static const char * get_fx_4_type_name(const struct hlsl_type *type); + static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) { + const struct hlsl_type *element_type; struct type_entry *type_entry; unsigned int elements_count; const char *name; + assert(fx->ctx->profile->major_version >= 4); + if (type->class == HLSL_CLASS_ARRAY) { - name = hlsl_get_multiarray_element_type(type)->name; elements_count = hlsl_get_multiarray_size(type); + element_type = hlsl_get_multiarray_element_type(type); } else { - name = type->name; elements_count = 0; + element_type = type; } + name = get_fx_4_type_name(element_type); + LIST_FOR_EACH_ENTRY(type_entry, &fx->types, struct type_entry, entry) { if (strcmp(type_entry->name, name)) @@ -138,7 +167,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context if (!(type_entry = hlsl_alloc(fx->ctx, sizeof(*type_entry)))) return 0; - type_entry->offset = fx->ops->write_type(type, fx); + type_entry->offset = write_fx_4_type(type, fx); type_entry->name = name; type_entry->elements_count = elements_count; @@ -151,6 +180,7 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co struct fx_write_context *fx) { unsigned int version = 
ctx->profile->major_version; + struct hlsl_ir_var *var; memset(fx, 0, sizeof(*fx)); @@ -174,12 +204,25 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co rb_init(&fx->strings, string_storage_compare); list_init(&fx->types); + + fx->child_effect = fx->ops->are_child_effects_supported && ctx->child_effect; + fx->include_empty_buffers = version == 4 && ctx->include_empty_buffers; + + LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) + { + if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) + { + list_add_tail(&ctx->extern_vars, &var->extern_entry); + var->is_uniform = 1; + } + } + + hlsl_calculate_buffer_offsets(fx->ctx); } static int fx_write_context_cleanup(struct fx_write_context *fx) { struct type_entry *type, *next_type; - int status = fx->status; rb_destroy(&fx->strings, string_storage_destroy, NULL); @@ -189,14 +232,14 @@ static int fx_write_context_cleanup(struct fx_write_context *fx) vkd3d_free(type); } - return status; + return fx->ctx->result; } static bool technique_matches_version(const struct hlsl_ir_var *var, const struct fx_write_context *fx) { const struct hlsl_type *type = var->data_type; - if (type->base_type != HLSL_TYPE_TECHNIQUE) + if (type->class != HLSL_CLASS_TECHNIQUE) return false; return type->e.version >= fx->min_technique_version && type->e.version <= fx->max_technique_version; @@ -266,6 +309,14 @@ static uint32_t get_fx_4_type_size(const struct hlsl_type *type) return type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float) * elements_count; } +static const uint32_t fx_4_numeric_base_type[] = +{ + [HLSL_TYPE_FLOAT] = 1, + [HLSL_TYPE_INT ] = 2, + [HLSL_TYPE_UINT ] = 3, + [HLSL_TYPE_BOOL ] = 4, +}; + static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, struct fx_write_context *fx) { static const unsigned int NUMERIC_BASE_TYPE_SHIFT = 3; @@ -278,13 +329,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, [HLSL_CLASS_VECTOR] = 
2, [HLSL_CLASS_MATRIX] = 3, }; - static const uint32_t numeric_base_type[] = - { - [HLSL_TYPE_FLOAT] = 1, - [HLSL_TYPE_INT ] = 2, - [HLSL_TYPE_UINT ] = 3, - [HLSL_TYPE_BOOL ] = 4, - }; + struct hlsl_ctx *ctx = fx->ctx; uint32_t value = 0; switch (type->class) @@ -295,22 +340,20 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, value |= numeric_type_class[type->class]; break; default: - FIXME("Unexpected type class %u.\n", type->class); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); + hlsl_fixme(ctx, &ctx->location, "Not implemented for type class %u.", type->class); return 0; } - switch (type->base_type) + switch (type->e.numeric.type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_INT: case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: - value |= (numeric_base_type[type->base_type] << NUMERIC_BASE_TYPE_SHIFT); + value |= (fx_4_numeric_base_type[type->e.numeric.type] << NUMERIC_BASE_TYPE_SHIFT); break; default: - FIXME("Unexpected base type %u.\n", type->base_type); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); + hlsl_fixme(ctx, &ctx->location, "Not implemented for base type %u.", type->e.numeric.type); return 0; } @@ -322,20 +365,8 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, return value; } -static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) +static const char * get_fx_4_type_name(const struct hlsl_type *type) { - struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; - uint32_t name_offset, offset, size, stride, numeric_desc; - uint32_t elements_count = 0; - const char *name; - static const uint32_t variable_type[] = - { - [HLSL_CLASS_SCALAR] = 1, - [HLSL_CLASS_VECTOR] = 1, - [HLSL_CLASS_MATRIX] = 1, - [HLSL_CLASS_OBJECT] = 2, - [HLSL_CLASS_STRUCT] = 3, - }; static const char * const texture_type_names[] = { [HLSL_SAMPLER_DIM_GENERIC] = "texture", @@ -360,6 +391,41 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co 
[HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = "RWStructuredBuffer", }; + switch (type->class) + { + case HLSL_CLASS_SAMPLER: + return "SamplerState"; + + case HLSL_CLASS_TEXTURE: + return texture_type_names[type->sampler_dim]; + + case HLSL_CLASS_UAV: + return uav_type_names[type->sampler_dim]; + + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + return "DepthStencilView"; + + case HLSL_CLASS_RENDER_TARGET_VIEW: + return "RenderTargetView"; + + case HLSL_CLASS_VERTEX_SHADER: + return "VertexShader"; + + case HLSL_CLASS_PIXEL_SHADER: + return "PixelShader"; + + default: + return type->name; + } +} + +static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + uint32_t name_offset, offset, size, stride, numeric_desc; + uint32_t elements_count = 0; + const char *name; + /* Resolve arrays to element type and number of elements. */ if (type->class == HLSL_CLASS_ARRAY) { @@ -367,12 +433,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co type = hlsl_get_multiarray_element_type(type); } - if (type->base_type == HLSL_TYPE_TEXTURE) - name = texture_type_names[type->sampler_dim]; - else if (type->base_type == HLSL_TYPE_UAV) - name = uav_type_names[type->sampler_dim]; - else - name = type->name; + name = get_fx_4_type_name(type); name_offset = write_string(name, fx); offset = put_u32_unaligned(buffer, name_offset); @@ -382,11 +443,31 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: case HLSL_CLASS_MATRIX: - case HLSL_CLASS_OBJECT: - case HLSL_CLASS_STRUCT: - put_u32_unaligned(buffer, variable_type[type->class]); + put_u32_unaligned(buffer, 1); break; - default: + + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: + 
put_u32_unaligned(buffer, 2); + break; + + case HLSL_CLASS_STRUCT: + put_u32_unaligned(buffer, 3); + break; + + case HLSL_CLASS_ARRAY: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + vkd3d_unreachable(); + + case HLSL_CLASS_STRING: + case HLSL_CLASS_VOID: FIXME("Writing type class %u is not implemented.\n", type->class); set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); return 0; @@ -422,13 +503,8 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co put_u32_unaligned(buffer, field_type_offset); } } - else if (type->class == HLSL_CLASS_OBJECT) + else if (type->class == HLSL_CLASS_TEXTURE) { - static const uint32_t object_type[] = - { - [HLSL_TYPE_RENDERTARGETVIEW] = 19, - [HLSL_TYPE_DEPTHSTENCILVIEW] = 20, - }; static const uint32_t texture_type[] = { [HLSL_SAMPLER_DIM_GENERIC] = 9, @@ -442,6 +518,15 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co [HLSL_SAMPLER_DIM_CUBE] = 17, [HLSL_SAMPLER_DIM_CUBEARRAY] = 23, }; + + put_u32_unaligned(buffer, texture_type[type->sampler_dim]); + } + else if (type->class == HLSL_CLASS_SAMPLER) + { + put_u32_unaligned(buffer, 21); + } + else if (type->class == HLSL_CLASS_UAV) + { static const uint32_t uav_type[] = { [HLSL_SAMPLER_DIM_1D] = 31, @@ -453,29 +538,35 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = 40, }; - switch (type->base_type) - { - case HLSL_TYPE_DEPTHSTENCILVIEW: - case HLSL_TYPE_RENDERTARGETVIEW: - put_u32_unaligned(buffer, object_type[type->base_type]); - break; - case HLSL_TYPE_TEXTURE: - put_u32_unaligned(buffer, texture_type[type->sampler_dim]); - break; - case HLSL_TYPE_UAV: - put_u32_unaligned(buffer, uav_type[type->sampler_dim]); - break; - default: - FIXME("Object type %u is not supported.\n", type->base_type); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); - return 0; - } + put_u32_unaligned(buffer, uav_type[type->sampler_dim]); } - 
else /* Numeric type */ + else if (type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) + { + put_u32_unaligned(buffer, 20); + } + else if (type->class == HLSL_CLASS_RENDER_TARGET_VIEW) + { + put_u32_unaligned(buffer, 19); + } + else if (type->class == HLSL_CLASS_PIXEL_SHADER) + { + put_u32_unaligned(buffer, 5); + } + else if (type->class == HLSL_CLASS_VERTEX_SHADER) + { + put_u32_unaligned(buffer, 6); + } + else if (hlsl_is_numeric_type(type)) { numeric_desc = get_fx_4_numeric_type_description(type, fx); put_u32_unaligned(buffer, numeric_desc); } + else + { + FIXME("Type %u is not supported.\n", type->class); + set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); + return 0; + } return offset; } @@ -556,7 +647,7 @@ static void write_groups(struct fx_write_context *fx) { const struct hlsl_type *type = var->data_type; - if (type->base_type == HLSL_TYPE_EFFECT_GROUP) + if (type->class == HLSL_CLASS_EFFECT_GROUP) write_group(var, fx); } } @@ -565,11 +656,71 @@ static uint32_t write_fx_2_string(const char *string, struct fx_write_context *f { struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; const char *s = string ? string : ""; + static const char tail[3]; uint32_t size, offset; size = strlen(s) + 1; offset = put_u32(buffer, size); bytecode_put_bytes(buffer, s, size); + size %= 4; + if (size) + bytecode_put_bytes_unaligned(buffer, tail, 4 - size); + return offset; +} + +static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, const struct hlsl_semantic *semantic, + struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + uint32_t semantic_offset, offset, elements_count = 0, name_offset; + size_t i; + + /* Resolve arrays to element type and number of elements. 
*/ + if (type->class == HLSL_CLASS_ARRAY) + { + elements_count = hlsl_get_multiarray_size(type); + type = hlsl_get_multiarray_element_type(type); + } + + name_offset = write_string(name, fx); + semantic_offset = write_string(semantic->name, fx); + + offset = put_u32(buffer, hlsl_sm1_base_type(type)); + put_u32(buffer, hlsl_sm1_class(type)); + put_u32(buffer, name_offset); + put_u32(buffer, semantic_offset); + put_u32(buffer, elements_count); + + switch (type->class) + { + case HLSL_CLASS_VECTOR: + put_u32(buffer, type->dimx); + put_u32(buffer, type->dimy); + break; + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_MATRIX: + put_u32(buffer, type->dimy); + put_u32(buffer, type->dimx); + break; + case HLSL_CLASS_STRUCT: + put_u32(buffer, type->e.record.field_count); + break; + default: + ; + } + + if (type->class == HLSL_CLASS_STRUCT) + { + for (i = 0; i < type->e.record.field_count; ++i) + { + const struct hlsl_struct_field *field = &type->e.record.fields[i]; + + /* Validated in check_invalid_object_fields(). 
*/ + assert(hlsl_is_numeric_type(field->type)); + write_fx_2_parameter(field->type, field->name, &field->semantic, fx); + } + } + return offset; } @@ -595,6 +746,161 @@ static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_contex set_u32(buffer, count_offset, count); } +static uint32_t get_fx_2_type_size(const struct hlsl_type *type) +{ + uint32_t size = 0, elements_count; + size_t i; + + if (type->class == HLSL_CLASS_ARRAY) + { + elements_count = hlsl_get_multiarray_size(type); + type = hlsl_get_multiarray_element_type(type); + return get_fx_2_type_size(type) * elements_count; + } + else if (type->class == HLSL_CLASS_STRUCT) + { + for (i = 0; i < type->e.record.field_count; ++i) + { + const struct hlsl_struct_field *field = &type->e.record.fields[i]; + size += get_fx_2_type_size(field->type); + } + + return size; + } + + return type->dimx * type->dimy * sizeof(float); +} + +static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + const struct hlsl_type *type = var->data_type; + uint32_t offset, size, elements_count = 1; + + size = get_fx_2_type_size(type); + + if (type->class == HLSL_CLASS_ARRAY) + { + elements_count = hlsl_get_multiarray_size(type); + type = hlsl_get_multiarray_element_type(type); + } + + /* Note that struct fields must all be numeric; + * this was validated in check_invalid_object_fields(). */ + switch (type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + case HLSL_CLASS_STRUCT: + /* FIXME: write actual initial value */ + offset = put_u32(buffer, 0); + + for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) + put_u32(buffer, 0); + break; + + default: + /* Objects are given sequential ids. 
*/ + offset = put_u32(buffer, fx->object_variable_count++); + for (uint32_t i = 1; i < elements_count; ++i) + put_u32(buffer, fx->object_variable_count++); + break; + } + + return offset; +} + +static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type *type, + const struct vkd3d_shader_location *loc) +{ + switch (type->class) + { + case HLSL_CLASS_STRUCT: + /* Note that the fields must all be numeric; this was validated in + * check_invalid_object_fields(). */ + return true; + + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + return true; + + case HLSL_CLASS_ARRAY: + return is_type_supported_fx_2(ctx, type->e.array.type, loc); + + case HLSL_CLASS_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + case HLSL_SAMPLER_DIM_2D: + case HLSL_SAMPLER_DIM_3D: + case HLSL_SAMPLER_DIM_CUBE: + case HLSL_SAMPLER_DIM_GENERIC: + return true; + default: + return false; + } + break; + + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_VERTEX_SHADER: + hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); + return false; + + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_UAV: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_VOID: + return false; + + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + /* This cannot appear as an extern variable. 
*/ + break; + } + + vkd3d_unreachable(); +} + +static void write_fx_2_parameters(struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t desc_offset, value_offset, flags; + struct hlsl_ctx *ctx = fx->ctx; + struct hlsl_ir_var *var; + enum fx_2_parameter_flags + { + IS_SHARED = 0x1, + }; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!is_type_supported_fx_2(ctx, var->data_type, &var->loc)) + continue; + + desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); + value_offset = write_fx_2_initial_value(var, fx); + + flags = 0; + if (var->storage_modifiers & HLSL_STORAGE_SHARED) + flags |= IS_SHARED; + + put_u32(buffer, desc_offset); /* Parameter description */ + put_u32(buffer, value_offset); /* Value */ + put_u32(buffer, flags); /* Flags */ + + put_u32(buffer, 0); /* Annotations count */ + if (has_annotations(var)) + hlsl_fixme(ctx, &ctx->location, "Writing annotations for parameters is not implemented."); + + ++fx->parameter_count; + } +} + static const struct fx_write_context_ops fx_2_ops = { .write_string = write_fx_2_string, @@ -604,12 +910,13 @@ static const struct fx_write_context_ops fx_2_ops = static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) { + uint32_t offset, size, technique_count, parameter_count, object_count; struct vkd3d_bytecode_buffer buffer = { 0 }; struct vkd3d_bytecode_buffer *structured; - uint32_t offset, size, technique_count; struct fx_write_context fx; fx_write_context_init(ctx, &fx_2_ops, &fx); + fx.object_variable_count = 1; structured = &fx.structured; /* First entry is always zeroed and skipped. */ @@ -618,12 +925,14 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&buffer, 0xfeff0901); /* Version. 
*/ offset = put_u32(&buffer, 0); - put_u32(structured, 0); /* Parameter count */ + parameter_count = put_u32(structured, 0); /* Parameter count */ technique_count = put_u32(structured, 0); put_u32(structured, 0); /* Unknown */ - put_u32(structured, 0); /* Object count */ + object_count = put_u32(structured, 0); - /* TODO: parameters */ + write_fx_2_parameters(&fx); + set_u32(structured, parameter_count, fx.parameter_count); + set_u32(structured, object_count, fx.object_variable_count); write_techniques(ctx->globals, &fx); set_u32(structured, technique_count, fx.technique_count); @@ -643,35 +952,39 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) vkd3d_free(fx.unstructured.data); vkd3d_free(fx.structured.data); - if (!fx.status) + if (!fx.technique_count) + hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE, "No techniques found."); + + if (fx.status < 0) + ctx->result = fx.status; + + if (!ctx->result) { out->code = buffer.data; out->size = buffer.size; } - if (fx.status < 0) - ctx->result = fx.status; - return fx_write_context_cleanup(&fx); } static const struct fx_write_context_ops fx_4_ops = { .write_string = write_fx_4_string, - .write_type = write_fx_4_type, .write_technique = write_fx_4_technique, .write_pass = write_fx_4_pass, + .are_child_effects_supported = true, }; -static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) +static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, struct fx_write_context *fx) { struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t name_offset, type_offset, value_offset; uint32_t semantic_offset, flags = 0; - uint32_t name_offset, type_offset; enum fx_4_numeric_variable_flags { HAS_EXPLICIT_BIND_POINT = 0x4, }; + struct hlsl_ctx *ctx = fx->ctx; /* Explicit bind point. 
*/ if (var->reg_reservation.reg_type) @@ -686,18 +999,345 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ put_u32(buffer, var->buffer_offset); /* Offset in the constant buffer */ - put_u32(buffer, 0); /* FIXME: default value offset */ + value_offset = put_u32(buffer, 0); /* Default value offset */ put_u32(buffer, flags); /* Flags */ - put_u32(buffer, 0); /* Annotations count */ - /* FIXME: write annotations */ + if (shared) + { + fx->shared_numeric_variable_count++; + } + else + { + /* FIXME: write default value */ + set_u32(buffer, value_offset, 0); + + put_u32(buffer, 0); /* Annotations count */ + if (has_annotations(var)) + hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); + + fx->numeric_variable_count++; + } +} + +struct rhs_named_value +{ + const char *name; + unsigned int value; +}; + +static bool get_fx_4_state_enum_value(const struct rhs_named_value *pairs, + const char *name, unsigned int *value) +{ + while (pairs->name) + { + if (!ascii_strcasecmp(pairs->name, name)) + { + *value = pairs->value; + return true; + } + + pairs++; + } + + return false; +} + +static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + struct hlsl_type *data_type = value->node.data_type; + struct hlsl_ctx *ctx = fx->ctx; + uint32_t i, type, offset; + unsigned int count = hlsl_type_component_count(data_type); + + offset = put_u32_unaligned(buffer, count); + + for (i = 0; i < count; ++i) + { + if (hlsl_is_numeric_type(data_type)) + { + switch (data_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + type = fx_4_numeric_base_type[data_type->e.numeric.type]; + break; + default: + type = 0; + hlsl_fixme(ctx, &ctx->location, "Unsupported numeric state value 
type %u.", data_type->e.numeric.type); + } + } + + put_u32_unaligned(buffer, type); + put_u32_unaligned(buffer, value->value.u[i].u); + } + + return offset; +} + +static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, + struct fx_write_context *fx) +{ + uint32_t value_offset = 0, assignment_type = 0, rhs_offset; + uint32_t type_offset; + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + struct hlsl_ctx *ctx = fx->ctx; + struct hlsl_ir_node *value = entry->args->node; + + if (entry->lhs_has_index) + hlsl_fixme(ctx, &var->loc, "Unsupported assignment to array element."); + + put_u32(buffer, entry->name_id); + put_u32(buffer, 0); /* TODO: destination index */ + type_offset = put_u32(buffer, 0); + rhs_offset = put_u32(buffer, 0); + + switch (value->type) + { + case HLSL_IR_CONSTANT: + { + struct hlsl_ir_constant *c = hlsl_ir_constant(value); + + value_offset = write_fx_4_state_numeric_value(c, fx); + assignment_type = 1; + break; + } + default: + hlsl_fixme(ctx, &var->loc, "Unsupported assignment type for state %s.", entry->name); + } + + set_u32(buffer, type_offset, assignment_type); + set_u32(buffer, rhs_offset, value_offset); +} + +static bool state_block_contains_state(const char *name, unsigned int start, struct hlsl_state_block *block) +{ + unsigned int i; + + for (i = start; i < block->count; ++i) + { + if (!ascii_strcasecmp(block->entries[i]->name, name)) + return true; + } + + return false; +} + +struct replace_state_context +{ + const struct rhs_named_value *values; + struct hlsl_ir_var *var; +}; + +static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct replace_state_context *replace_context = context; + struct hlsl_ir_stateblock_constant *state_constant; + struct hlsl_ir_node *c; + unsigned int value; + + if (!replace_context->values) + return false; + if (instr->type != HLSL_IR_STATEBLOCK_CONSTANT) + return false; + + state_constant = 
hlsl_ir_stateblock_constant(instr); + if (!get_fx_4_state_enum_value(replace_context->values, state_constant->name, &value)) + { + hlsl_error(ctx, &replace_context->var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Unrecognized state constant %s.", state_constant->name); + return false; + } + + if (!(c = hlsl_new_uint_constant(ctx, value, &replace_context->var->loc))) + return false; + + list_add_before(&state_constant->node.entry, &c->entry); + hlsl_replace_node(&state_constant->node, c); + + return true; +} + +static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, + struct fx_write_context *fx) +{ + static const struct rhs_named_value filter_values[] = + { + { "MIN_MAG_MIP_POINT", 0x00 }, + { "MIN_MAG_POINT_MIP_LINEAR", 0x01 }, + { "MIN_POINT_MAG_LINEAR_MIP_POINT", 0x04 }, + { "MIN_POINT_MAG_MIP_LINEAR", 0x05 }, + { "MIN_LINEAR_MAG_MIP_POINT", 0x10 }, + { "MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x11 }, + { "MIN_MAG_LINEAR_MIP_POINT", 0x14 }, + { "MIN_MAG_MIP_LINEAR", 0x15 }, + { "ANISOTROPIC", 0x55 }, + { "COMPARISON_MIN_MAG_MIP_POINT", 0x80 }, + { "COMPARISON_MIN_MAG_POINT_MIP_LINEAR", 0x81 }, + { "COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT", 0x84 }, + { "COMPARISON_MIN_POINT_MAG_MIP_LINEAR", 0x85 }, + { "COMPARISON_MIN_LINEAR_MAG_MIP_POINT", 0x90 }, + { "COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x91 }, + { "COMPARISON_MIN_MAG_LINEAR_MIP_POINT", 0x94 }, + { "COMPARISON_MIN_MAG_MIP_LINEAR", 0x95 }, + { "COMPARISON_ANISOTROPIC", 0xd5 }, + { NULL }, + }; + + static const struct rhs_named_value address_values[] = + { + { "WRAP", 1 }, + { "MIRROR", 2 }, + { "CLAMP", 3 }, + { "BORDER", 4 }, + { "MIRROR_ONCE", 5 }, + { NULL }, + }; + + static const struct rhs_named_value compare_func_values[] = + { + { "NEVER", 1 }, + { "LESS", 2 }, + { "EQUAL", 3 }, + { "LESS_EQUAL", 4 }, + { "GREATER", 5 }, + { "NOT_EQUAL", 6 }, + { "GREATER_EQUAL", 7 }, + { "ALWAYS", 8 }, + { NULL } + }; + + static const struct state + { + const 
char *name; + enum hlsl_type_class container; + enum hlsl_base_type type; + unsigned int dimx; + uint32_t id; + const struct rhs_named_value *values; + } + states[] = + { + { "Filter", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 45, filter_values }, + { "AddressU", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 46, address_values }, + { "AddressV", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 47, address_values }, + { "AddressW", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 48, address_values }, + { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 49 }, + { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 50 }, + { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 51, compare_func_values }, + { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 4, 52 }, + { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 53 }, + { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 54 }, + /* TODO: "Texture" field */ + }; + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); + struct replace_state_context replace_context; + struct hlsl_ir_node *node, *cast; + const struct state *state = NULL; + struct hlsl_ctx *ctx = fx->ctx; + struct hlsl_type *state_type; + unsigned int i; + bool progress; + + for (i = 0; i < ARRAY_SIZE(states); ++i) + { + if (type->class == states[i].container + && !ascii_strcasecmp(entry->name, states[i].name)) + { + state = &states[i]; + break; + } + } + + if (!state) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unrecognized state name %s.", entry->name); + return; + } + + if (entry->args_count != 1) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unrecognized initializer for the state %s.", + entry->name); + return; + } + + entry->name_id = state->id; + + replace_context.values = state->values; + replace_context.var = var; + + /* Turned named constants to actual constants. 
*/ + hlsl_transform_ir(ctx, replace_state_block_constant, entry->instrs, &replace_context); + + if (state->dimx) + state_type = hlsl_get_vector_type(ctx, state->type, state->dimx); + else + state_type = hlsl_get_scalar_type(ctx, state->type); + + /* Cast to expected property type. */ + node = entry->args->node; + if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) + return; + list_add_after(&node->entry, &cast->entry); + + hlsl_src_remove(entry->args); + hlsl_src_from_node(entry->args, cast); + + do + { + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); + progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); + } while (progress); +} + +static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) +{ + uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i, j; + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t count_offset, count; + + for (i = 0; i < elements_count; ++i) + { + struct hlsl_state_block *block; + + count_offset = put_u32(buffer, 0); + + count = 0; + if (var->state_blocks) + { + block = var->state_blocks[i]; + + for (j = 0; j < block->count; ++j) + { + struct hlsl_state_block_entry *entry = block->entries[j]; + + /* Skip if property is reassigned later. This will use the last assignment. */ + if (state_block_contains_state(entry->name, j + 1, block)) + continue; + + /* Resolve special constant names and property names. 
*/ + resolve_fx_4_state_block_values(var, entry, fx); + + write_fx_4_state_assignment(var, entry, fx); + ++count; + } + } + + set_u32(buffer, count_offset, count); + } } static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) { + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); + uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); struct vkd3d_bytecode_buffer *buffer = &fx->structured; uint32_t semantic_offset, bind_point = ~0u; - uint32_t name_offset, type_offset; + uint32_t name_offset, type_offset, i; + struct hlsl_ctx *ctx = fx->ctx; if (var->reg_reservation.reg_type) bind_point = var->reg_reservation.reg_index; @@ -712,8 +1352,52 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ put_u32(buffer, bind_point); /* Explicit bind point */ + if (fx->child_effect && var->storage_modifiers & HLSL_STORAGE_SHARED) + { + ++fx->shared_object_count; + return; + } + + /* Initializer */ + switch (type->class) + { + case HLSL_CLASS_RENDER_TARGET_VIEW: + fx->rtv_count += elements_count; + break; + case HLSL_CLASS_TEXTURE: + fx->texture_count += elements_count; + break; + case HLSL_CLASS_UAV: + fx->uav_count += elements_count; + break; + + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_VERTEX_SHADER: + /* FIXME: write shader blobs, once parser support works. 
*/ + for (i = 0; i < elements_count; ++i) + put_u32(buffer, 0); + fx->shader_count += elements_count; + break; + + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + fx->dsv_count += elements_count; + break; + + case HLSL_CLASS_SAMPLER: + write_fx_4_state_object_initializer(var, fx); + fx->sampler_state_count += elements_count; + break; + + default: + hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", + type->e.numeric.type); + } + put_u32(buffer, 0); /* Annotations count */ - /* FIXME: write annotations */ + if (has_annotations(var)) + hlsl_fixme(ctx, &ctx->location, "Writing annotations for object variables is not implemented."); + + ++fx->object_variable_count; } static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx) @@ -729,12 +1413,16 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx struct hlsl_ctx *ctx = fx->ctx; struct hlsl_ir_var *var; uint32_t count_offset; + bool shared; + + shared = fx->child_effect && b->modifiers & HLSL_STORAGE_SHARED; if (b->reservation.reg_type) bind_point = b->reservation.reg_index; if (b->type == HLSL_BUFFER_TEXTURE) flags |= IS_TBUFFER; - /* FIXME: set 'single' flag for fx_5_0 */ + if (ctx->profile->major_version == 5 && b->modifiers & HLSL_MODIFIER_SINGLE) + flags |= IS_SINGLE; name_offset = write_string(b->name, fx); @@ -744,8 +1432,17 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx count_offset = put_u32(buffer, 0); put_u32(buffer, bind_point); /* Bind point */ - put_u32(buffer, 0); /* Annotations count */ - /* FIXME: write annotations */ + if (shared) + { + ++fx->shared_buffer_count; + } + else + { + put_u32(buffer, 0); /* Annotations count */ + if (b->annotations) + hlsl_fixme(ctx, &b->loc, "Writing annotations for buffers is not implemented."); + ++fx->buffer_count; + } count = 0; size = 0; @@ -754,73 +1451,76 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx if 
(var->buffer != b) continue; - write_fx_4_numeric_variable(var, fx); + write_fx_4_numeric_variable(var, shared, fx); size += get_fx_4_type_size(var->data_type); ++count; } set_u32(buffer, count_offset, count); set_u32(buffer, size_offset, align(size, 16)); - - fx->numeric_variable_count += count; } -static void write_buffers(struct fx_write_context *fx) +static void write_buffers(struct fx_write_context *fx, bool shared) { struct hlsl_buffer *buffer; - struct hlsl_block block; - - hlsl_block_init(&block); - hlsl_prepend_global_uniform_copy(fx->ctx, &block); - hlsl_block_init(&block); - hlsl_calculate_buffer_offsets(fx->ctx); LIST_FOR_EACH_ENTRY(buffer, &fx->ctx->buffers, struct hlsl_buffer, entry) { - if (!buffer->size) + if (!buffer->size && !fx->include_empty_buffers) + continue; + if (!strcmp(buffer->name, "$Params")) + continue; + if (fx->child_effect && (shared != !!(buffer->modifiers & HLSL_STORAGE_SHARED))) continue; write_fx_4_buffer(buffer, fx); - ++fx->buffer_count; } } -static bool is_object_variable(const struct hlsl_ir_var *var) +static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) { const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); - if (type->class != HLSL_CLASS_OBJECT) - return false; - - switch (type->base_type) + switch (type->class) { - case HLSL_TYPE_SAMPLER: - case HLSL_TYPE_TEXTURE: - case HLSL_TYPE_UAV: - case HLSL_TYPE_PIXELSHADER: - case HLSL_TYPE_VERTEXSHADER: - case HLSL_TYPE_RENDERTARGETVIEW: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_TEXTURE: return true; + case HLSL_CLASS_UAV: + if (ctx->profile->major_version < 5) + return false; + if (type->e.resource.rasteriser_ordered) + return false; + return true; + case HLSL_CLASS_VERTEX_SHADER: + return true; + default: return false; } } -static void write_objects(struct fx_write_context *fx) +static void 
write_objects(struct fx_write_context *fx, bool shared) { + struct hlsl_ctx *ctx = fx->ctx; struct hlsl_ir_var *var; - uint32_t count = 0; - LIST_FOR_EACH_ENTRY(var, &fx->ctx->extern_vars, struct hlsl_ir_var, extern_entry) + if (shared && !fx->child_effect) + return; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!is_object_variable(var)) + if (!is_supported_object_variable(ctx, var)) + continue; + + if (fx->child_effect && (shared != !!(var->storage_modifiers & HLSL_STORAGE_SHARED))) continue; write_fx_4_object_variable(var, fx); - ++count; } - - fx->object_variable_count += count; } static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) @@ -833,10 +1533,10 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&fx.unstructured, 0); /* Empty string placeholder. */ - write_buffers(&fx); - write_objects(&fx); - /* TODO: shared buffers */ - /* TODO: shared objects */ + write_buffers(&fx, false); + write_objects(&fx, false); + write_buffers(&fx, true); + write_objects(&fx, true); write_techniques(ctx->globals, &fx); @@ -844,20 +1544,20 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&buffer, fx.buffer_count); /* Buffer count. */ put_u32(&buffer, fx.numeric_variable_count); /* Numeric variable count. */ put_u32(&buffer, fx.object_variable_count); /* Object variable count. */ - put_u32(&buffer, 0); /* Pool buffer count. */ - put_u32(&buffer, 0); /* Pool variable count. */ - put_u32(&buffer, 0); /* Pool object count. */ + put_u32(&buffer, fx.shared_buffer_count); + put_u32(&buffer, fx.shared_numeric_variable_count); + put_u32(&buffer, fx.shared_object_count); put_u32(&buffer, fx.technique_count); size_offset = put_u32(&buffer, 0); /* Unstructured size. */ put_u32(&buffer, 0); /* String count. */ - put_u32(&buffer, 0); /* Texture object count. 
*/ + put_u32(&buffer, fx.texture_count); put_u32(&buffer, 0); /* Depth stencil state count. */ put_u32(&buffer, 0); /* Blend state count. */ put_u32(&buffer, 0); /* Rasterizer state count. */ - put_u32(&buffer, 0); /* Sampler state count. */ - put_u32(&buffer, 0); /* Rendertarget view count. */ - put_u32(&buffer, 0); /* Depth stencil view count. */ - put_u32(&buffer, 0); /* Shader count. */ + put_u32(&buffer, fx.sampler_state_count); + put_u32(&buffer, fx.rtv_count); + put_u32(&buffer, fx.dsv_count); + put_u32(&buffer, fx.shader_count); put_u32(&buffer, 0); /* Inline shader count. */ set_u32(&buffer, size_offset, fx.unstructured.size); @@ -870,15 +1570,15 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) set_status(&fx, buffer.status); - if (!fx.status) + if (fx.status < 0) + ctx->result = fx.status; + + if (!ctx->result) { out->code = buffer.data; out->size = buffer.size; } - if (fx.status < 0) - ctx->result = fx.status; - return fx_write_context_cleanup(&fx); } @@ -892,8 +1592,8 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&fx.unstructured, 0); /* Empty string placeholder. */ - write_buffers(&fx); - write_objects(&fx); + write_buffers(&fx, false); + write_objects(&fx, false); /* TODO: interface variables */ write_groups(&fx); @@ -902,23 +1602,23 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&buffer, fx.buffer_count); /* Buffer count. */ put_u32(&buffer, fx.numeric_variable_count); /* Numeric variable count. */ put_u32(&buffer, fx.object_variable_count); /* Object variable count. */ - put_u32(&buffer, 0); /* Pool buffer count. */ - put_u32(&buffer, 0); /* Pool variable count. */ - put_u32(&buffer, 0); /* Pool object count. 
*/ + put_u32(&buffer, fx.shared_buffer_count); + put_u32(&buffer, fx.shared_numeric_variable_count); + put_u32(&buffer, fx.shared_object_count); put_u32(&buffer, fx.technique_count); size_offset = put_u32(&buffer, 0); /* Unstructured size. */ put_u32(&buffer, 0); /* String count. */ - put_u32(&buffer, 0); /* Texture object count. */ + put_u32(&buffer, fx.texture_count); put_u32(&buffer, 0); /* Depth stencil state count. */ put_u32(&buffer, 0); /* Blend state count. */ put_u32(&buffer, 0); /* Rasterizer state count. */ - put_u32(&buffer, 0); /* Sampler state count. */ - put_u32(&buffer, 0); /* Rendertarget view count. */ - put_u32(&buffer, 0); /* Depth stencil view count. */ - put_u32(&buffer, 0); /* Shader count. */ + put_u32(&buffer, fx.sampler_state_count); + put_u32(&buffer, fx.rtv_count); + put_u32(&buffer, fx.dsv_count); + put_u32(&buffer, fx.shader_count); put_u32(&buffer, 0); /* Inline shader count. */ put_u32(&buffer, fx.group_count); /* Group count. */ - put_u32(&buffer, 0); /* UAV count. */ + put_u32(&buffer, fx.uav_count); put_u32(&buffer, 0); /* Interface variables count. */ put_u32(&buffer, 0); /* Interface variable element count. */ put_u32(&buffer, 0); /* Class instance elements count. 
*/ @@ -933,15 +1633,15 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) set_status(&fx, buffer.status); - if (!fx.status) + if (fx.status < 0) + ctx->result = fx.status; + + if (!ctx->result) { out->code = buffer.data; out->size = buffer.size; } - if (fx.status < 0) - ctx->result = fx.status; - return fx_write_context_cleanup(&fx); } diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c index bdd03c1e72a..3e482a5fc70 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -20,29 +20,14 @@ struct vkd3d_glsl_generator { - struct vkd3d_shader_version version; + struct vsir_program *program; struct vkd3d_string_buffer buffer; struct vkd3d_shader_location location; struct vkd3d_shader_message_context *message_context; + unsigned int indent; bool failed; }; -struct vkd3d_glsl_generator *vkd3d_glsl_generator_create(const struct vkd3d_shader_version *version, - struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) -{ - struct vkd3d_glsl_generator *generator; - - if (!(generator = vkd3d_malloc(sizeof(*generator)))) - return NULL; - - memset(generator, 0, sizeof(*generator)); - generator->version = *version; - vkd3d_string_buffer_init(&generator->buffer); - generator->location = *location; - generator->message_context = message_context; - return generator; -} - static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( struct vkd3d_glsl_generator *generator, enum vkd3d_shader_error error, const char *fmt, ...) 
@@ -55,10 +40,23 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( generator->failed = true; } +static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent) +{ + vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); +} + +static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + shader_glsl_print_indent(&gen->buffer, gen->indent); + vkd3d_string_buffer_printf(&gen->buffer, "/* <unhandled instruction %#x> */\n", ins->handler_idx); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled instruction %#x.", ins->handler_idx); +} + static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, const struct vkd3d_shader_instruction *ins) { - const struct vkd3d_shader_version *version = &generator->version; + const struct vkd3d_shader_version *version = &generator->program->shader_version; /* * TODO: Implement in_subroutine @@ -66,6 +64,7 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, */ if (version->major >= 4) { + shader_glsl_print_indent(&generator->buffer, generator->indent); vkd3d_string_buffer_printf(&generator->buffer, "return;\n"); } } @@ -73,48 +72,57 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator, const struct vkd3d_shader_instruction *instruction) { + generator->location = instruction->location; + switch (instruction->handler_idx) { case VKD3DSIH_DCL_INPUT: case VKD3DSIH_DCL_OUTPUT: case VKD3DSIH_DCL_OUTPUT_SIV: + case VKD3DSIH_NOP: break; case VKD3DSIH_RET: shader_glsl_ret(generator, instruction); break; default: - vkd3d_glsl_compiler_error(generator, - VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Unhandled instruction %#x", instruction->handler_idx); + shader_glsl_unhandled(generator, instruction); break; } } -int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, - struct vsir_program
*program, struct vkd3d_shader_code *out) +static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) { + const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; + struct vkd3d_string_buffer *buffer = &gen->buffer; unsigned int i; void *code; - vkd3d_string_buffer_printf(&generator->buffer, "#version 440\n\n"); - vkd3d_string_buffer_printf(&generator->buffer, "void main()\n{\n"); + ERR("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); - generator->location.column = 0; - for (i = 0; i < program->instructions.count; ++i) + vkd3d_string_buffer_printf(buffer, "#version 440\n\n"); + + vkd3d_string_buffer_printf(buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL)); + + vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); + + ++gen->indent; + for (i = 0; i < instructions->count; ++i) { - generator->location.line = i + 1; - vkd3d_glsl_handle_instruction(generator, &program->instructions.elements[i]); + vkd3d_glsl_handle_instruction(gen, &instructions->elements[i]); } - if (generator->failed) + vkd3d_string_buffer_printf(buffer, "}\n"); + + if (TRACE_ON()) + vkd3d_string_buffer_trace(buffer); + + if (gen->failed) return VKD3D_ERROR_INVALID_SHADER; - vkd3d_string_buffer_printf(&generator->buffer, "}\n"); - - if ((code = vkd3d_malloc(generator->buffer.buffer_size))) + if ((code = vkd3d_malloc(buffer->buffer_size))) { - memcpy(code, generator->buffer.buffer, generator->buffer.content_size); - out->size = generator->buffer.content_size; + memcpy(code, buffer->buffer, buffer->content_size); + out->size = buffer->content_size; out->code = code; } else return VKD3D_ERROR_OUT_OF_MEMORY; @@ -122,8 +130,33 @@ int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, return VKD3D_OK; } -void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator) +static void vkd3d_glsl_generator_cleanup(struct 
vkd3d_glsl_generator *gen) { - vkd3d_string_buffer_cleanup(&generator->buffer); - vkd3d_free(generator); + vkd3d_string_buffer_cleanup(&gen->buffer); +} + +static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, + struct vsir_program *program, struct vkd3d_shader_message_context *message_context) +{ + memset(gen, 0, sizeof(*gen)); + gen->program = program; + vkd3d_string_buffer_init(&gen->buffer); + gen->message_context = message_context; +} + +int glsl_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, + struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_glsl_generator generator; + int ret; + + if ((ret = vsir_program_normalise(program, config_flags, compile_info, message_context)) < 0) + return ret; + + vkd3d_glsl_generator_init(&generator, program, message_context); + ret = vkd3d_glsl_generator_generate(&generator, out); + vkd3d_glsl_generator_cleanup(&generator); + + return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 538f083df9c..99214fba6de 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -134,14 +134,43 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) return hlsl_get_var(scope->upper, name); } -void hlsl_free_var(struct hlsl_ir_var *decl) +static void free_state_block_entry(struct hlsl_state_block_entry *entry) +{ + unsigned int i; + + vkd3d_free(entry->name); + for (i = 0; i < entry->args_count; ++i) + hlsl_src_remove(&entry->args[i]); + vkd3d_free(entry->args); + hlsl_block_cleanup(entry->instrs); + vkd3d_free(entry->instrs); + vkd3d_free(entry); +} + +void hlsl_free_state_block(struct hlsl_state_block *state_block) { unsigned int k; + assert(state_block); + for (k = 0; k < state_block->count; ++k) + free_state_block_entry(state_block->entries[k]); + vkd3d_free(state_block->entries); + 
vkd3d_free(state_block); +} + +void hlsl_free_var(struct hlsl_ir_var *decl) +{ + unsigned int k, i; + vkd3d_free((void *)decl->name); hlsl_cleanup_semantic(&decl->semantic); for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) vkd3d_free((void *)decl->objects_usage[k]); + + for (i = 0; i < decl->state_block_count; ++i) + hlsl_free_state_block(decl->state_blocks[i]); + vkd3d_free(decl->state_blocks); + vkd3d_free(decl); } @@ -201,50 +230,46 @@ unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type) bool hlsl_type_is_resource(const struct hlsl_type *type) { - if (type->class == HLSL_CLASS_ARRAY) - return hlsl_type_is_resource(type->e.array.type); - - if (type->class == HLSL_CLASS_OBJECT) + switch (type->class) { - switch (type->base_type) - { - case HLSL_TYPE_TEXTURE: - case HLSL_TYPE_SAMPLER: - case HLSL_TYPE_UAV: - return true; - default: - return false; - } + case HLSL_CLASS_ARRAY: + return hlsl_type_is_resource(type->e.array.type); + + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + return true; + + default: + return false; } - return false; } /* Only intended to be used for derefs (after copies have been lowered to components or vectors) or * resources, since for both their data types span across a single regset. 
*/ static enum hlsl_regset type_get_regset(const struct hlsl_type *type) { - if (hlsl_is_numeric_type(type)) - return HLSL_REGSET_NUMERIC; - - if (type->class == HLSL_CLASS_ARRAY) - return type_get_regset(type->e.array.type); - - if (type->class == HLSL_CLASS_OBJECT) + switch (type->class) { - switch (type->base_type) - { - case HLSL_TYPE_TEXTURE: - return HLSL_REGSET_TEXTURES; + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + return HLSL_REGSET_NUMERIC; - case HLSL_TYPE_SAMPLER: - return HLSL_REGSET_SAMPLERS; + case HLSL_CLASS_ARRAY: + return type_get_regset(type->e.array.type); - case HLSL_TYPE_UAV: - return HLSL_REGSET_UAVS; + case HLSL_CLASS_SAMPLER: + return HLSL_REGSET_SAMPLERS; - default: - vkd3d_unreachable(); - } + case HLSL_CLASS_TEXTURE: + return HLSL_REGSET_TEXTURES; + + case HLSL_CLASS_UAV: + return HLSL_REGSET_UAVS; + + default: + break; } vkd3d_unreachable(); @@ -330,16 +355,28 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type break; } - case HLSL_CLASS_OBJECT: - { - if (hlsl_type_is_resource(type)) - { - enum hlsl_regset regset = type_get_regset(type); - - type->reg_size[regset] = 1; - } + case HLSL_CLASS_SAMPLER: + type->reg_size[HLSL_REGSET_SAMPLERS] = 1; + break; + + case HLSL_CLASS_TEXTURE: + type->reg_size[HLSL_REGSET_TEXTURES] = 1; + break; + + case HLSL_CLASS_UAV: + type->reg_size[HLSL_REGSET_UAVS] = 1; + break; + + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_VOID: break; - } } } @@ -352,6 +389,25 @@ unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, return type->reg_size[regset]; } +static struct hlsl_type *hlsl_new_simple_type(struct hlsl_ctx *ctx, const char *name, enum hlsl_type_class class) +{ + struct hlsl_type *type; + + if 
(!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + if (!(type->name = hlsl_strdup(ctx, name))) + { + vkd3d_free(type); + return NULL; + } + type->class = class; + hlsl_type_calculate_reg_size(ctx, type); + + list_add_tail(&ctx->types, &type->entry); + + return type; +} + static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, enum hlsl_type_class type_class, enum hlsl_base_type base_type, unsigned dimx, unsigned dimy) { @@ -365,7 +421,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e return NULL; } type->class = type_class; - type->base_type = base_type; + type->e.numeric.type = base_type; type->dimx = dimx; type->dimy = dimy; hlsl_type_calculate_reg_size(ctx, type); @@ -377,7 +433,32 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e static bool type_is_single_component(const struct hlsl_type *type) { - return type->class == HLSL_CLASS_SCALAR || type->class == HLSL_CLASS_OBJECT; + switch (type->class) + { + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: + return true; + + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + case HLSL_CLASS_STRUCT: + case HLSL_CLASS_ARRAY: + return false; + + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: + break; + } + vkd3d_unreachable(); } /* Given a type and a component index, this function moves one step through the path required to @@ -400,7 +481,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, { case HLSL_CLASS_VECTOR: assert(index < type->dimx); - *type_ptr = hlsl_get_scalar_type(ctx, type->base_type); + *type_ptr = hlsl_get_scalar_type(ctx, type->e.numeric.type); *index_ptr = 0; return index; @@ -410,7 +491,7 @@ 
static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, bool row_major = hlsl_type_is_row_major(type); assert(index < type->dimx * type->dimy); - *type_ptr = hlsl_get_vector_type(ctx, type->base_type, row_major ? type->dimx : type->dimy); + *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, row_major ? type->dimx : type->dimy); *index_ptr = row_major ? x : y; return row_major ? y : x; } @@ -496,11 +577,21 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty } break; - case HLSL_CLASS_OBJECT: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: assert(idx == 0); break; - default: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: vkd3d_unreachable(); } type = next_type; @@ -674,13 +765,13 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co switch (type->class) { case HLSL_CLASS_VECTOR: - return hlsl_get_scalar_type(ctx, type->base_type); + return hlsl_get_scalar_type(ctx, type->e.numeric.type); case HLSL_CLASS_MATRIX: if (hlsl_type_is_row_major(type)) - return hlsl_get_vector_type(ctx, type->base_type, type->dimx); + return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx); else - return hlsl_get_vector_type(ctx, type->base_type, type->dimy); + return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimy); case HLSL_CLASS_ARRAY: return type->e.array.type; @@ -727,7 +818,6 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; type->class = HLSL_CLASS_STRUCT; - type->base_type = HLSL_TYPE_VOID; type->name = name; type->dimy = 1; type->e.record.fields = fields; @@ -746,8 +836,7 @@ struct hlsl_type *hlsl_new_texture_type(struct 
hlsl_ctx *ctx, enum hlsl_sampler_ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; - type->class = HLSL_CLASS_OBJECT; - type->base_type = HLSL_TYPE_TEXTURE; + type->class = HLSL_CLASS_TEXTURE; type->dimx = 4; type->dimy = 1; type->sampler_dim = dim; @@ -765,8 +854,7 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; - type->class = HLSL_CLASS_OBJECT; - type->base_type = HLSL_TYPE_UAV; + type->class = HLSL_CLASS_UAV; type->dimx = format->dimx; type->dimy = 1; type->sampler_dim = dim; @@ -784,7 +872,10 @@ static const char * get_case_insensitive_typename(const char *name) "dword", "float", "matrix", + "pixelshader", + "texture", "vector", + "vertexshader", }; unsigned int i; @@ -865,12 +956,24 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) case HLSL_CLASS_ARRAY: return hlsl_type_component_count(type->e.array.type) * type->e.array.elements_count; - case HLSL_CLASS_OBJECT: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: return 1; - default: - vkd3d_unreachable(); + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: + break; } + + vkd3d_unreachable(); } bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2) @@ -880,56 +983,73 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 if (t1->class != t2->class) return false; - if (t1->base_type != t2->base_type) - return false; - if (t1->base_type == HLSL_TYPE_SAMPLER || t1->base_type == HLSL_TYPE_TEXTURE - || t1->base_type == HLSL_TYPE_UAV) + + switch (t1->class) { - if (t1->sampler_dim != t2->sampler_dim) - return false; - if ((t1->base_type == HLSL_TYPE_TEXTURE || t1->base_type == HLSL_TYPE_UAV) 
- && t1->sampler_dim != HLSL_SAMPLER_DIM_GENERIC - && !hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format)) - return false; - if (t1->base_type == HLSL_TYPE_UAV && t1->e.resource.rasteriser_ordered != t2->e.resource.rasteriser_ordered) - return false; - } - if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR) - != (t2->modifiers & HLSL_MODIFIER_ROW_MAJOR)) - return false; - if (t1->dimx != t2->dimx) - return false; - if (t1->dimy != t2->dimy) - return false; - if (t1->class == HLSL_CLASS_STRUCT) - { - size_t i; + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + if (t1->e.numeric.type != t2->e.numeric.type) + return false; + if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR) + != (t2->modifiers & HLSL_MODIFIER_ROW_MAJOR)) + return false; + if (t1->dimx != t2->dimx) + return false; + if (t1->dimy != t2->dimy) + return false; + return true; - if (t1->e.record.field_count != t2->e.record.field_count) - return false; + case HLSL_CLASS_UAV: + if (t1->e.resource.rasteriser_ordered != t2->e.resource.rasteriser_ordered) + return false; + /* fall through */ + case HLSL_CLASS_TEXTURE: + if (t1->sampler_dim != HLSL_SAMPLER_DIM_GENERIC + && !hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format)) + return false; + /* fall through */ + case HLSL_CLASS_SAMPLER: + if (t1->sampler_dim != t2->sampler_dim) + return false; + return true; - for (i = 0; i < t1->e.record.field_count; ++i) - { - const struct hlsl_struct_field *field1 = &t1->e.record.fields[i]; - const struct hlsl_struct_field *field2 = &t2->e.record.fields[i]; - - if (!hlsl_types_are_equal(field1->type, field2->type)) + case HLSL_CLASS_STRUCT: + if (t1->e.record.field_count != t2->e.record.field_count) return false; - if (strcmp(field1->name, field2->name)) - return false; - } - } - if (t1->class == HLSL_CLASS_ARRAY) - return t1->e.array.elements_count == t2->e.array.elements_count - && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type); - if (t1->class == 
HLSL_CLASS_OBJECT) - { - if (t1->base_type == HLSL_TYPE_TECHNIQUE && t1->e.version != t2->e.version) - return false; + for (size_t i = 0; i < t1->e.record.field_count; ++i) + { + const struct hlsl_struct_field *field1 = &t1->e.record.fields[i]; + const struct hlsl_struct_field *field2 = &t2->e.record.fields[i]; + + if (!hlsl_types_are_equal(field1->type, field2->type)) + return false; + + if (strcmp(field1->name, field2->name)) + return false; + } + return true; + + case HLSL_CLASS_ARRAY: + return t1->e.array.elements_count == t2->e.array.elements_count + && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type); + + case HLSL_CLASS_TECHNIQUE: + return t1->e.version == t2->e.version; + + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_STRING: + case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_VOID: + return true; } - return true; + vkd3d_unreachable(); } struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, @@ -950,7 +1070,6 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, } } type->class = old->class; - type->base_type = old->base_type; type->dimx = old->dimx; type->dimy = old->dimy; type->modifiers = old->modifiers | modifiers; @@ -962,6 +1081,12 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, switch (old->class) { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + type->e.numeric.type = old->e.numeric.type; + break; + case HLSL_CLASS_ARRAY: if (!(type->e.array.type = hlsl_type_clone(ctx, old->e.array.type, default_majority, modifiers))) { @@ -1008,14 +1133,15 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, break; } - case HLSL_CLASS_OBJECT: - if (type->base_type == HLSL_TYPE_TECHNIQUE) - type->e.version = old->e.version; - if (old->base_type == HLSL_TYPE_TEXTURE || 
old->base_type == HLSL_TYPE_UAV) - { - type->e.resource.format = old->e.resource.format; - type->e.resource.rasteriser_ordered = old->e.resource.rasteriser_ordered; - } + case HLSL_CLASS_UAV: + type->e.resource.rasteriser_ordered = old->e.resource.rasteriser_ordered; + /* fall through */ + case HLSL_CLASS_TEXTURE: + type->e.resource.format = old->e.resource.format; + break; + + case HLSL_CLASS_TECHNIQUE: + type->e.version = old->e.version; break; default: @@ -1346,6 +1472,16 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } +struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3}; + + assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); + assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); + return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); +} + struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) { @@ -1538,16 +1674,38 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) return NULL; + assert(hlsl_is_numeric_type(val->data_type)); if (components == 1) - type = hlsl_get_scalar_type(ctx, val->data_type->base_type); + type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); else - type = hlsl_get_vector_type(ctx, val->data_type->base_type, components); + type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, components); init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); hlsl_src_from_node(&swizzle->val, val); swizzle->swizzle = s; return &swizzle->node; } +struct hlsl_ir_node 
*hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, + struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_stateblock_constant *constant; + struct hlsl_type *type = hlsl_get_scalar_type(ctx, HLSL_TYPE_INT); + + if (!(constant = hlsl_alloc(ctx, sizeof(*constant)))) + return NULL; + + init_node(&constant->node, HLSL_IR_STATEBLOCK_CONSTANT, type, loc); + + if (!(constant->name = hlsl_alloc(ctx, strlen(name) + 1))) + { + vkd3d_free(constant); + return NULL; + } + strcpy(constant->name, name); + + return &constant->node; +} + bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) { struct hlsl_type *type = index->val.node->data_type; @@ -1557,7 +1715,9 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) bool hlsl_index_is_resource_access(struct hlsl_ir_index *index) { - return index->val.node->data_type->class == HLSL_CLASS_OBJECT; + const struct hlsl_type *type = index->val.node->data_type; + + return type->class == HLSL_CLASS_TEXTURE || type->class == HLSL_CLASS_UAV; } bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index) @@ -1578,10 +1738,10 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v if (!(index = hlsl_alloc(ctx, sizeof(*index)))) return NULL; - if (type->class == HLSL_CLASS_OBJECT) + if (type->class == HLSL_CLASS_TEXTURE || type->class == HLSL_CLASS_UAV) type = type->e.resource.format; else if (type->class == HLSL_CLASS_MATRIX) - type = hlsl_get_vector_type(ctx, type->base_type, type->dimx); + type = hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx); else type = hlsl_get_element_type_from_path_index(ctx, type, idx); @@ -1868,6 +2028,12 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr return dst; } +static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_stateblock_constant *constant) +{ + return hlsl_new_stateblock_constant(ctx, constant->name, 
&constant->node.loc); +} + void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c) { hlsl_block_cleanup(&c->body); @@ -1963,6 +2129,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_SWIZZLE: return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); + + case HLSL_IR_STATEBLOCK_CONSTANT: + return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); } vkd3d_unreachable(); @@ -2018,7 +2187,8 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, } struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, - const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc) + uint32_t modifiers, const struct hlsl_reg_reservation *reservation, struct hlsl_scope *annotations, + const struct vkd3d_shader_location *loc) { struct hlsl_buffer *buffer; @@ -2026,8 +2196,10 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type return NULL; buffer->type = type; buffer->name = name; + buffer->modifiers = modifiers; if (reservation) buffer->reservation = *reservation; + buffer->annotations = annotations; buffer->loc = *loc; list_add_tail(&ctx->buffers, &buffer->entry); return buffer; @@ -2130,6 +2302,19 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru [HLSL_TYPE_BOOL] = "bool", }; + static const char *const dimensions[] = + { + [HLSL_SAMPLER_DIM_1D] = "1D", + [HLSL_SAMPLER_DIM_2D] = "2D", + [HLSL_SAMPLER_DIM_3D] = "3D", + [HLSL_SAMPLER_DIM_CUBE] = "Cube", + [HLSL_SAMPLER_DIM_1DARRAY] = "1DArray", + [HLSL_SAMPLER_DIM_2DARRAY] = "2DArray", + [HLSL_SAMPLER_DIM_2DMS] = "2DMS", + [HLSL_SAMPLER_DIM_2DMSARRAY] = "2DMSArray", + [HLSL_SAMPLER_DIM_CUBEARRAY] = "CubeArray", + }; + if (!(string = hlsl_get_string_buffer(ctx))) return NULL; @@ -2142,18 +2327,18 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru switch (type->class) { case HLSL_CLASS_SCALAR: - 
assert(type->base_type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s", base_types[type->base_type]); + assert(type->e.numeric.type < ARRAY_SIZE(base_types)); + vkd3d_string_buffer_printf(string, "%s", base_types[type->e.numeric.type]); return string; case HLSL_CLASS_VECTOR: - assert(type->base_type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s%u", base_types[type->base_type], type->dimx); + assert(type->e.numeric.type < ARRAY_SIZE(base_types)); + vkd3d_string_buffer_printf(string, "%s%u", base_types[type->e.numeric.type], type->dimx); return string; case HLSL_CLASS_MATRIX: - assert(type->base_type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->base_type], type->dimy, type->dimx); + assert(type->e.numeric.type < ARRAY_SIZE(base_types)); + vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); return string; case HLSL_CLASS_ARRAY: @@ -2183,71 +2368,60 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru vkd3d_string_buffer_printf(string, ""); return string; - case HLSL_CLASS_OBJECT: - { - static const char *const dimensions[] = + case HLSL_CLASS_TEXTURE: + if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) { - [HLSL_SAMPLER_DIM_1D] = "1D", - [HLSL_SAMPLER_DIM_2D] = "2D", - [HLSL_SAMPLER_DIM_3D] = "3D", - [HLSL_SAMPLER_DIM_CUBE] = "Cube", - [HLSL_SAMPLER_DIM_1DARRAY] = "1DArray", - [HLSL_SAMPLER_DIM_2DARRAY] = "2DArray", - [HLSL_SAMPLER_DIM_2DMS] = "2DMS", - [HLSL_SAMPLER_DIM_2DMSARRAY] = "2DMSArray", - [HLSL_SAMPLER_DIM_CUBEARRAY] = "CubeArray", - }; - - switch (type->base_type) - { - case HLSL_TYPE_TEXTURE: - if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) - { - vkd3d_string_buffer_printf(string, "Texture"); - return string; - } - - assert(type->e.resource.format->base_type < ARRAY_SIZE(base_types)); - if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) - { - vkd3d_string_buffer_printf(string, 
"Buffer"); - } - else - { - assert(type->sampler_dim < ARRAY_SIZE(dimensions)); - vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); - } - if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) - { - vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); - hlsl_release_string_buffer(ctx, inner_string); - } - return string; - - case HLSL_TYPE_UAV: - if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) - vkd3d_string_buffer_printf(string, "RWBuffer"); - else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - vkd3d_string_buffer_printf(string, "RWStructuredBuffer"); - else - vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); - if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) - { - vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); - hlsl_release_string_buffer(ctx, inner_string); - } - return string; - - default: - vkd3d_string_buffer_printf(string, ""); - return string; + vkd3d_string_buffer_printf(string, "Texture"); + return string; } - } - default: - vkd3d_string_buffer_printf(string, ""); + assert(hlsl_is_numeric_type(type->e.resource.format)); + assert(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); + if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) + { + vkd3d_string_buffer_printf(string, "Buffer"); + } + else + { + assert(type->sampler_dim < ARRAY_SIZE(dimensions)); + vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); + } + if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) + { + vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); + hlsl_release_string_buffer(ctx, inner_string); + } return string; + + case HLSL_CLASS_UAV: + if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) + vkd3d_string_buffer_printf(string, "RWBuffer"); + else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + vkd3d_string_buffer_printf(string, 
"RWStructuredBuffer"); + else + vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); + if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) + { + vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); + hlsl_release_string_buffer(ctx, inner_string); + } + return string; + + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_VOID: + break; } + + vkd3d_string_buffer_printf(string, ""); + return string; } struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, @@ -2525,7 +2699,7 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl { const union hlsl_constant_value_component *value = &constant->value.u[x]; - switch (type->base_type) + switch (type->e.numeric.type) { case HLSL_TYPE_BOOL: vkd3d_string_buffer_printf(buffer, "%s ", value->u ? 
"true" : "false"); @@ -2611,10 +2785,10 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP2_MUL] = "*", [HLSL_OP2_NEQUAL] = "!=", [HLSL_OP2_RSHIFT] = ">>", + [HLSL_OP2_SLT] = "slt", [HLSL_OP3_CMP] = "cmp", [HLSL_OP3_DP2ADD] = "dp2add", - [HLSL_OP3_MOVC] = "movc", [HLSL_OP3_TERNARY] = "ternary", }; @@ -2791,6 +2965,12 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ vkd3d_string_buffer_printf(buffer, "]"); } +static void dump_ir_stateblock_constant(struct vkd3d_string_buffer *buffer, + const struct hlsl_ir_stateblock_constant *constant) +{ + vkd3d_string_buffer_printf(buffer, "%s", constant->name); +} + static void dump_ir_switch(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_switch *s) { struct hlsl_ir_switch_case *c; @@ -2879,6 +3059,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, case HLSL_IR_SWIZZLE: dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); break; + + case HLSL_IR_STATEBLOCK_CONSTANT: + dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); + break; } } @@ -3051,6 +3235,12 @@ static void free_ir_index(struct hlsl_ir_index *index) vkd3d_free(index); } +static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *constant) +{ + vkd3d_free(constant->name); + vkd3d_free(constant); +} + void hlsl_free_instr(struct hlsl_ir_node *node) { assert(list_empty(&node->uses)); @@ -3108,6 +3298,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) case HLSL_IR_SWITCH: free_ir_switch(hlsl_ir_switch(node)); break; + + case HLSL_IR_STATEBLOCK_CONSTANT: + free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); + break; } } @@ -3273,7 +3467,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) {"cs_4_0", VKD3D_SHADER_TYPE_COMPUTE, 4, 0, 0, 0, false}, {"cs_4_1", VKD3D_SHADER_TYPE_COMPUTE, 4, 1, 0, 0, false}, {"cs_5_0", VKD3D_SHADER_TYPE_COMPUTE, 5, 0, 0, 0, false}, + {"cs_5_1", 
VKD3D_SHADER_TYPE_COMPUTE, 5, 1, 0, 0, false}, {"ds_5_0", VKD3D_SHADER_TYPE_DOMAIN, 5, 0, 0, 0, false}, + {"ds_5_1", VKD3D_SHADER_TYPE_DOMAIN, 5, 1, 0, 0, false}, {"fx_2_0", VKD3D_SHADER_TYPE_EFFECT, 2, 0, 0, 0, false}, {"fx_4_0", VKD3D_SHADER_TYPE_EFFECT, 4, 0, 0, 0, false}, {"fx_4_1", VKD3D_SHADER_TYPE_EFFECT, 4, 1, 0, 0, false}, @@ -3281,7 +3477,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) {"gs_4_0", VKD3D_SHADER_TYPE_GEOMETRY, 4, 0, 0, 0, false}, {"gs_4_1", VKD3D_SHADER_TYPE_GEOMETRY, 4, 1, 0, 0, false}, {"gs_5_0", VKD3D_SHADER_TYPE_GEOMETRY, 5, 0, 0, 0, false}, + {"gs_5_1", VKD3D_SHADER_TYPE_GEOMETRY, 5, 1, 0, 0, false}, {"hs_5_0", VKD3D_SHADER_TYPE_HULL, 5, 0, 0, 0, false}, + {"hs_5_1", VKD3D_SHADER_TYPE_HULL, 5, 1, 0, 0, false}, {"ps.1.0", VKD3D_SHADER_TYPE_PIXEL, 1, 0, 0, 0, false}, {"ps.1.1", VKD3D_SHADER_TYPE_PIXEL, 1, 1, 0, 0, false}, {"ps.1.2", VKD3D_SHADER_TYPE_PIXEL, 1, 2, 0, 0, false}, @@ -3309,6 +3507,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) {"ps_4_0_level_9_3", VKD3D_SHADER_TYPE_PIXEL, 4, 0, 9, 3, false}, {"ps_4_1", VKD3D_SHADER_TYPE_PIXEL, 4, 1, 0, 0, false}, {"ps_5_0", VKD3D_SHADER_TYPE_PIXEL, 5, 0, 0, 0, false}, + {"ps_5_1", VKD3D_SHADER_TYPE_PIXEL, 5, 1, 0, 0, false}, {"tx_1_0", VKD3D_SHADER_TYPE_TEXTURE, 1, 0, 0, 0, false}, {"vs.1.0", VKD3D_SHADER_TYPE_VERTEX, 1, 0, 0, 0, false}, {"vs.1.1", VKD3D_SHADER_TYPE_VERTEX, 1, 1, 0, 0, false}, @@ -3330,6 +3529,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) {"vs_4_0_level_9_3", VKD3D_SHADER_TYPE_VERTEX, 4, 0, 9, 3, false}, {"vs_4_1", VKD3D_SHADER_TYPE_VERTEX, 4, 1, 0, 0, false}, {"vs_5_0", VKD3D_SHADER_TYPE_VERTEX, 5, 0, 0, 0, false}, + {"vs_5_1", VKD3D_SHADER_TYPE_VERTEX, 5, 1, 0, 0, false}, }; for (i = 0; i < ARRAY_SIZE(profiles); ++i) @@ -3351,6 +3551,7 @@ static int compare_function_rb(const void *key, const struct rb_entry *entry) static void declare_predefined_types(struct hlsl_ctx *ctx) { + 
struct vkd3d_string_buffer *name; unsigned int x, y, bt, i, v; struct hlsl_type *type; @@ -3363,7 +3564,6 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) "uint", "bool", }; - char name[15]; static const char *const variants_float[] = {"min10float", "min16float"}; static const char *const variants_int[] = {"min12int", "min16int"}; @@ -3391,14 +3591,6 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) {"dword", HLSL_CLASS_SCALAR, HLSL_TYPE_UINT, 1, 1}, {"vector", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, {"matrix", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, - {"fxgroup", HLSL_CLASS_OBJECT, HLSL_TYPE_EFFECT_GROUP, 1, 1}, - {"pass", HLSL_CLASS_OBJECT, HLSL_TYPE_PASS, 1, 1}, - {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, - {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, - {"PIXELSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, - {"VERTEXSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_VERTEXSHADER, 1, 1}, - {"RenderTargetView",HLSL_CLASS_OBJECT, HLSL_TYPE_RENDERTARGETVIEW, 1, 1}, - {"DepthStencilView",HLSL_CLASS_OBJECT, HLSL_TYPE_DEPTHSTENCILVIEW, 1, 1}, }; static const struct @@ -3413,28 +3605,34 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) {"technique11", 11}, }; + if (!(name = hlsl_get_string_buffer(ctx))) + return; + for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) { for (y = 1; y <= 4; ++y) { for (x = 1; x <= 4; ++x) { - sprintf(name, "%s%ux%u", names[bt], y, x); - type = hlsl_new_type(ctx, name, HLSL_CLASS_MATRIX, bt, x, y); + vkd3d_string_buffer_clear(name); + vkd3d_string_buffer_printf(name, "%s%ux%u", names[bt], y, x); + type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_MATRIX, bt, x, y); hlsl_scope_add_type(ctx->globals, type); ctx->builtin_types.matrix[bt][x - 1][y - 1] = type; if (y == 1) { - sprintf(name, "%s%u", names[bt], x); - type = hlsl_new_type(ctx, name, HLSL_CLASS_VECTOR, bt, x, y); + vkd3d_string_buffer_clear(name); + vkd3d_string_buffer_printf(name, "%s%u", names[bt], x); + type = 
hlsl_new_type(ctx, name->buffer, HLSL_CLASS_VECTOR, bt, x, y); hlsl_scope_add_type(ctx->globals, type); ctx->builtin_types.vector[bt][x - 1] = type; if (x == 1) { - sprintf(name, "%s", names[bt]); - type = hlsl_new_type(ctx, name, HLSL_CLASS_SCALAR, bt, x, y); + vkd3d_string_buffer_clear(name); + vkd3d_string_buffer_printf(name, "%s", names[bt]); + type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_SCALAR, bt, x, y); hlsl_scope_add_type(ctx->globals, type); ctx->builtin_types.scalar[bt] = type; } @@ -3477,22 +3675,25 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) { for (x = 1; x <= 4; ++x) { - sprintf(name, "%s%ux%u", variants[v], y, x); - type = hlsl_new_type(ctx, name, HLSL_CLASS_MATRIX, bt, x, y); + vkd3d_string_buffer_clear(name); + vkd3d_string_buffer_printf(name, "%s%ux%u", variants[v], y, x); + type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_MATRIX, bt, x, y); type->is_minimum_precision = 1; hlsl_scope_add_type(ctx->globals, type); if (y == 1) { - sprintf(name, "%s%u", variants[v], x); - type = hlsl_new_type(ctx, name, HLSL_CLASS_VECTOR, bt, x, y); + vkd3d_string_buffer_clear(name); + vkd3d_string_buffer_printf(name, "%s%u", variants[v], x); + type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_VECTOR, bt, x, y); type->is_minimum_precision = 1; hlsl_scope_add_type(ctx->globals, type); if (x == 1) { - sprintf(name, "%s", variants[v]); - type = hlsl_new_type(ctx, name, HLSL_CLASS_SCALAR, bt, x, y); + vkd3d_string_buffer_clear(name); + vkd3d_string_buffer_printf(name, "%s", variants[v]); + type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_SCALAR, bt, x, y); type->is_minimum_precision = 1; hlsl_scope_add_type(ctx->globals, type); } @@ -3504,12 +3705,20 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) for (bt = 0; bt <= HLSL_SAMPLER_DIM_LAST_SAMPLER; ++bt) { - type = hlsl_new_type(ctx, sampler_names[bt], HLSL_CLASS_OBJECT, HLSL_TYPE_SAMPLER, 1, 1); + type = hlsl_new_simple_type(ctx, sampler_names[bt], HLSL_CLASS_SAMPLER); 
type->sampler_dim = bt; ctx->builtin_types.sampler[bt] = type; } - ctx->builtin_types.Void = hlsl_new_type(ctx, "void", HLSL_CLASS_OBJECT, HLSL_TYPE_VOID, 1, 1); + ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "fxgroup", HLSL_CLASS_EFFECT_GROUP)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pass", HLSL_CLASS_PASS)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pixelshader", HLSL_CLASS_PIXEL_SHADER)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RenderTargetView", HLSL_CLASS_RENDER_TARGET_VIEW)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "texture", HLSL_CLASS_TEXTURE)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "vertexshader", HLSL_CLASS_VERTEX_SHADER)); for (i = 0; i < ARRAY_SIZE(effect_types); ++i) { @@ -3520,10 +3729,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) for (i = 0; i < ARRAY_SIZE(technique_types); ++i) { - type = hlsl_new_type(ctx, technique_types[i].name, HLSL_CLASS_OBJECT, HLSL_TYPE_TECHNIQUE, 1, 1); + type = hlsl_new_simple_type(ctx, technique_types[i].name, HLSL_CLASS_TECHNIQUE); type->e.version = technique_types[i].version; hlsl_scope_add_type(ctx->globals, type); } + + hlsl_release_string_buffer(ctx, name); } static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info *compile_info, @@ -3571,27 +3782,46 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil list_init(&ctx->buffers); if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, - hlsl_strdup(ctx, "$Globals"), NULL, &ctx->location))) + hlsl_strdup(ctx, "$Globals"), 0, NULL, NULL, &ctx->location))) return false; 
if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, - hlsl_strdup(ctx, "$Params"), NULL, &ctx->location))) + hlsl_strdup(ctx, "$Params"), 0, NULL, NULL, &ctx->location))) return false; ctx->cur_buffer = ctx->globals_buffer; + ctx->warn_implicit_truncation = true; + for (i = 0; i < compile_info->option_count; ++i) { const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; - if (option->name == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER) + switch (option->name) { - if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) - ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; - else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) - ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; - } - else if (option->name == VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY) - { - ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; + case VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER: + if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) + ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; + else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) + ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; + break; + + case VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY: + ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; + break; + + case VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT: + ctx->child_effect = option->value; + break; + + case VKD3D_SHADER_COMPILE_OPTION_WARN_IMPLICIT_TRUNCATION: + ctx->warn_implicit_truncation = option->value; + break; + + case VKD3D_SHADER_COMPILE_OPTION_INCLUDE_EMPTY_BUFFERS_IN_EFFECTS: + ctx->include_empty_buffers = option->value; + break; + + default: + break; } } @@ -3615,6 +3845,21 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) rb_destroy(&ctx->functions, free_function_rb, NULL); + /* State blocks must be free before 
the variables, because they contain instructions that may + * refer to them. */ + LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + for (i = 0; i < var->state_block_count; ++i) + hlsl_free_state_block(var->state_blocks[i]); + vkd3d_free(var->state_blocks); + var->state_blocks = NULL; + var->state_block_count = 0; + var->state_block_capacity = 0; + } + } + LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) { LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) @@ -3638,6 +3883,7 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { + enum vkd3d_shader_target_type target_type = compile_info->target_type; const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; struct hlsl_ir_function_decl *decl, *entry_func = NULL; const struct hlsl_profile_info *profile; @@ -3659,25 +3905,25 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d return VKD3D_ERROR_NOT_IMPLEMENTED; } - if (compile_info->target_type != VKD3D_SHADER_TARGET_FX && profile->type == VKD3D_SHADER_TYPE_EFFECT) + if (target_type != VKD3D_SHADER_TARGET_FX && profile->type == VKD3D_SHADER_TYPE_EFFECT) { vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, "The '%s' target profile is only compatible with the 'fx' target type.", profile->name); return VKD3D_ERROR_INVALID_ARGUMENT; } - else if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_BYTECODE && profile->major_version > 3) + else if (target_type == VKD3D_SHADER_TARGET_D3D_BYTECODE && profile->major_version > 3) { vkd3d_shader_error(message_context, NULL, 
VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, "The '%s' target profile is incompatible with the 'd3dbc' target type.", profile->name); return VKD3D_ERROR_INVALID_ARGUMENT; } - else if (compile_info->target_type == VKD3D_SHADER_TARGET_DXBC_TPF && profile->major_version < 4) + else if (target_type == VKD3D_SHADER_TARGET_DXBC_TPF && profile->major_version < 4) { vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, "The '%s' target profile is incompatible with the 'dxbc-tpf' target type.", profile->name); return VKD3D_ERROR_INVALID_ARGUMENT; } - else if (compile_info->target_type == VKD3D_SHADER_TARGET_FX && profile->type != VKD3D_SHADER_TYPE_EFFECT) + else if (target_type == VKD3D_SHADER_TARGET_FX && profile->type != VKD3D_SHADER_TYPE_EFFECT) { vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, "The '%s' target profile is incompatible with the 'fx' target type.", profile->name); @@ -3741,8 +3987,41 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d return VKD3D_ERROR_INVALID_SHADER; } - ret = hlsl_emit_bytecode(&ctx, entry_func, compile_info->target_type, out); + if (target_type == VKD3D_SHADER_TARGET_SPIRV_BINARY + || target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT + || target_type == VKD3D_SHADER_TARGET_D3D_ASM) + { + uint64_t config_flags = vkd3d_shader_init_config_flags(); + struct vkd3d_shader_compile_info info = *compile_info; + struct vsir_program program; + if (profile->major_version < 4) + { + if ((ret = hlsl_emit_bytecode(&ctx, entry_func, VKD3D_SHADER_TARGET_D3D_BYTECODE, &info.source)) < 0) + goto done; + info.source_type = VKD3D_SHADER_SOURCE_D3D_BYTECODE; + ret = d3dbc_parse(&info, config_flags, message_context, &program); + } + else + { + if ((ret = hlsl_emit_bytecode(&ctx, entry_func, VKD3D_SHADER_TARGET_DXBC_TPF, &info.source)) < 0) + goto done; + info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; + ret = tpf_parse(&info, config_flags, message_context, 
&program); + } + if (ret >= 0) + { + ret = vsir_program_compile(&program, config_flags, &info, out, message_context); + vsir_program_cleanup(&program); + } + vkd3d_shader_free_shader_code(&info.source); + } + else + { + ret = hlsl_emit_bytecode(&ctx, entry_func, target_type, out); + } + +done: hlsl_ctx_cleanup(&ctx); return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index df0a53b20de..27814f3a56f 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -78,7 +78,18 @@ enum hlsl_type_class HLSL_CLASS_LAST_NUMERIC = HLSL_CLASS_MATRIX, HLSL_CLASS_STRUCT, HLSL_CLASS_ARRAY, - HLSL_CLASS_OBJECT, + HLSL_CLASS_DEPTH_STENCIL_VIEW, + HLSL_CLASS_EFFECT_GROUP, + HLSL_CLASS_PASS, + HLSL_CLASS_PIXEL_SHADER, + HLSL_CLASS_RENDER_TARGET_VIEW, + HLSL_CLASS_SAMPLER, + HLSL_CLASS_STRING, + HLSL_CLASS_TECHNIQUE, + HLSL_CLASS_TEXTURE, + HLSL_CLASS_UAV, + HLSL_CLASS_VERTEX_SHADER, + HLSL_CLASS_VOID, }; enum hlsl_base_type @@ -90,18 +101,6 @@ enum hlsl_base_type HLSL_TYPE_UINT, HLSL_TYPE_BOOL, HLSL_TYPE_LAST_SCALAR = HLSL_TYPE_BOOL, - HLSL_TYPE_SAMPLER, - HLSL_TYPE_TEXTURE, - HLSL_TYPE_UAV, - HLSL_TYPE_PIXELSHADER, - HLSL_TYPE_VERTEXSHADER, - HLSL_TYPE_PASS, - HLSL_TYPE_RENDERTARGETVIEW, - HLSL_TYPE_DEPTHSTENCILVIEW, - HLSL_TYPE_TECHNIQUE, - HLSL_TYPE_EFFECT_GROUP, - HLSL_TYPE_STRING, - HLSL_TYPE_VOID, }; enum hlsl_sampler_dim @@ -143,17 +142,11 @@ struct hlsl_type struct rb_entry scope_entry; enum hlsl_type_class class; - /* If class is <= HLSL_CLASS_LAST_NUMERIC, then base_type is <= HLSL_TYPE_LAST_SCALAR. - * If class is HLSL_CLASS_OBJECT, then base_type is > HLSL_TYPE_LAST_SCALAR. - * If class is HLSL_CLASS_OBJECT and base_type is HLSL_TYPE_TECHNIQUE, additional version - * field is used to distinguish between technique types. - * Otherwise, base_type is not used. */ - enum hlsl_base_type base_type; - /* If base_type is HLSL_TYPE_SAMPLER, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_SAMPLER. 
- * If base_type is HLSL_TYPE_TEXTURE, then sampler_dim can be any value of the enum except + /* If class is HLSL_CLASS_SAMPLER, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_SAMPLER. + * If class is HLSL_CLASS_TEXTURE, then sampler_dim can be any value of the enum except * HLSL_SAMPLER_DIM_GENERIC and HLSL_SAMPLER_DIM_COMPARISON. - * If base_type is HLSL_TYPE_UAV, then sampler_dim must be one of HLSL_SAMPLER_DIM_1D, + * If class is HLSL_CLASS_UAV, then sampler_dim must be one of HLSL_SAMPLER_DIM_1D, * HLSL_SAMPLER_DIM_2D, HLSL_SAMPLER_DIM_3D, HLSL_SAMPLER_DIM_1DARRAY, HLSL_SAMPLER_DIM_2DARRAY, * HLSL_SAMPLER_DIM_BUFFER, or HLSL_SAMPLER_DIM_STRUCTURED_BUFFER. * Otherwise, sampler_dim is not used */ @@ -171,11 +164,7 @@ struct hlsl_type * If type is HLSL_CLASS_MATRIX, then dimx is the number of columns, and dimy the number of rows. * If type is HLSL_CLASS_ARRAY, then dimx and dimy have the same value as in the type of the array elements. * If type is HLSL_CLASS_STRUCT, then dimx is the sum of (dimx * dimy) of every component, and dimy = 1. - * If type is HLSL_CLASS_OBJECT, dimx and dimy depend on the base_type: - * If base_type is HLSL_TYPE_SAMPLER, then both dimx = 1 and dimy = 1. - * If base_type is HLSL_TYPE_TEXTURE, then dimx = 4 and dimy = 1. - * If base_type is HLSL_TYPE_UAV, then dimx is the dimx of e.resource_format, and dimy = 1. - * Otherwise both dimx = 1 and dimy = 1. */ + */ unsigned int dimx; unsigned int dimy; /* Sample count for HLSL_SAMPLER_DIM_2DMS or HLSL_SAMPLER_DIM_2DMSARRAY. */ @@ -183,6 +172,11 @@ struct hlsl_type union { + /* Additional information if type is numeric. */ + struct + { + enum hlsl_base_type type; + } numeric; /* Additional information if type is HLSL_CLASS_STRUCT. */ struct { @@ -196,8 +190,8 @@ struct hlsl_type /* Array length, or HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT if it is not known yet at parse time. 
*/ unsigned int elements_count; } array; - /* Additional information if the base_type is HLSL_TYPE_TEXTURE or - * HLSL_TYPE_UAV. */ + /* Additional information if the class is HLSL_CLASS_TEXTURE or + * HLSL_CLASS_UAV. */ struct { /* Format of the data contained within the type. */ @@ -298,6 +292,7 @@ enum hlsl_ir_node_type HLSL_IR_STORE, HLSL_IR_SWIZZLE, HLSL_IR_SWITCH, + HLSL_IR_STATEBLOCK_CONSTANT, }; /* Common data for every type of IR instruction node. */ @@ -374,6 +369,8 @@ struct hlsl_attribute #define HLSL_STORAGE_CENTROID 0x00004000 #define HLSL_STORAGE_NOPERSPECTIVE 0x00008000 #define HLSL_STORAGE_LINEAR 0x00010000 +#define HLSL_MODIFIER_SINGLE 0x00020000 +#define HLSL_MODIFIER_EXPORT 0x00040000 #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ @@ -393,7 +390,7 @@ struct hlsl_attribute struct hlsl_reg_reservation { char reg_type; - unsigned int reg_index; + unsigned int reg_space, reg_index; char offset_type; unsigned int offset_index; @@ -421,6 +418,14 @@ struct hlsl_ir_var /* Scope that contains annotations for this variable. */ struct hlsl_scope *annotations; + /* A dynamic array containing the state block on the variable's declaration, if any. + * An array variable may contain multiple state blocks. + * A technique pass will always contain one. + * These are only really used for effect profiles. */ + struct hlsl_state_block **state_blocks; + unsigned int state_block_count; + size_t state_block_capacity; + /* Indexes of the IR instructions where the variable is first written and last read (liveness * range). The IR instructions are numerated starting from 2, because 0 means unused, and 1 * means function entry. 
*/ @@ -442,9 +447,10 @@ struct hlsl_ir_var enum hlsl_sampler_dim sampler_dim; struct vkd3d_shader_location first_sampler_dim_loc; } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; - /* Minimum number of binds required to include all object components actually used in the shader. - * It may be less than the allocation size, e.g. for texture arrays. */ - unsigned int bind_count[HLSL_REGSET_LAST_OBJECT + 1]; + /* Minimum number of binds required to include all components actually used in the shader. + * It may be less than the allocation size, e.g. for texture arrays. + * The bind_count for HLSL_REGSET_NUMERIC is only used in uniforms for now. */ + unsigned int bind_count[HLSL_REGSET_LAST + 1]; /* Whether the shader performs dereferences with non-constant offsets in the variable. */ bool indexable; @@ -456,6 +462,40 @@ struct hlsl_ir_var uint32_t is_separated_resource : 1; }; +/* This struct is used to represent assignments in state block entries: + * name = {args[0], args[1], ...}; + * - or - + * name = args[0] + * - or - + * name[lhs_index] = args[0] + * - or - + * name[lhs_index] = {args[0], args[1], ...}; + */ +struct hlsl_state_block_entry +{ + /* For assignments, the name in the lhs. */ + char *name; + /* Resolved format-specific property identifier. */ + unsigned int name_id; + + /* Whether the lhs in the assignment is indexed and, in that case, its index. */ + bool lhs_has_index; + unsigned int lhs_index; + + /* Instructions present in the rhs. */ + struct hlsl_block *instrs; + + /* For assignments, arguments of the rhs initializer. */ + struct hlsl_src *args; + unsigned int args_count; +}; + +struct hlsl_state_block +{ + struct hlsl_state_block_entry **entries; + size_t count, capacity; +}; + /* Sized array of variables representing a function's parameters. */ struct hlsl_func_parameters { @@ -593,18 +633,15 @@ enum hlsl_ir_expr_op HLSL_OP2_MUL, HLSL_OP2_NEQUAL, HLSL_OP2_RSHIFT, + /* SLT(a, b) retrieves 1.0 if (a < b), else 0.0. 
Only used for SM1-SM3 target vertex shaders. */ + HLSL_OP2_SLT, /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, * then adds c. */ HLSL_OP3_DP2ADD, - /* MOVC(a, b, c) returns c if a is bitwise zero and b otherwise. - * TERNARY(a, b, c) returns c if a == 0 and b otherwise. - * They differ for floating point numbers, because - * -0.0 == 0.0, but it is not bitwise zero. CMP(a, b, c) returns b - if a >= 0, and c otherwise. It's used only for SM1-SM3 targets, while - SM4+ is using MOVC in such cases. */ + /* TERNARY(a, b, c) returns 'b' if 'a' is true and 'c' otherwise. 'a' must always be boolean. + * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */ HLSL_OP3_CMP, - HLSL_OP3_MOVC, HLSL_OP3_TERNARY, }; @@ -750,6 +787,14 @@ struct hlsl_ir_constant struct hlsl_reg reg; }; +/* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, + * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */ +struct hlsl_ir_stateblock_constant +{ + struct hlsl_ir_node node; + char *name; +}; + struct hlsl_scope { /* Item entry for hlsl_ctx.scopes. */ @@ -798,10 +843,13 @@ struct hlsl_buffer struct vkd3d_shader_location loc; enum hlsl_buffer_type type; const char *name; + uint32_t modifiers; /* Register reserved for this buffer, if any. * If provided, it should be of type 'b' if type is HLSL_BUFFER_CONSTANT and 't' if type is * HLSL_BUFFER_TEXTURE. */ struct hlsl_reg_reservation reservation; + /* Scope that contains annotations for this buffer. 
*/ + struct hlsl_scope *annotations; /* Item entry for hlsl_ctx.buffers */ struct list entry; @@ -920,8 +968,21 @@ struct hlsl_ctx uint32_t found_numthreads : 1; bool semantic_compat_mapping; + bool child_effect; + bool include_empty_buffers; + bool warn_implicit_truncation; }; +static inline bool hlsl_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) +{ + return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); +} + +static inline bool hlsl_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) +{ + return !hlsl_version_ge(ctx, major, minor); +} + struct hlsl_resource_load_params { struct hlsl_type *format; @@ -1009,6 +1070,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); } +static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) +{ + assert(node->type == HLSL_IR_STATEBLOCK_CONSTANT); + return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); +} + static inline void hlsl_block_init(struct hlsl_block *block) { list_init(&block->instrs); @@ -1183,6 +1250,7 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); +void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); @@ -1201,6 +1269,7 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new); void hlsl_free_attribute(struct hlsl_attribute *attr); void hlsl_free_instr(struct hlsl_ir_node *node); void hlsl_free_instr_list(struct list 
*list); +void hlsl_free_state_block(struct hlsl_state_block *state_block); void hlsl_free_type(struct hlsl_type *type); void hlsl_free_var(struct hlsl_ir_var *decl); @@ -1222,7 +1291,8 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp struct hlsl_ir_node *arg2); struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, - const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc); + uint32_t modifiers, const struct hlsl_reg_reservation *reservation, struct hlsl_scope *annotations, + const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, @@ -1243,6 +1313,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3); void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); @@ -1279,6 +1351,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count); struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node 
*hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, + struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, struct hlsl_type *type, const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, @@ -1330,7 +1404,6 @@ unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx); -void hlsl_prepend_global_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block); const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type); unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type); @@ -1352,10 +1425,13 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); +bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), struct hlsl_block *block, void *context); +D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); +D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index 
558506db108..a5923d8bf8e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -76,6 +76,7 @@ case {return KW_CASE; } cbuffer {return KW_CBUFFER; } centroid {return KW_CENTROID; } column_major {return KW_COLUMN_MAJOR; } +ComputeShader {return KW_COMPUTESHADER; } compile {return KW_COMPILE; } const {return KW_CONST; } continue {return KW_CONTINUE; } @@ -83,15 +84,18 @@ DepthStencilState {return KW_DEPTHSTENCILSTATE; } DepthStencilView {return KW_DEPTHSTENCILVIEW; } default {return KW_DEFAULT; } discard {return KW_DISCARD; } +DomainShader {return KW_DOMAINSHADER; } do {return KW_DO; } double {return KW_DOUBLE; } else {return KW_ELSE; } +export {return KW_EXPORT; } extern {return KW_EXTERN; } false {return KW_FALSE; } for {return KW_FOR; } fxgroup {return KW_FXGROUP; } GeometryShader {return KW_GEOMETRYSHADER; } groupshared {return KW_GROUPSHARED; } +HullShader {return KW_HULLSHADER; } if {return KW_IF; } in {return KW_IN; } inline {return KW_INLINE; } @@ -105,7 +109,7 @@ out {return KW_OUT; } packoffset {return KW_PACKOFFSET; } pass {return KW_PASS; } PixelShader {return KW_PIXELSHADER; } -precise {return KW_PRECISE; } +pixelshader {return KW_PIXELSHADER; } RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } RasterizerOrderedTexture1D {return KW_RASTERIZERORDEREDTEXTURE1D; } @@ -163,6 +167,7 @@ typedef {return KW_TYPEDEF; } uniform {return KW_UNIFORM; } vector {return KW_VECTOR; } VertexShader {return KW_VERTEXSHADER; } +vertexshader {return KW_VERTEXSHADER; } void {return KW_VOID; } volatile {return KW_VOLATILE; } while {return KW_WHILE; } @@ -186,7 +191,7 @@ while {return KW_WHILE; } %= {return OP_MODASSIGN; } &= {return OP_ANDASSIGN; } \|= {return OP_ORASSIGN; } -^= {return OP_XORASSIGN; } +\^= {return OP_XORASSIGN; } {IDENTIFIER} { struct hlsl_ctx *ctx = yyget_extra(yyscanner); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y 
b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index cd05fd008a6..9c1bdef926d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -77,6 +77,10 @@ struct parse_variable_def struct hlsl_type *basic_type; uint32_t modifiers; struct vkd3d_shader_location modifiers_loc; + + struct hlsl_state_block **state_blocks; + unsigned int state_block_count; + size_t state_block_capacity; }; struct parse_function @@ -114,6 +118,12 @@ struct parse_attribute_list const struct hlsl_attribute **attrs; }; +struct state_block_index +{ + bool has_index; + unsigned int index; +}; + } %code provides @@ -158,6 +168,9 @@ static struct list *make_empty_list(struct hlsl_ctx *ctx) static void destroy_block(struct hlsl_block *block) { + if (!block) + return; + hlsl_block_cleanup(block); vkd3d_free(block); } @@ -413,7 +426,7 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct return NULL; } - if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy) + if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy && ctx->warn_implicit_truncation) hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix"); @@ -438,8 +451,9 @@ static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) { - struct hlsl_ir_node *condition, *not, *iff, *jump; + struct hlsl_ir_node *condition, *cast, *not, *iff, *jump; struct hlsl_block then_block; + struct hlsl_type *bool_type; /* E.g. "for (i = 0; ; ++i)". 
*/ if (list_empty(&cond_block->instrs)) @@ -449,7 +463,12 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co check_condition_type(ctx, condition); - if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) + bool_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL); + if (!(cast = hlsl_new_cast(ctx, condition, bool_type, &condition->loc))) + return false; + hlsl_block_add_instr(cond_block, cast); + + if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, cast, &condition->loc))) return false; hlsl_block_add_instr(cond_block, not); @@ -640,6 +659,16 @@ static unsigned int initializer_size(const struct parse_initializer *initializer return count; } +static void cleanup_parse_attribute_list(struct parse_attribute_list *attr_list) +{ + unsigned int i = 0; + + assert(attr_list); + for (i = 0; i < attr_list->count; ++i) + hlsl_free_attribute((struct hlsl_attribute *) attr_list->attrs[i]); + vkd3d_free(attr_list->attrs); +} + static void free_parse_initializer(struct parse_initializer *initializer) { destroy_block(initializer->instrs); @@ -817,8 +846,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; struct hlsl_ir_node *return_index, *cast; - if (expr_type->class == HLSL_CLASS_OBJECT - && (expr_type->base_type == HLSL_TYPE_TEXTURE || expr_type->base_type == HLSL_TYPE_UAV) + if ((expr_type->class == HLSL_CLASS_TEXTURE || expr_type->class == HLSL_CLASS_UAV) && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { unsigned int dim_count = hlsl_sampler_dim_count(expr_type->sampler_dim); @@ -925,24 +953,10 @@ static void free_parse_variable_def(struct parse_variable_def *v) vkd3d_free(v->arrays.sizes); vkd3d_free(v->name); hlsl_cleanup_semantic(&v->semantic); + assert(!v->state_blocks); vkd3d_free(v); } -static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) -{ - return 
ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; -} - -static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -{ - return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); -} - -static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -{ - return !shader_profile_version_ge(ctx, major, minor); -} - static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, struct hlsl_type *type, uint32_t modifiers, struct list *defs) { @@ -965,7 +979,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, field->type = type; - if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) + if (hlsl_version_ge(ctx, 5, 1) && hlsl_type_is_resource(type)) { for (k = 0; k < v->arrays.count; ++k) unbounded_res_array |= (v->arrays.sizes[k] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); @@ -1115,7 +1129,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters } static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *annotations, - const struct vkd3d_shader_location *loc) + struct hlsl_state_block *state_block, const struct vkd3d_shader_location *loc) { struct hlsl_ir_var *var; struct hlsl_type *type; @@ -1125,6 +1139,11 @@ static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope * return false; var->annotations = annotations; + var->state_blocks = hlsl_alloc(ctx, sizeof(*var->state_blocks)); + var->state_blocks[0] = state_block; + var->state_block_count = 1; + var->state_block_capacity = 1; + if (!hlsl_add_var(ctx, var, false)) { struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); @@ -1191,17 +1210,18 @@ static bool add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl return true; } -static struct hlsl_reg_reservation parse_reg_reservation(const char 
*reg_string) +static bool parse_reservation_index(const char *string, char *type, uint32_t *index) { - struct hlsl_reg_reservation reservation = {0}; + if (!sscanf(string + 1, "%u", index)) + return false; - if (!sscanf(reg_string + 1, "%u", &reservation.reg_index)) - { - FIXME("Unsupported register reservation syntax.\n"); - return reservation; - } - reservation.reg_type = ascii_tolower(reg_string[0]); - return reservation; + *type = ascii_tolower(string[0]); + return true; +} + +static bool parse_reservation_space(const char *string, uint32_t *space) +{ + return !ascii_strncasecmp(string, "space", 5) && sscanf(string + 5, "%u", space); } static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const char *reg_string, @@ -1210,7 +1230,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const struct hlsl_reg_reservation reservation = {0}; char *endptr; - if (shader_profile_version_lt(ctx, 4, 0)) + if (hlsl_version_lt(ctx, 4, 0)) return reservation; reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); @@ -1273,7 +1293,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str struct hlsl_ir_node *node; struct hlsl_block expr; unsigned int ret = 0; - bool progress; + struct hlsl_src src; LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) { @@ -1293,6 +1313,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str case HLSL_IR_RESOURCE_STORE: case HLSL_IR_STORE: case HLSL_IR_SWITCH: + case HLSL_IR_STATEBLOCK_CONSTANT: hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected literal expression."); } @@ -1309,13 +1330,12 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str return 0; } - do - { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); - progress |= hlsl_copy_propagation_execute(ctx, &expr); - } while (progress); + /* Wrap the node into a src to allow the 
reference to survive the multiple const passes. */ + hlsl_src_from_node(&src, node_from_block(&expr)); + hlsl_run_const_passes(ctx, &expr); + node = src.node; + hlsl_src_remove(&src); - node = node_from_block(&expr); if (node->type == HLSL_IR_CONSTANT) { constant = hlsl_ir_constant(node); @@ -1334,9 +1354,6 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) { - if (t1->base_type > HLSL_TYPE_LAST_SCALAR || t2->base_type > HLSL_TYPE_LAST_SCALAR) - return false; - /* Scalar vars can be converted to pretty much everything */ if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) return true; @@ -1368,10 +1385,6 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hlsl_base_type t2) { - if (t1 > HLSL_TYPE_LAST_SCALAR || t2 > HLSL_TYPE_LAST_SCALAR) { - FIXME("Unexpected base type.\n"); - return HLSL_TYPE_FLOAT; - } if (t1 == t2) return t1 == HLSL_TYPE_BOOL ? 
HLSL_TYPE_INT : t1; if (t1 == HLSL_TYPE_DOUBLE || t2 == HLSL_TYPE_DOUBLE) @@ -1475,7 +1488,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl struct hlsl_ir_node *load; struct hlsl_ir_var *var; - scalar_type = hlsl_get_scalar_type(ctx, type->base_type); + scalar_type = hlsl_get_scalar_type(ctx, type->e.numeric.type); if (!(var = hlsl_new_synthetic_var(ctx, "split_op", type, loc))) return NULL; @@ -1525,7 +1538,7 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * const struct hlsl_type *type = instr->data_type; struct vkd3d_string_buffer *string; - switch (type->base_type) + switch (type->e.numeric.type) { case HLSL_TYPE_BOOL: case HLSL_TYPE_INT: @@ -1575,13 +1588,13 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, const struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { - enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); enum hlsl_type_class type; + enum hlsl_base_type base; unsigned int dimx, dimy; if (!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) return NULL; - + base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); } @@ -1618,14 +1631,15 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str const struct vkd3d_shader_location *loc) { struct hlsl_type *common_type, *return_type; - enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); enum hlsl_type_class type; + enum hlsl_base_type base; unsigned int dimx, dimy; struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; if (!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) return NULL; + base 
= expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); @@ -1665,7 +1679,7 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct h enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { - enum hlsl_base_type base = arg1->data_type->base_type; + enum hlsl_base_type base = arg1->data_type->e.numeric.type; struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; struct hlsl_type *return_type, *integer_type; enum hlsl_type_class type; @@ -1695,7 +1709,7 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct h static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { - enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); + enum hlsl_base_type base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; struct hlsl_type *common_type, *ret_type; enum hlsl_ir_expr_op op; @@ -1933,10 +1947,9 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo return NULL; resource_type = hlsl_deref_get_type(ctx, &resource_deref); - assert(resource_type->class == HLSL_CLASS_OBJECT); - assert(resource_type->base_type == HLSL_TYPE_TEXTURE || resource_type->base_type == HLSL_TYPE_UAV); + assert(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); - if (resource_type->base_type != HLSL_TYPE_UAV) + if (resource_type->class != HLSL_CLASS_UAV) hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Read-only resources cannot be stored to."); @@ -1947,7 +1960,7 @@ 
static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo "Resource store expressions must write to all components."); assert(coords->data_type->class == HLSL_CLASS_VECTOR); - assert(coords->data_type->base_type == HLSL_TYPE_UINT); + assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); assert(coords->data_type->dimx == dim_count); if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) @@ -2085,24 +2098,23 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i } } -static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_struct) +static bool type_has_object_components(const struct hlsl_type *type) { - if (type->class == HLSL_CLASS_OBJECT) - return !must_be_in_struct; if (type->class == HLSL_CLASS_ARRAY) - return type_has_object_components(type->e.array.type, must_be_in_struct); + return type_has_object_components(type->e.array.type); if (type->class == HLSL_CLASS_STRUCT) { - unsigned int i; - - for (i = 0; i < type->e.record.field_count; ++i) + for (unsigned int i = 0; i < type->e.record.field_count; ++i) { - if (type_has_object_components(type->e.record.fields[i].type, false)) + if (type_has_object_components(type->e.record.fields[i].type)) return true; } + + return false; } - return false; + + return !hlsl_is_numeric_type(type); } static bool type_has_numeric_components(struct hlsl_type *type) @@ -2140,6 +2152,18 @@ static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int mo } } +static void check_invalid_object_fields(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) +{ + const struct hlsl_type *type = var->data_type; + + while (type->class == HLSL_CLASS_ARRAY) + type = type->e.array.type; + + if (type->class == HLSL_CLASS_STRUCT && type_has_object_components(type)) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Target profile doesn't support objects as struct members in uniform variables."); +} + static 
void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) { struct hlsl_type *basic_type = v->basic_type; @@ -2160,7 +2184,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) type = basic_type; - if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) + if (hlsl_version_ge(ctx, 5, 1) && hlsl_type_is_resource(type)) { for (i = 0; i < v->arrays.count; ++i) unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); @@ -2265,12 +2289,8 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) if (!(modifiers & HLSL_STORAGE_STATIC)) var->storage_modifiers |= HLSL_STORAGE_UNIFORM; - if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && - type_has_object_components(var->data_type, true)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Target profile doesn't support objects as struct members in uniform variables."); - } + if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + check_invalid_object_fields(ctx, var); if ((func = hlsl_get_first_func_decl(ctx, var->name))) { @@ -2306,7 +2326,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) } if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) - && type_has_object_components(var->data_type, false)) + && type_has_object_components(var->data_type)) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Static variables cannot have both numeric and resource components."); @@ -2349,8 +2369,25 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var free_parse_variable_def(v); continue; } + type = var->data_type; + var->state_blocks = v->state_blocks; + var->state_block_count = v->state_block_count; + var->state_block_capacity = v->state_block_capacity; + v->state_block_count = 0; + v->state_block_capacity = 0; + v->state_blocks = 
NULL; + + if (var->state_blocks && hlsl_type_component_count(type) != var->state_block_count) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected %u state blocks, but got %u.", + hlsl_type_component_count(type), var->state_block_count); + free_parse_variable_def(v); + continue; + } + if (v->initializer.args_count) { if (v->initializer.braces) @@ -2394,7 +2431,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var /* Initialize statics to zero by default. */ - if (type_has_object_components(var->data_type, false)) + if (type_has_object_components(var->data_type)) { free_parse_variable_def(v); continue; @@ -2562,7 +2599,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, { struct hlsl_type *type = arg->data_type; - if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) + if (type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF) return arg; type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); @@ -2589,7 +2626,7 @@ static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *p static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - enum hlsl_base_type base = params->args[0]->data_type->base_type; + enum hlsl_base_type base = params->args[0]->data_type->e.numeric.type; bool vectors = false, matrices = false; unsigned int dimx = 4, dimy = 4; struct hlsl_type *common_type; @@ -2599,7 +2636,7 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * { struct hlsl_type *arg_type = params->args[i]->data_type; - base = expr_common_base_type(base, arg_type->base_type); + base = expr_common_base_type(base, arg_type->e.numeric.type); if (arg_type->class == HLSL_CLASS_VECTOR) { @@ -2650,12 +2687,14 @@ static bool 
elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { + enum hlsl_base_type base_type; struct hlsl_type *type; if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + base_type = type->e.numeric.type == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; + type = hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); return convert_args(ctx, params, type, loc); } @@ -2715,81 +2754,62 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, return write_acos_or_asin(ctx, params, loc, false); } -static bool intrinsic_all(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +/* Find the type corresponding to the given source type, with the same + * dimensions but a different base type. 
*/ +static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, + const struct hlsl_type *type, enum hlsl_base_type base_type) { - struct hlsl_ir_node *arg = params->args[0], *mul, *one, *zero, *load; + return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); +} + +static bool add_combine_components(struct hlsl_ctx *ctx, const struct parse_initializer *params, + struct hlsl_ir_node *arg, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *res, *load; unsigned int i, count; - if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) - return false; - hlsl_block_add_instr(params->instrs, one); - - if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) - return false; - hlsl_block_add_instr(params->instrs, zero); - - mul = one; - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) + + if (!(res = hlsl_add_load_component(ctx, params->instrs, arg, 0, loc))) + return false; + + for (i = 1; i < count; ++i) { if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) return false; - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) - return false; + if (!(res = hlsl_new_binary_expr(ctx, op, res, load))) + return NULL; + hlsl_block_add_instr(params->instrs, res); } - return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); + return true; } -static bool intrinsic_any(struct hlsl_ctx *ctx, +static bool intrinsic_all(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load; - unsigned int i, count; + struct hlsl_ir_node *arg = params->args[0], *cast; + struct hlsl_type *bool_type; - if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != HLSL_CLASS_SCALAR) - { - hlsl_fixme(ctx, loc, "any() implementation for non-vector, non-scalar"); + 
bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); + if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) return false; - } - if (arg->data_type->base_type == HLSL_TYPE_FLOAT) - { - if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) - return false; - hlsl_block_add_instr(params->instrs, zero); + return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_AND, loc); +} - if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) - return false; +static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg = params->args[0], *cast; + struct hlsl_type *bool_type; - return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); - } - else if (arg->data_type->base_type == HLSL_TYPE_BOOL) - { - if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) - return false; - hlsl_block_add_instr(params->instrs, bfalse); + bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); + if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) + return false; - or = bfalse; - - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) - { - if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) - return false; - - if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) - return false; - } - - return true; - } - - hlsl_fixme(ctx, loc, "any() implementation for non-float, non-bool"); - return false; + return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_OR, loc); } static bool intrinsic_asin(struct hlsl_ctx *ctx, @@ -2857,20 +2877,20 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, type->name, type->name, type->name); if (ret < 0) { - vkd3d_string_buffer_cleanup(buf); + hlsl_release_string_buffer(ctx, buf); return false; } ret = vkd3d_string_buffer_printf(buf, body_template, type->name); if (ret 
< 0) { - vkd3d_string_buffer_cleanup(buf); + hlsl_release_string_buffer(ctx, buf); return false; } func = hlsl_compile_internal_function(ctx, atan2_mode ? atan2_name : atan_name, buf->buffer); - vkd3d_string_buffer_cleanup(buf); + hlsl_release_string_buffer(ctx, buf); if (!func) return false; @@ -2890,15 +2910,6 @@ static bool intrinsic_atan2(struct hlsl_ctx *ctx, return write_atan_or_atan2(ctx, params, loc, true); } - -/* Find the type corresponding to the given source type, with the same - * dimensions but a different base type. */ -static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, - const struct hlsl_type *type, enum hlsl_base_type base_type) -{ - return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); -} - static bool intrinsic_asfloat(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2906,7 +2917,7 @@ static bool intrinsic_asfloat(struct hlsl_ctx *ctx, struct hlsl_type *data_type; data_type = params->args[0]->data_type; - if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) + if (data_type->e.numeric.type == HLSL_TYPE_BOOL || data_type->e.numeric.type == HLSL_TYPE_DOUBLE) { struct vkd3d_string_buffer *string; @@ -2942,7 +2953,7 @@ static bool intrinsic_asuint(struct hlsl_ctx *ctx, } data_type = params->args[0]->data_type; - if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) + if (data_type->e.numeric.type == HLSL_TYPE_BOOL || data_type->e.numeric.type == HLSL_TYPE_DOUBLE) { struct vkd3d_string_buffer *string; @@ -3022,6 +3033,46 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_COS, arg, loc); } +static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool sinh_mode) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_ir_node *arg; + 
const char *fn_name, *type_name; + char *body; + + static const char template[] = + "%s %s(%s x)\n" + "{\n" + " return (exp(x) %s exp(-x)) / 2;\n" + "}\n"; + static const char fn_name_sinh[] = "sinh"; + static const char fn_name_cosh[] = "cosh"; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + type_name = arg->data_type->name; + fn_name = sinh_mode ? fn_name_sinh : fn_name_cosh; + + if (!(body = hlsl_sprintf_alloc(ctx, template, + type_name, fn_name, type_name, sinh_mode ? "-" : "+"))) + return false; + + func = hlsl_compile_internal_function(ctx, fn_name, body); + vkd3d_free(body); + if (!func) + return false; + + return add_user_call(ctx, func, params, loc); +} + +static bool intrinsic_cosh(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return write_cosh_or_sinh(ctx, params, loc, false); +} + static bool intrinsic_cross(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3031,7 +3082,7 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, struct hlsl_type *cast_type; enum hlsl_base_type base; - if (arg1->data_type->base_type == HLSL_TYPE_HALF && arg2->data_type->base_type == HLSL_TYPE_HALF) + if (arg1->data_type->e.numeric.type == HLSL_TYPE_HALF && arg2->data_type->e.numeric.type == HLSL_TYPE_HALF) base = HLSL_TYPE_HALF; else base = HLSL_TYPE_FLOAT; @@ -3155,6 +3206,94 @@ static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); } +static bool intrinsic_determinant(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + static const char determinant2x2[] = + "%s determinant(%s2x2 m)\n" + "{\n" + " return m._11 * m._22 - m._12 * m._21;\n" + "}"; + static const char determinant3x3[] = + "%s determinant(%s3x3 m)\n" + "{\n" + " %s2x2 m1 = { m._22, m._23, m._32, m._33 
};\n" + " %s2x2 m2 = { m._21, m._23, m._31, m._33 };\n" + " %s2x2 m3 = { m._21, m._22, m._31, m._32 };\n" + " %s3 v1 = { m._11, -m._12, m._13 };\n" + " %s3 v2 = { determinant(m1), determinant(m2), determinant(m3) };\n" + " return dot(v1, v2);\n" + "}"; + static const char determinant4x4[] = + "%s determinant(%s4x4 m)\n" + "{\n" + " %s3x3 m1 = { m._22, m._23, m._24, m._32, m._33, m._34, m._42, m._43, m._44 };\n" + " %s3x3 m2 = { m._21, m._23, m._24, m._31, m._33, m._34, m._41, m._43, m._44 };\n" + " %s3x3 m3 = { m._21, m._22, m._24, m._31, m._32, m._34, m._41, m._42, m._44 };\n" + " %s3x3 m4 = { m._21, m._22, m._23, m._31, m._32, m._33, m._41, m._42, m._43 };\n" + " %s4 v1 = { m._11, -m._12, m._13, -m._14 };\n" + " %s4 v2 = { determinant(m1), determinant(m2), determinant(m3), determinant(m4) };\n" + " return dot(v1, v2);\n" + "}"; + static const char *templates[] = + { + [2] = determinant2x2, + [3] = determinant3x3, + [4] = determinant4x4, + }; + + struct hlsl_ir_node *arg = params->args[0]; + const struct hlsl_type *type = arg->data_type; + struct hlsl_ir_function_decl *func; + const char *typename, *template; + unsigned int dim; + char *body; + + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_MATRIX) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid argument type."); + return false; + } + + dim = min(type->dimx, type->dimy); + if (dim == 1) + { + if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) + return false; + return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc); + } + + typename = type->e.numeric.type == HLSL_TYPE_HALF ? 
"half" : "float"; + template = templates[dim]; + + switch (dim) + { + case 2: + body = hlsl_sprintf_alloc(ctx, template, typename, typename); + break; + case 3: + body = hlsl_sprintf_alloc(ctx, template, typename, typename, typename, + typename, typename, typename, typename); + break; + case 4: + body = hlsl_sprintf_alloc(ctx, template, typename, typename, typename, + typename, typename, typename, typename, typename); + break; + default: + vkd3d_unreachable(); + } + + if (!body) + return false; + + func = hlsl_compile_internal_function(ctx, "determinant", body); + vkd3d_free(body); + if (!func) + return false; + + return add_user_call(ctx, func, params, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3478,7 +3617,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *arg1 = params->args[0], *arg2 = params->args[1], *cast1, *cast2; - enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); + enum hlsl_base_type base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); struct hlsl_type *cast_type1 = arg1->data_type, *cast_type2 = arg2->data_type, *matrix_type, *ret_type; unsigned int i, j, k, vect_count = 0; struct hlsl_deref var_deref; @@ -3646,6 +3785,59 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, i, neg, loc); } +static bool intrinsic_refract(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_type *r_type = params->args[0]->data_type; + struct hlsl_type *n_type = params->args[1]->data_type; + struct hlsl_type *i_type = params->args[2]->data_type; + struct hlsl_type *res_type, *idx_type, *scal_type; + struct parse_initializer 
mut_params; + struct hlsl_ir_function_decl *func; + enum hlsl_base_type base; + char *body; + + static const char template[] = + "%s refract(%s r, %s n, %s i)\n" + "{\n" + " %s d, t;\n" + " d = dot(r, n);\n" + " t = 1 - i.x * i.x * (1 - d * d);\n" + " return t >= 0.0 ? i.x * r - (i.x * d + sqrt(t)) * n : 0;\n" + "}"; + + if (r_type->class == HLSL_CLASS_MATRIX + || n_type->class == HLSL_CLASS_MATRIX + || i_type->class == HLSL_CLASS_MATRIX) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Matrix arguments are not supported."); + return false; + } + + assert(params->args_count == 3); + mut_params = *params; + mut_params.args_count = 2; + if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) + return false; + + base = expr_common_base_type(res_type->e.numeric.type, i_type->e.numeric.type); + base = base == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; + res_type = convert_numeric_type(ctx, res_type, base); + idx_type = convert_numeric_type(ctx, i_type, base); + scal_type = hlsl_get_scalar_type(ctx, base); + + if (!(body = hlsl_sprintf_alloc(ctx, template, res_type->name, res_type->name, + res_type->name, idx_type->name, scal_type->name))) + return false; + + func = hlsl_compile_internal_function(ctx, "refract", body); + vkd3d_free(body); + if (!func) + return false; + + return add_user_call(ctx, func, params, loc); +} + static bool intrinsic_round(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3688,7 +3880,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, arg->data_type->dimx, arg->data_type->dimy); - if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) + if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type), &zero_value, loc))) return false; 
hlsl_block_add_instr(params->instrs, zero); @@ -3726,6 +3918,12 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); } +static bool intrinsic_sinh(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return write_cosh_or_sinh(ctx, params, loc, true); +} + /* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a)) */ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) @@ -3798,6 +3996,39 @@ static bool intrinsic_tan(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, sin, cos, loc); } +static bool intrinsic_tanh(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_ir_node *arg; + struct hlsl_type *type; + char *body; + + static const char template[] = + "%s tanh(%s x)\n" + "{\n" + " %s exp_pos, exp_neg;\n" + " exp_pos = exp(x);\n" + " exp_neg = exp(-x);\n" + " return (exp_pos - exp_neg) / (exp_pos + exp_neg);\n" + "}\n"; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + type = arg->data_type; + + if (!(body = hlsl_sprintf_alloc(ctx, template, + type->name, type->name, type->name))) + return false; + + func = hlsl_compile_internal_function(ctx, "tanh", body); + vkd3d_free(body); + if (!func) + return false; + + return add_user_call(ctx, func, params, loc); +} + static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) { @@ -3818,7 +4049,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * } sampler_type = params->args[0]->data_type; - if (sampler_type->class != HLSL_CLASS_OBJECT || 
sampler_type->base_type != HLSL_TYPE_SAMPLER + if (sampler_type->class != HLSL_CLASS_SAMPLER || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) { struct vkd3d_string_buffer *string; @@ -3866,7 +4097,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * return false; } - if (shader_profile_version_ge(ctx, 4, 0)) + if (hlsl_version_ge(ctx, 4, 0)) { unsigned int count = hlsl_sampler_dim_count(dim); struct hlsl_ir_node *divisor; @@ -3913,7 +4144,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * return false; initialize_var_components(ctx, params->instrs, var, &idx, coords); - if (shader_profile_version_ge(ctx, 4, 0)) + if (hlsl_version_ge(ctx, 4, 0)) { if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc))) return false; @@ -4022,7 +4253,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, return true; } - mat_type = hlsl_get_matrix_type(ctx, arg_type->base_type, arg_type->dimy, arg_type->dimx); + mat_type = hlsl_get_matrix_type(ctx, arg_type->e.numeric.type, arg_type->dimy, arg_type->dimx); if (!(var = hlsl_new_synthetic_var(ctx, "transpose", mat_type, loc))) return false; @@ -4099,7 +4330,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) return false; - if (shader_profile_version_ge(ctx, 4, 0)) + if (hlsl_version_ge(ctx, 4, 0)) return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); return true; @@ -4130,6 +4361,7 @@ intrinsic_functions[] = {"clamp", 3, true, intrinsic_clamp}, {"clip", 1, true, intrinsic_clip}, {"cos", 1, true, intrinsic_cos}, + {"cosh", 1, true, intrinsic_cosh}, {"cross", 2, true, intrinsic_cross}, {"ddx", 1, true, intrinsic_ddx}, {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, @@ -4138,6 +4370,7 @@ intrinsic_functions[] = {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, {"ddy_fine", 1, true, 
intrinsic_ddy_fine}, {"degrees", 1, true, intrinsic_degrees}, + {"determinant", 1, true, intrinsic_determinant}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, @@ -4160,15 +4393,18 @@ intrinsic_functions[] = {"pow", 2, true, intrinsic_pow}, {"radians", 1, true, intrinsic_radians}, {"reflect", 2, true, intrinsic_reflect}, + {"refract", 3, true, intrinsic_refract}, {"round", 1, true, intrinsic_round}, {"rsqrt", 1, true, intrinsic_rsqrt}, {"saturate", 1, true, intrinsic_saturate}, {"sign", 1, true, intrinsic_sign}, {"sin", 1, true, intrinsic_sin}, + {"sinh", 1, true, intrinsic_sinh}, {"smoothstep", 3, true, intrinsic_smoothstep}, {"sqrt", 1, true, intrinsic_sqrt}, {"step", 2, true, intrinsic_step}, {"tan", 1, true, intrinsic_tan}, + {"tanh", 1, true, intrinsic_tanh}, {"tex1D", -1, false, intrinsic_tex1D}, {"tex2D", -1, false, intrinsic_tex2D}, {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, @@ -4263,22 +4499,7 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type return NULL; for (i = 0; i < params->args_count; ++i) - { - struct hlsl_ir_node *arg = params->args[i]; - - if (arg->data_type->class == HLSL_CLASS_OBJECT) - { - struct vkd3d_string_buffer *string; - - if ((string = hlsl_type_to_string(ctx, arg->data_type))) - hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Invalid type %s for constructor argument.", string->buffer); - hlsl_release_string_buffer(ctx, string); - continue; - } - - initialize_var_components(ctx, params->instrs, var, &idx, arg); - } + initialize_var_components(ctx, params->instrs, var, &idx, params->args[i]); if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; @@ -4318,26 +4539,34 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) return false; } - else if (common_type->dimx == 1 && common_type->dimy == 1) + else { - common_type 
= hlsl_get_numeric_type(ctx, cond_type->class, - common_type->base_type, cond_type->dimx, cond_type->dimy); - } - else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) - { - /* This condition looks wrong but is correct. - * floatN is compatible with float1xN, but not with floatNx1. */ + cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, + cond_type->dimx, cond_type->dimy); + if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) + return false; - struct vkd3d_string_buffer *cond_string, *value_string; + if (common_type->dimx == 1 && common_type->dimy == 1) + { + common_type = hlsl_get_numeric_type(ctx, cond_type->class, + common_type->e.numeric.type, cond_type->dimx, cond_type->dimy); + } + else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) + { + /* This condition looks wrong but is correct. + * floatN is compatible with float1xN, but not with floatNx1. */ - cond_string = hlsl_type_to_string(ctx, cond_type); - value_string = hlsl_type_to_string(ctx, common_type); - if (cond_string && value_string) - hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Ternary condition type '%s' is not compatible with value type '%s'.", - cond_string->buffer, value_string->buffer); - hlsl_release_string_buffer(ctx, cond_string); - hlsl_release_string_buffer(ctx, value_string); + struct vkd3d_string_buffer *cond_string, *value_string; + + cond_string = hlsl_type_to_string(ctx, cond_type); + value_string = hlsl_type_to_string(ctx, common_type); + if (cond_string && value_string) + hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Ternary condition type '%s' is not compatible with value type '%s'.", + cond_string->buffer, value_string->buffer); + hlsl_release_string_buffer(ctx, cond_string); + hlsl_release_string_buffer(ctx, value_string); + } } if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc))) @@ 
-4362,9 +4591,16 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, hlsl_release_string_buffer(ctx, second_string); } + cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, + cond_type->dimx, cond_type->dimy); + if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) + return false; + common_type = first->data_type; } + assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); + args[0] = cond; args[1] = first; args[2] = second; @@ -4490,8 +4726,7 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc } sampler_type = params->args[0]->data_type; - if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { struct vkd3d_string_buffer *string; @@ -4555,8 +4790,7 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * } sampler_type = params->args[0]->data_type; - if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_COMPARISON) + if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_COMPARISON) { struct vkd3d_string_buffer *string; @@ -4666,8 +4900,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc } sampler_type = params->args[0]->data_type; - if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { struct vkd3d_string_buffer *string; @@ -4689,7 +4922,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 
sampler_dim), loc))) return false; - load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->base_type, 4); + load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->e.numeric.type, 4); load_params.resource = object; load_params.sampler = params->args[0]; @@ -4903,8 +5136,7 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block * } sampler_type = params->args[0]->data_type; - if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { struct vkd3d_string_buffer *string; @@ -4966,8 +5198,7 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block } sampler_type = params->args[0]->data_type; - if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { struct vkd3d_string_buffer *string; @@ -5051,8 +5282,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru const struct hlsl_type *object_type = object->data_type; const struct method_function *method; - if (object_type->class != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE - || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + if (object_type->class != HLSL_CLASS_TEXTURE || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) { struct vkd3d_string_buffer *string; @@ -5193,6 +5423,16 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, hlsl_release_string_buffer(ctx, string); } +static bool state_block_add_entry(struct hlsl_state_block *state_block, struct hlsl_state_block_entry *entry) +{ + if (!vkd3d_array_reserve((void 
**)&state_block->entries, &state_block->capacity, state_block->count + 1, + sizeof(*state_block->entries))) + return false; + + state_block->entries[state_block->count++] = entry; + return true; +} + } %locations @@ -5233,6 +5473,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct parse_attribute_list attr_list; struct hlsl_ir_switch_case *switch_case; struct hlsl_scope *scope; + struct hlsl_state_block *state_block; + struct state_block_index state_block_index; } %token KW_BLENDSTATE @@ -5243,6 +5485,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_CENTROID %token KW_COLUMN_MAJOR %token KW_COMPILE +%token KW_COMPUTESHADER %token KW_CONST %token KW_CONTINUE %token KW_DEFAULT @@ -5250,14 +5493,17 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_DEPTHSTENCILVIEW %token KW_DISCARD %token KW_DO +%token KW_DOMAINSHADER %token KW_DOUBLE %token KW_ELSE +%token KW_EXPORT %token KW_EXTERN %token KW_FALSE %token KW_FOR %token KW_FXGROUP %token KW_GEOMETRYSHADER %token KW_GROUPSHARED +%token KW_HULLSHADER %token KW_IF %token KW_IN %token KW_INLINE @@ -5271,7 +5517,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_PACKOFFSET %token KW_PASS %token KW_PIXELSHADER -%token KW_PRECISE %token KW_RASTERIZERORDEREDBUFFER %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER %token KW_RASTERIZERORDEREDTEXTURE1D @@ -5429,6 +5674,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %type any_identifier %type var_identifier +%type stateblock_lhs_identifier %type name_opt %type parameter @@ -5436,13 +5682,17 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %type param_list %type parameters -%type register_opt -%type packoffset_opt +%type register_reservation +%type packoffset_reservation %type texture_type texture_ms_type uav_type rov_type %type semantic +%type 
state_block + +%type state_block_index_opt + %type switch_case %type field_type @@ -5453,6 +5703,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %type type_no_void %type typedef_type +%type state_block_list %type type_spec %type variable_decl %type variable_def @@ -5483,9 +5734,9 @@ name_opt: | any_identifier pass: - KW_PASS name_opt annotations_opt '{' '}' + KW_PASS name_opt annotations_opt '{' state_block_start state_block '}' { - if (!add_pass(ctx, $2, $3, &@1)) + if (!add_pass(ctx, $2, $3, $6, &@1)) YYABORT; } @@ -5535,10 +5786,6 @@ technique10: struct hlsl_scope *scope = ctx->cur_scope; hlsl_pop_scope(ctx); - if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT && ctx->profile->major_version == 2) - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "The 'technique10' keyword is invalid for this profile."); - if (!add_technique(ctx, $2, scope, $3, "technique10", &@1)) YYABORT; } @@ -5580,12 +5827,12 @@ effect_group: } buffer_declaration: - buffer_type any_identifier colon_attribute + var_modifiers buffer_type any_identifier colon_attribute annotations_opt { - if ($3.semantic.name) - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); + if ($4.semantic.name) + hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); - if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, &@2))) + if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $2, $3, $1, &$4.reg_reservation, $5, &@3))) YYABORT; } @@ -5792,11 +6039,7 @@ attribute_list: $$ = $1; if (!(new_array = vkd3d_realloc($$.attrs, ($$.count + 1) * sizeof(*$$.attrs)))) { - unsigned int i; - - for (i = 0; i < $$.count; ++i) - hlsl_free_attribute((void *)$$.attrs[i]); - vkd3d_free($$.attrs); + cleanup_parse_attribute_list(&$$); YYABORT; } $$.attrs = new_array; @@ -5884,9 +6127,9 @@ func_prototype_no_attrs: /* Functions are unconditionally inlined. 
*/ modifiers &= ~HLSL_MODIFIER_INLINE; - if (modifiers & ~HLSL_MODIFIERS_MAJORITY_MASK) + if (modifiers & ~(HLSL_MODIFIERS_MAJORITY_MASK | HLSL_MODIFIER_EXPORT)) hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Only majority modifiers are allowed on functions."); + "Unexpected modifier used on a function."); if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) YYABORT; if ((var = hlsl_get_var(ctx->globals, $3))) @@ -6002,11 +6245,7 @@ func_prototype: } else { - unsigned int i; - - for (i = 0; i < $1.count; ++i) - hlsl_free_attribute((void *)$1.attrs[i]); - vkd3d_free($1.attrs); + cleanup_parse_attribute_list(&$1); } $$ = $2; } @@ -6060,12 +6299,12 @@ colon_attribute: $$.reg_reservation.reg_type = 0; $$.reg_reservation.offset_type = 0; } - | register_opt + | register_reservation { $$.semantic = (struct hlsl_semantic){0}; $$.reg_reservation = $1; } - | packoffset_opt + | packoffset_reservation { $$.semantic = (struct hlsl_semantic){0}; $$.reg_reservation = $1; @@ -6087,22 +6326,57 @@ semantic: } /* FIXME: Writemasks */ -register_opt: +register_reservation: ':' KW_REGISTER '(' any_identifier ')' { - $$ = parse_reg_reservation($4); + memset(&$$, 0, sizeof($$)); + if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) + hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $4); + vkd3d_free($4); } | ':' KW_REGISTER '(' any_identifier ',' any_identifier ')' { - FIXME("Ignoring shader target %s in a register reservation.\n", debugstr_a($4)); - vkd3d_free($4); + memset(&$$, 0, sizeof($$)); + if (parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) + { + hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); + } + else if (parse_reservation_space($6, &$$.reg_space)) + { + if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) + hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $4); + } + else + { + 
hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register or space reservation '%s'.", $6); + } - $$ = parse_reg_reservation($6); + vkd3d_free($4); vkd3d_free($6); } + | ':' KW_REGISTER '(' any_identifier ',' any_identifier ',' any_identifier ')' + { + hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); -packoffset_opt: + memset(&$$, 0, sizeof($$)); + if (!parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) + hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $6); + + if (!parse_reservation_space($8, &$$.reg_space)) + hlsl_error(ctx, &@8, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register space reservation '%s'.", $8); + + vkd3d_free($4); + vkd3d_free($6); + vkd3d_free($8); + } + +packoffset_reservation: ':' KW_PACKOFFSET '(' any_identifier ')' { $$ = parse_packoffset(ctx, $4, NULL, &@$); @@ -6307,7 +6581,7 @@ type_no_void: YYABORT; } - $$ = hlsl_type_clone(ctx, hlsl_get_vector_type(ctx, $3->base_type, $5), 0, 0); + $$ = hlsl_type_clone(ctx, hlsl_get_vector_type(ctx, $3->e.numeric.type, $5), 0, 0); $$->is_minimum_precision = $3->is_minimum_precision; } | KW_VECTOR @@ -6340,7 +6614,7 @@ type_no_void: YYABORT; } - $$ = hlsl_type_clone(ctx, hlsl_get_matrix_type(ctx, $3->base_type, $7, $5), 0, 0); + $$ = hlsl_type_clone(ctx, hlsl_get_matrix_type(ctx, $3->e.numeric.type, $7, $5), 0, 0); $$->is_minimum_precision = $3->is_minimum_precision; } | KW_MATRIX @@ -6388,7 +6662,7 @@ type_no_void: { validate_texture_format_type(ctx, $3, &@3); - if (shader_profile_version_lt(ctx, 4, 1)) + if (hlsl_version_lt(ctx, 4, 1)) { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); @@ -6427,7 +6701,7 @@ type_no_void: $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); if ($$->is_minimum_precision) { - if (shader_profile_version_lt(ctx, 4, 0)) + if (hlsl_version_lt(ctx, 4, 0)) { 
hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Target profile doesn't support minimum-precision types."); @@ -6454,6 +6728,14 @@ type_no_void: { $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilView", true, true); } + | KW_VERTEXSHADER + { + $$ = hlsl_get_type(ctx->cur_scope, "VertexShader", true, true); + } + | KW_PIXELSHADER + { + $$ = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); + } type: type_no_void @@ -6583,22 +6865,97 @@ variable_decl: $$->reg_reservation = $3.reg_reservation; } -state: - any_identifier '=' expr ';' - { - vkd3d_free($1); - destroy_block($3); - } - state_block_start: %empty { ctx->in_state_block = 1; } +stateblock_lhs_identifier: + any_identifier + { + $$ = $1; + } + | KW_PIXELSHADER + { + if (!($$ = hlsl_strdup(ctx, "pixelshader"))) + YYABORT; + } + | KW_VERTEXSHADER + { + if (!($$ = hlsl_strdup(ctx, "vertexshader"))) + YYABORT; + } + +state_block_index_opt: + %empty + { + $$.has_index = false; + $$.index = 0; + } + | '[' C_INTEGER ']' + { + if ($2 < 0) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, + "State block array index is not a positive integer constant."); + YYABORT; + } + $$.has_index = true; + $$.index = $2; + } + state_block: %empty - | state_block state + { + if (!($$ = hlsl_alloc(ctx, sizeof(*$$)))) + YYABORT; + } + | state_block stateblock_lhs_identifier state_block_index_opt '=' complex_initializer ';' + { + struct hlsl_state_block_entry *entry; + unsigned int i; + + if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) + YYABORT; + + entry->name = $2; + entry->lhs_has_index = $3.has_index; + entry->lhs_index = $3.index; + + entry->instrs = $5.instrs; + + entry->args_count = $5.args_count; + if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) + YYABORT; + for (i = 0; i < entry->args_count; ++i) + hlsl_src_from_node(&entry->args[i], $5.args[i]); + vkd3d_free($5.args); + + $$ = $1; + state_block_add_entry($$, entry); + } + +state_block_list: + '{' state_block '}' 
+ { + if (!($$ = hlsl_alloc(ctx, sizeof(*$$)))) + YYABORT; + + if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, + $$->state_block_count + 1, sizeof(*$$->state_blocks)))) + YYABORT; + $$->state_blocks[$$->state_block_count++] = $2; + } + | state_block_list ',' '{' state_block '}' + { + $$ = $1; + + if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, + $$->state_block_count + 1, sizeof(*$$->state_blocks)))) + YYABORT; + $$->state_blocks[$$->state_block_count++] = $4; + } variable_def: variable_decl @@ -6611,6 +6968,24 @@ variable_def: { $$ = $1; ctx->in_state_block = 0; + + if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, + $$->state_block_count + 1, sizeof(*$$->state_blocks)))) + YYABORT; + $$->state_blocks[$$->state_block_count++] = $4; + } + | variable_decl '{' state_block_start state_block_list '}' + { + $$ = $1; + ctx->in_state_block = 0; + + $$->state_blocks = $4->state_blocks; + $$->state_block_count = $4->state_block_count; + $$->state_block_capacity = $4->state_block_capacity; + $4->state_blocks = NULL; + $4->state_block_count = 0; + $4->state_block_capacity = 0; + free_parse_variable_def($4); } variable_def_typed: @@ -6727,10 +7102,6 @@ var_modifiers: { $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOPERSPECTIVE, &@1); } - | KW_PRECISE var_modifiers - { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); - } | KW_SHARED var_modifiers { $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, &@1); @@ -6779,7 +7150,20 @@ var_modifiers: { $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_INLINE, &@1); } - + | KW_EXPORT var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_EXPORT, &@1); + } + | var_identifier var_modifiers + { + if (!strcmp($1, "precise")) + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); + else if (!strcmp($1, "single")) + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_SINGLE, &@1); + else + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER, + 
"Unknown modifier %s.", debugstr_a($1)); + } complex_initializer: initializer_expr @@ -6978,6 +7362,7 @@ selection_statement: { destroy_block($6.then_block); destroy_block($6.else_block); + cleanup_parse_attribute_list(&$1); YYABORT; } @@ -6985,10 +7370,12 @@ selection_statement: { destroy_block($6.then_block); destroy_block($6.else_block); + cleanup_parse_attribute_list(&$1); YYABORT; } destroy_block($6.then_block); destroy_block($6.else_block); + cleanup_parse_attribute_list(&$1); $$ = $4; hlsl_block_add_instr($$, instr); @@ -7011,21 +7398,25 @@ loop_statement: { $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3); hlsl_pop_scope(ctx); + cleanup_parse_attribute_list(&$1); } | attribute_list_optional loop_scope_start KW_DO statement KW_WHILE '(' expr ')' ';' { $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3); hlsl_pop_scope(ctx); + cleanup_parse_attribute_list(&$1); } | attribute_list_optional loop_scope_start KW_FOR '(' expr_statement expr_statement expr_optional ')' statement { $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); hlsl_pop_scope(ctx); + cleanup_parse_attribute_list(&$1); } | attribute_list_optional loop_scope_start KW_FOR '(' declaration expr_statement expr_optional ')' statement { $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); hlsl_pop_scope(ctx); + cleanup_parse_attribute_list(&$1); } switch_statement: @@ -7038,6 +7429,7 @@ switch_statement: { destroy_switch_cases($8); destroy_block($5); + cleanup_parse_attribute_list(&$1); YYABORT; } @@ -7048,6 +7440,7 @@ switch_statement: if (!s) { destroy_block($5); + cleanup_parse_attribute_list(&$1); YYABORT; } @@ -7055,6 +7448,7 @@ switch_statement: hlsl_block_add_instr($$, s); hlsl_pop_scope(ctx); + cleanup_parse_attribute_list(&$1); } switch_case: @@ -7227,15 +7621,13 @@ primary_expr: { if (ctx->in_state_block) { - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; + struct hlsl_ir_node *constant; - if (!(var = hlsl_new_synthetic_var(ctx, 
"state_block_expr", - hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &@1))) + if (!(constant = hlsl_new_stateblock_constant(ctx, $1, &@1))) YYABORT; - if (!(load = hlsl_new_var_load(ctx, var, &@1))) - YYABORT; - if (!($$ = make_block(ctx, &load->node))) + vkd3d_free($1); + + if (!($$ = make_block(ctx, constant))) YYABORT; } else diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 307f86f55b7..bdb72a1fab9 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -263,8 +263,8 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls if (type1->dimx != type2->dimx) return false; - return base_type_get_semantic_equivalent(type1->base_type) - == base_type_get_semantic_equivalent(type2->base_type); + return base_type_get_semantic_equivalent(type1->e.numeric.type) + == base_type_get_semantic_equivalent(type2->e.numeric.type); } static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, @@ -355,10 +355,10 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s if (!semantic->name) return; - vector_type_dst = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); + vector_type_dst = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); vector_type_src = vector_type_dst; if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) - vector_type_src = hlsl_get_vector_type(ctx, type->base_type, 4); + vector_type_src = hlsl_get_vector_type(ctx, type->e.numeric.type, 4); for (i = 0; i < hlsl_type_major_size(type); ++i) { @@ -427,7 +427,10 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * { field = &type->e.record.fields[i]; if (hlsl_type_is_resource(field->type)) + { + hlsl_fixme(ctx, &field->loc, "Prepend uniform copies for resource components within structs."); continue; + } 
validate_field_semantic(ctx, field); semantic = &field->semantic; elem_semantic_index = semantic->index; @@ -497,7 +500,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s if (!semantic->name) return; - vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); + vector_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); for (i = 0; i < hlsl_type_major_size(type); ++i) { @@ -1098,7 +1101,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_ir_node *resource_load; assert(coords->data_type->class == HLSL_CLASS_VECTOR); - assert(coords->data_type->base_type == HLSL_TYPE_UINT); + assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); assert(coords->data_type->dimx == dim_count); if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) @@ -1188,7 +1191,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s { struct hlsl_ir_node *new_cast, *swizzle; - dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->base_type); + dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->e.numeric.type); /* We need to preserve the cast since it might be doing more than just * turning the scalar into a vector. 
*/ if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_scalar_type, &cast->node.loc))) @@ -1562,7 +1565,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count)); - if (instr->data_type->class != HLSL_CLASS_OBJECT) + if (new_instr->data_type->class == HLSL_CLASS_SCALAR || new_instr->data_type->class == HLSL_CLASS_VECTOR) { struct hlsl_ir_node *swizzle_node; @@ -1622,7 +1625,11 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: - case HLSL_CLASS_OBJECT: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: break; case HLSL_CLASS_MATRIX: @@ -1631,6 +1638,15 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, /* FIXME: Actually we shouldn't even get here, but we don't split * matrices yet. 
*/ return false; + + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: + vkd3d_unreachable(); } if (copy_propagation_replace_with_constant_vector(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) @@ -1739,7 +1755,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s { unsigned int writemask = store->writemask; - if (store->rhs.node->data_type->class == HLSL_CLASS_OBJECT) + if (!hlsl_is_numeric_type(store->rhs.node->data_type)) writemask = VKD3DSP_WRITEMASK_0; copy_propagation_set_value(ctx, var_def, start, writemask, store->rhs.node, store->node.index); } @@ -2049,7 +2065,7 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst src_type = expr->operands[0].node->data_type; if (hlsl_types_are_equal(src_type, dst_type) - || (src_type->base_type == dst_type->base_type && is_vec1(src_type) && is_vec1(dst_type))) + || (src_type->e.numeric.type == dst_type->e.numeric.type && is_vec1(src_type) && is_vec1(dst_type))) { hlsl_replace_node(&expr->node, expr->operands[0].node); return true; @@ -2176,7 +2192,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr type = rhs->data_type; if (type->class != HLSL_CLASS_MATRIX) return false; - element_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); + element_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); if (rhs->type != HLSL_IR_LOAD) { @@ -2213,7 +2229,7 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins { struct hlsl_ir_node *new_cast, *swizzle; - dst_vector_type = hlsl_get_vector_type(ctx, dst_type->base_type, src_type->dimx); + dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->dimx); /* We need to preserve the cast since it might be doing more 
than just * narrowing the vector. */ if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc))) @@ -2467,7 +2483,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir op = HLSL_OP2_DOT; if (type->dimx == 1) - op = type->base_type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; + op = type->e.numeric.type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; /* Note: We may be creating a DOT for bool vectors here, which we need to lower to * LOGIC_OR + LOGIC_AND. */ @@ -2603,8 +2619,8 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in hlsl_copy_deref(ctx, &load->sampler, &load->resource); load->resource.var = var; - assert(hlsl_deref_get_type(ctx, &load->resource)->base_type == HLSL_TYPE_TEXTURE); - assert(hlsl_deref_get_type(ctx, &load->sampler)->base_type == HLSL_TYPE_SAMPLER); + assert(hlsl_deref_get_type(ctx, &load->resource)->class == HLSL_CLASS_TEXTURE); + assert(hlsl_deref_get_type(ctx, &load->sampler)->class == HLSL_CLASS_SAMPLER); return true; } @@ -2647,10 +2663,11 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) return false; } -/* Append a FLOOR before a CAST to int or uint (which is written as a mere MOV). */ +/* Turn CAST to int or uint into FLOOR + REINTERPRET (which is written as a mere MOV). 
*/ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { - struct hlsl_ir_node *arg, *floor, *cast2; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; + struct hlsl_ir_node *arg, *floor, *res; struct hlsl_ir_expr *expr; if (instr->type != HLSL_IR_EXPR) @@ -2660,22 +2677,20 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; arg = expr->operands[0].node; - if (instr->data_type->base_type != HLSL_TYPE_INT && instr->data_type->base_type != HLSL_TYPE_UINT) + if (instr->data_type->e.numeric.type != HLSL_TYPE_INT && instr->data_type->e.numeric.type != HLSL_TYPE_UINT) return false; - if (arg->data_type->base_type != HLSL_TYPE_FLOAT && arg->data_type->base_type != HLSL_TYPE_HALF) - return false; - - /* Check that the argument is not already a FLOOR */ - if (arg->type == HLSL_IR_EXPR && hlsl_ir_expr(arg)->op == HLSL_OP1_FLOOR) + if (arg->data_type->e.numeric.type != HLSL_TYPE_FLOAT && arg->data_type->e.numeric.type != HLSL_TYPE_HALF) return false; if (!(floor = hlsl_new_unary_expr(ctx, HLSL_OP1_FLOOR, arg, &instr->loc))) return false; hlsl_block_add_instr(block, floor); - if (!(cast2 = hlsl_new_cast(ctx, floor, instr->data_type, &instr->loc))) + memset(operands, 0, sizeof(operands)); + operands[0] = floor; + if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) return false; - hlsl_block_add_instr(block, cast2); + hlsl_block_add_instr(block, res); return true; } @@ -2903,12 +2918,60 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct return true; } -/* Use 'movc' for the ternary operator. 
*/ +static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; + struct hlsl_ir_node *arg, *arg_cast, *neg, *one, *sub, *res; + struct hlsl_constant_value one_value; + struct hlsl_type *float_type; + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP1_LOGIC_NOT) + return false; + + arg = expr->operands[0].node; + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); + + /* If this is happens, it means we failed to cast the argument to boolean somewhere. */ + assert(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); + + if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &arg->loc))) + return false; + hlsl_block_add_instr(block, arg_cast); + + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg_cast, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg); + + one_value.u[0].f = 1.0; + one_value.u[1].f = 1.0; + one_value.u[2].f = 1.0; + one_value.u[3].f = 1.0; + if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, one); + + if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, neg))) + return false; + hlsl_block_add_instr(block, sub); + + memset(operands, 0, sizeof(operands)); + operands[0] = sub; + if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, res); + + return true; +} + +/* Lower TERNARY to CMP for SM1. 
*/ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement; - struct hlsl_ir_node *zero, *cond, *first, *second; - struct hlsl_constant_value zero_value = { 0 }; + struct hlsl_ir_node *cond, *first, *second, *float_cond, *neg; struct hlsl_ir_expr *expr; struct hlsl_type *type; @@ -2925,59 +2988,286 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru if (cond->data_type->class > HLSL_CLASS_VECTOR || instr->data_type->class > HLSL_CLASS_VECTOR) { - hlsl_fixme(ctx, &instr->loc, "Lower ternary of type other than scalar or vector.\n"); + hlsl_fixme(ctx, &instr->loc, "Lower ternary of type other than scalar or vector."); return false; } - if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) - { - struct hlsl_ir_node *abs, *neg; + assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); - if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond, &instr->loc))) - return false; - hlsl_block_add_instr(block, abs); + type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, + instr->data_type->dimx, instr->data_type->dimy); - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) - return false; - hlsl_block_add_instr(block, neg); - - operands[0] = neg; - operands[1] = second; - operands[2] = first; - if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) - return false; - } - else if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) - { - hlsl_fixme(ctx, &instr->loc, "Ternary operator is not implemented for %s profile.", ctx->profile->name); + if (!(float_cond = hlsl_new_cast(ctx, cond, type, &instr->loc))) return false; + hlsl_block_add_instr(block, float_cond); + + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, float_cond, &instr->loc))) + return false; + 
hlsl_block_add_instr(block, neg); + + memset(operands, 0, sizeof(operands)); + operands[0] = neg; + operands[1] = second; + operands[2] = first; + if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) + return false; + + hlsl_block_add_instr(block, replacement); + return true; +} + +static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + struct hlsl_block *block) +{ + struct hlsl_ir_node *arg1, *arg1_cast, *arg2, *arg2_cast, *slt, *res, *ret; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; + struct hlsl_type *float_type; + struct hlsl_ir_expr *expr; + bool negate = false; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP2_EQUAL && expr->op != HLSL_OP2_NEQUAL && expr->op != HLSL_OP2_LESS + && expr->op != HLSL_OP2_GEQUAL) + return false; + + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + + if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg1_cast); + + if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg2_cast); + + switch (expr->op) + { + case HLSL_OP2_EQUAL: + case HLSL_OP2_NEQUAL: + { + struct hlsl_ir_node *neg, *sub, *abs, *abs_neg; + + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg); + + if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) + return false; + hlsl_block_add_instr(block, sub); + + if (ctx->profile->major_version >= 3) + { + if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, sub, &instr->loc))) + return false; + hlsl_block_add_instr(block, abs); + } + else + { + /* Use MUL as a precarious ABS. 
*/ + if (!(abs = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, sub, sub))) + return false; + hlsl_block_add_instr(block, abs); + } + + if (!(abs_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) + return false; + hlsl_block_add_instr(block, abs_neg); + + if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, abs_neg, abs))) + return false; + hlsl_block_add_instr(block, slt); + + negate = (expr->op == HLSL_OP2_EQUAL); + break; + } + + case HLSL_OP2_GEQUAL: + case HLSL_OP2_LESS: + { + if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg1_cast, arg2_cast))) + return false; + hlsl_block_add_instr(block, slt); + + negate = (expr->op == HLSL_OP2_GEQUAL); + break; + } + + default: + vkd3d_unreachable(); + } + + if (negate) + { + struct hlsl_constant_value one_value; + struct hlsl_ir_node *one, *slt_neg; + + one_value.u[0].f = 1.0; + one_value.u[1].f = 1.0; + one_value.u[2].f = 1.0; + one_value.u[3].f = 1.0; + if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, one); + + if (!(slt_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc))) + return false; + hlsl_block_add_instr(block, slt_neg); + + if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, slt_neg))) + return false; + hlsl_block_add_instr(block, res); } else { - if (cond->data_type->base_type == HLSL_TYPE_FLOAT) - { - if (!(zero = hlsl_new_constant(ctx, cond->data_type, &zero_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, zero); - - operands[0] = zero; - operands[1] = cond; - type = cond->data_type; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); - if (!(cond = hlsl_new_expr(ctx, HLSL_OP2_NEQUAL, operands, type, &instr->loc))) - return false; - hlsl_block_add_instr(block, cond); - } - - memset(operands, 0, sizeof(operands)); - operands[0] = cond; - operands[1] = first; - operands[2] = second; - if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, 
first->data_type, &instr->loc))) - return false; + res = slt; } - hlsl_block_add_instr(block, replacement); + /* We need a REINTERPRET so that the HLSL IR code is valid. SLT and its arguments must be FLOAT, + * and casts to BOOL have already been lowered to "!= 0". */ + memset(operands, 0, sizeof(operands)); + operands[0] = res; + if (!(ret = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, ret); + + return true; +} + +/* Intended to be used for SM1-SM3, lowers SLT instructions (only available in vertex shaders) to + * CMP instructions (only available in pixel shaders). + * Based on the following equivalence: + * SLT(x, y) + * = (x < y) ? 1.0 : 0.0 + * = ((x - y) >= 0) ? 0.0 : 1.0 + * = CMP(x - y, 0.0, 1.0) + */ +static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_node *arg1, *arg2, *arg1_cast, *arg2_cast, *neg, *sub, *zero, *one, *cmp; + struct hlsl_constant_value zero_value, one_value; + struct hlsl_type *float_type; + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP2_SLT) + return false; + + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + + if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg1_cast); + + if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg2_cast); + + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg); + + if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) + return false; + hlsl_block_add_instr(block, sub); + + memset(&zero_value, 0, sizeof(zero_value)); + if (!(zero = hlsl_new_constant(ctx, 
float_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, zero); + + one_value.u[0].f = 1.0; + one_value.u[1].f = 1.0; + one_value.u[2].f = 1.0; + one_value.u[3].f = 1.0; + if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, one); + + if (!(cmp = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, sub, zero, one))) + return false; + hlsl_block_add_instr(block, cmp); + + return true; +} + +/* Intended to be used for SM1-SM3, lowers CMP instructions (only available in pixel shaders) to + * SLT instructions (only available in vertex shaders). + * Based on the following equivalence: + * CMP(x, y, z) + * = (x >= 0) ? y : z + * = z * ((x < 0) ? 1.0 : 0.0) + y * ((x < 0) ? 0.0 : 1.0) + * = z * SLT(x, 0.0) + y * (1 - SLT(x, 0.0)) + */ +static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_node *args[3], *args_cast[3], *slt, *neg_slt, *sub, *zero, *one, *mul1, *mul2, *add; + struct hlsl_constant_value zero_value, one_value; + struct hlsl_type *float_type; + struct hlsl_ir_expr *expr; + unsigned int i; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP3_CMP) + return false; + + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + + for (i = 0; i < 3; ++i) + { + args[i] = expr->operands[i].node; + + if (!(args_cast[i] = hlsl_new_cast(ctx, args[i], float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, args_cast[i]); + } + + memset(&zero_value, 0, sizeof(zero_value)); + if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, zero); + + one_value.u[0].f = 1.0; + one_value.u[1].f = 1.0; + one_value.u[2].f = 1.0; + one_value.u[3].f = 1.0; + if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, one); 
+ + if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, args_cast[0], zero))) + return false; + hlsl_block_add_instr(block, slt); + + if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[2], slt))) + return false; + hlsl_block_add_instr(block, mul1); + + if (!(neg_slt = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg_slt); + + if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, neg_slt))) + return false; + hlsl_block_add_instr(block, sub); + + if (!(mul2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[1], sub))) + return false; + hlsl_block_add_instr(block, mul2); + + if (!(add = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, mul1, mul2))) + return false; + hlsl_block_add_instr(block, add); + return true; } @@ -2996,7 +3286,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr arg_type = expr->operands[0].node->data_type; if (type->class > HLSL_CLASS_VECTOR || arg_type->class > HLSL_CLASS_VECTOR) return false; - if (type->base_type != HLSL_TYPE_BOOL) + if (type->e.numeric.type != HLSL_TYPE_BOOL) return false; /* Narrowing casts should have already been lowered. 
*/ @@ -3018,11 +3308,21 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) { + struct hlsl_type *cond_type = condition->data_type; struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; struct hlsl_ir_node *cond; assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); + if (cond_type->e.numeric.type != HLSL_TYPE_BOOL) + { + cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->dimx, cond_type->dimy); + + if (!(condition = hlsl_new_cast(ctx, condition, cond_type, &condition->loc))) + return NULL; + hlsl_block_add_instr(instrs, condition); + } + operands[0] = condition; operands[1] = if_true; operands[2] = if_false; @@ -3050,7 +3350,7 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; - if (type->base_type != HLSL_TYPE_INT) + if (type->e.numeric.type != HLSL_TYPE_INT) return false; utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); @@ -3116,7 +3416,7 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; - if (type->base_type != HLSL_TYPE_INT) + if (type->e.numeric.type != HLSL_TYPE_INT) return false; utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); @@ -3175,7 +3475,7 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru return false; if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; - if (type->base_type != HLSL_TYPE_INT) + if (type->e.numeric.type != HLSL_TYPE_INT) return false; arg = expr->operands[0].node; 
@@ -3206,14 +3506,14 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru if (expr->op != HLSL_OP2_DOT) return false; - if (type->base_type == HLSL_TYPE_INT || type->base_type == HLSL_TYPE_UINT - || type->base_type == HLSL_TYPE_BOOL) + if (type->e.numeric.type == HLSL_TYPE_INT || type->e.numeric.type == HLSL_TYPE_UINT + || type->e.numeric.type == HLSL_TYPE_BOOL) { arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; assert(arg1->data_type->dimx == arg2->data_type->dimx); dimx = arg1->data_type->dimx; - is_bool = type->base_type == HLSL_TYPE_BOOL; + is_bool = type->e.numeric.type == HLSL_TYPE_BOOL; if (!(mult = hlsl_new_binary_expr(ctx, is_bool ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2))) return false; @@ -3259,7 +3559,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return false; if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; - if (type->base_type != HLSL_TYPE_FLOAT) + if (type->e.numeric.type != HLSL_TYPE_FLOAT) return false; btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); @@ -3308,6 +3608,63 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return true; } +static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op == HLSL_OP1_CAST || instr->data_type->e.numeric.type == HLSL_TYPE_FLOAT) + return false; + + switch (expr->op) + { + case HLSL_OP1_ABS: + case HLSL_OP1_NEG: + case HLSL_OP2_ADD: + case HLSL_OP2_DIV: + case HLSL_OP2_LOGIC_AND: + case HLSL_OP2_LOGIC_OR: + case HLSL_OP2_MAX: + case HLSL_OP2_MIN: + case HLSL_OP2_MUL: + { + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_ir_node *arg, *arg_cast, *float_expr, *ret; + struct hlsl_type *float_type; + unsigned int i; + 
+ for (i = 0; i < HLSL_MAX_OPERANDS; ++i) + { + arg = expr->operands[i].node; + if (!arg) + continue; + + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); + if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg_cast); + + operands[i] = arg_cast; + } + + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + if (!(float_expr = hlsl_new_expr(ctx, expr->op, operands, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, float_expr); + + if (!(ret = hlsl_new_cast(ctx, float_expr, instr->data_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, ret); + + return true; + } + default: + return false; + } +} + static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; @@ -3402,6 +3759,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) case HLSL_IR_RESOURCE_STORE: case HLSL_IR_SWITCH: break; + case HLSL_IR_STATEBLOCK_CONSTANT: + /* Stateblock constants should not appear in the shader program. */ + vkd3d_unreachable(); } return false; @@ -3457,9 +3817,6 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) { unsigned int r; - if (!hlsl_type_is_resource(var->data_type)) - continue; - if (var->reg_reservation.reg_type) { for (r = 0; r <= HLSL_REGSET_LAST_OBJECT; ++r) @@ -3493,6 +3850,22 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) } } +static void deref_mark_last_read(struct hlsl_deref *deref, unsigned int last_read) +{ + unsigned int i; + + if (hlsl_deref_is_lowered(deref)) + { + if (deref->rel_offset.node) + deref->rel_offset.node->last_read = last_read; + } + else + { + for (i = 0; i < deref->path_len; ++i) + deref->path[i].node->last_read = last_read; + } +} + /* Compute the earliest and latest liveness for each variable. 
In the case that * a variable is accessed inside of a loop, we promote its liveness to extend * to at least the range of the entire loop. We also do this for nodes, so that @@ -3512,6 +3885,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop case HLSL_IR_CALL: /* We should have inlined all calls before computing liveness. */ vkd3d_unreachable(); + case HLSL_IR_STATEBLOCK_CONSTANT: + /* Stateblock constants should not appear in the shader program. */ + vkd3d_unreachable(); case HLSL_IR_STORE: { @@ -3521,8 +3897,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop if (!var->first_write) var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; store->rhs.node->last_read = last_read; - if (store->lhs.rel_offset.node) - store->lhs.rel_offset.node->last_read = last_read; + deref_mark_last_read(&store->lhs, last_read); break; } case HLSL_IR_EXPR: @@ -3549,8 +3924,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = load->src.var; var->last_read = max(var->last_read, last_read); - if (load->src.rel_offset.node) - load->src.rel_offset.node->last_read = last_read; + deref_mark_last_read(&load->src, last_read); break; } case HLSL_IR_LOOP: @@ -3567,14 +3941,12 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = load->resource.var; var->last_read = max(var->last_read, last_read); - if (load->resource.rel_offset.node) - load->resource.rel_offset.node->last_read = last_read; + deref_mark_last_read(&load->resource, last_read); if ((var = load->sampler.var)) { var->last_read = max(var->last_read, last_read); - if (load->sampler.rel_offset.node) - load->sampler.rel_offset.node->last_read = last_read; + deref_mark_last_read(&load->sampler, last_read); } if (load->coords.node) @@ -3599,8 +3971,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = store->resource.var; var->last_read = 
max(var->last_read, last_read); - if (store->resource.rel_offset.node) - store->resource.rel_offset.node->last_read = last_read; + deref_mark_last_read(&store->resource, last_read); store->coords.node->last_read = last_read; store->value.node->last_read = last_read; break; @@ -3877,34 +4248,67 @@ static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hls return false; } -static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static void register_deref_usage(struct hlsl_ctx *ctx, struct hlsl_deref *deref) { - struct hlsl_ir_resource_load *load; - struct hlsl_ir_var *var; - enum hlsl_regset regset; + struct hlsl_ir_var *var = deref->var; + enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); + uint32_t required_bind_count; + struct hlsl_type *type; unsigned int index; - if (instr->type != HLSL_IR_RESOURCE_LOAD) - return false; + if (!hlsl_regset_index_from_deref(ctx, deref, regset, &index)) + return; - load = hlsl_ir_resource_load(instr); - var = load->resource.var; - - regset = hlsl_deref_get_regset(ctx, &load->resource); - - if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) - return false; - - var->objects_usage[regset][index].used = true; - var->bind_count[regset] = max(var->bind_count[regset], index + 1); - if (load->sampler.var) + if (regset <= HLSL_REGSET_LAST_OBJECT) { - var = load->sampler.var; - if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) - return false; + var->objects_usage[regset][index].used = true; + var->bind_count[regset] = max(var->bind_count[regset], index + 1); + } + else if (regset == HLSL_REGSET_NUMERIC) + { + type = hlsl_deref_get_type(ctx, deref); - var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; - var->bind_count[HLSL_REGSET_SAMPLERS] = max(var->bind_count[HLSL_REGSET_SAMPLERS], index + 1); + hlsl_regset_index_from_deref(ctx, deref, regset, &index); + required_bind_count = align(index + 
type->reg_size[regset], 4) / 4; + var->bind_count[regset] = max(var->bind_count[regset], required_bind_count); + } + else + { + vkd3d_unreachable(); + } +} + +static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + switch (instr->type) + { + case HLSL_IR_LOAD: + { + struct hlsl_ir_load *load = hlsl_ir_load(instr); + + if (!load->src.var->is_uniform) + return false; + + /* These will are handled by validate_static_object_references(). */ + if (hlsl_deref_get_regset(ctx, &load->src) != HLSL_REGSET_NUMERIC) + return false; + + register_deref_usage(ctx, &load->src); + break; + } + + case HLSL_IR_RESOURCE_LOAD: + register_deref_usage(ctx, &hlsl_ir_resource_load(instr)->resource); + if (hlsl_ir_resource_load(instr)->sampler.var) + register_deref_usage(ctx, &hlsl_ir_resource_load(instr)->sampler); + break; + + case HLSL_IR_RESOURCE_STORE: + register_deref_usage(ctx, &hlsl_ir_resource_store(instr)->resource); + break; + + default: + break; } return false; @@ -4083,7 +4487,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, continue; value = &constant->value.u[i++]; - switch (type->base_type) + switch (type->e.numeric.type) { case HLSL_TYPE_BOOL: f = !!value->u; @@ -4149,16 +4553,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, } } +static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort) +{ + struct hlsl_ir_var *var; + + list_remove(&to_sort->extern_entry); + + LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) + { + uint32_t to_sort_size = to_sort->bind_count[HLSL_REGSET_NUMERIC]; + uint32_t var_size = var->bind_count[HLSL_REGSET_NUMERIC]; + + if (to_sort_size > var_size) + { + list_add_before(&var->extern_entry, &to_sort->extern_entry); + return; + } + } + + list_add_tail(sorted, &to_sort->extern_entry); +} + +static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) +{ + struct list sorted = LIST_INIT(sorted); + 
struct hlsl_ir_var *var, *next; + + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform) + sort_uniform_by_numeric_bind_count(&sorted, var); + } + list_move_tail(&ctx->extern_vars, &sorted); +} + static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { struct register_allocator allocator = {0}; struct hlsl_ir_var *var; + sort_uniforms_by_numeric_bind_count(ctx); + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; - if (!var->is_uniform || !var->last_read || reg_size == 0) + if (!var->is_uniform || reg_size == 0) continue; if (var->reg_reservation.reg_type == 'c') @@ -4189,15 +4629,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int alloc_size = 4 * var->bind_count[HLSL_REGSET_NUMERIC]; - if (!var->is_uniform || !var->last_read || reg_size == 0) + if (!var->is_uniform || alloc_size == 0) continue; if (!var->regs[HLSL_REGSET_NUMERIC].allocated) { - var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &allocator, - 1, UINT_MAX, var->data_type); + var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size); TRACE("Allocated %s to %s.\n", var->name, debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } @@ -4435,7 +4874,9 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) continue; if (var1->reg_reservation.offset_type - || (var1->data_type->class == HLSL_CLASS_OBJECT && var1->reg_reservation.reg_type)) + || var1->reg_reservation.reg_type == 's' + || var1->reg_reservation.reg_type == 't' + || var1->reg_reservation.reg_type == 'u') buffer->manually_packed_elements = true; else 
buffer->automatically_packed_elements = true; @@ -4674,7 +5115,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl /* We should always have generated a cast to UINT. */ assert(path_node->data_type->class == HLSL_CLASS_SCALAR - && path_node->data_type->base_type == HLSL_TYPE_UINT); + && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); idx = hlsl_ir_constant(path_node)->value.u[0].u; @@ -4729,14 +5170,15 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl return true; } +/* Returns true if the index is constant, and false otherwise. In the latter case, the maximum + * possible index is retrieved, assuming there is no out-of-bounds access. */ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, enum hlsl_regset regset, unsigned int *index) { struct hlsl_type *type = deref->var->data_type; + bool index_is_constant = true; unsigned int i; - assert(regset <= HLSL_REGSET_LAST_OBJECT); - *index = 0; for (i = 0; i < deref->path_len; ++i) @@ -4745,37 +5187,62 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref unsigned int idx = 0; assert(path_node); - if (path_node->type != HLSL_IR_CONSTANT) - return false; - - /* We should always have generated a cast to UINT. */ - assert(path_node->data_type->class == HLSL_CLASS_SCALAR - && path_node->data_type->base_type == HLSL_TYPE_UINT); - - idx = hlsl_ir_constant(path_node)->value.u[0].u; - - switch (type->class) + if (path_node->type == HLSL_IR_CONSTANT) { - case HLSL_CLASS_ARRAY: - if (idx >= type->e.array.elements_count) - return false; + /* We should always have generated a cast to UINT. 
*/ + assert(path_node->data_type->class == HLSL_CLASS_SCALAR + && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - *index += idx * type->e.array.type->reg_size[regset]; - break; + idx = hlsl_ir_constant(path_node)->value.u[0].u; - case HLSL_CLASS_STRUCT: - *index += type->e.record.fields[idx].reg_offset[regset]; - break; + switch (type->class) + { + case HLSL_CLASS_ARRAY: + if (idx >= type->e.array.elements_count) + return false; - default: - vkd3d_unreachable(); + *index += idx * type->e.array.type->reg_size[regset]; + break; + + case HLSL_CLASS_STRUCT: + *index += type->e.record.fields[idx].reg_offset[regset]; + break; + + case HLSL_CLASS_MATRIX: + *index += 4 * idx; + break; + + default: + vkd3d_unreachable(); + } + } + else + { + index_is_constant = false; + + switch (type->class) + { + case HLSL_CLASS_ARRAY: + idx = type->e.array.elements_count - 1; + *index += idx * type->e.array.type->reg_size[regset]; + break; + + case HLSL_CLASS_MATRIX: + idx = hlsl_type_major_size(type) - 1; + *index += idx * 4; + break; + + default: + vkd3d_unreachable(); + } } type = hlsl_get_element_type_from_path_index(ctx, type, path_node); } - assert(type->reg_size[regset] == 1); - return true; + assert(!(regset <= HLSL_REGSET_LAST_OBJECT) || (type->reg_size[regset] == 1)); + assert(!(regset == HLSL_REGSET_NUMERIC) || type->reg_size[regset] <= 4); + return index_is_constant; } bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) @@ -4790,7 +5257,7 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref { /* We should always have generated a cast to UINT. 
*/ assert(offset_node->data_type->class == HLSL_CLASS_SCALAR - && offset_node->data_type->base_type == HLSL_TYPE_UINT); + && offset_node->data_type->e.numeric.type == HLSL_TYPE_UINT); assert(offset_node->type != HLSL_IR_CONSTANT); return false; } @@ -4857,7 +5324,7 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a const struct hlsl_ir_constant *constant; if (type->class != HLSL_CLASS_SCALAR - || (type->base_type != HLSL_TYPE_INT && type->base_type != HLSL_TYPE_UINT)) + || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT)) { struct vkd3d_string_buffer *string; @@ -4876,8 +5343,8 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a } constant = hlsl_ir_constant(instr); - if ((type->base_type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) - || (type->base_type == HLSL_TYPE_UINT && !constant->value.u[0].u)) + if ((type->e.numeric.type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) + || (type->e.numeric.type == HLSL_TYPE_UINT && !constant->value.u[0].u)) hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT, "Thread count must be a positive integer."); @@ -4885,25 +5352,6 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a } } -static bool type_has_object_components(struct hlsl_type *type) -{ - if (type->class == HLSL_CLASS_OBJECT) - return true; - if (type->class == HLSL_CLASS_ARRAY) - return type_has_object_components(type->e.array.type); - if (type->class == HLSL_CLASS_STRUCT) - { - unsigned int i; - - for (i = 0; i < type->e.record.field_count; ++i) - { - if (type_has_object_components(type->e.record.fields[i].type)) - return true; - } - } - return false; -} - static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) { struct hlsl_ir_node *instr, *next; @@ -4960,15 +5408,42 @@ static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *bod } } -void 
hlsl_prepend_global_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *body) +void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) { - struct hlsl_ir_var *var; + bool progress; - LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) + lower_ir(ctx, lower_matrix_swizzles, body); + lower_ir(ctx, lower_index_loads, body); + + lower_ir(ctx, lower_broadcasts, body); + while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); + do { - if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) - prepend_uniform_copy(ctx, body, var); + progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); + progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); } + while (progress); + hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); + + lower_ir(ctx, lower_narrowing_casts, body); + lower_ir(ctx, lower_int_dot, body); + lower_ir(ctx, lower_int_division, body); + lower_ir(ctx, lower_int_modulus, body); + lower_ir(ctx, lower_int_abs, body); + lower_ir(ctx, lower_casts_to_bool, body); + lower_ir(ctx, lower_float_modulus, body); + hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); + + do + { + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); + progress |= hlsl_copy_propagation_execute(ctx, body); + progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); + } while (progress); } int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, @@ -4979,7 +5454,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry struct recursive_call_ctx recursive_call_ctx; struct hlsl_ir_var *var; unsigned int i; - 
bool progress; list_move_head(&body->instrs, &ctx->static_initializers.instrs); @@ -4999,7 +5473,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_matrix_swizzles, body); lower_ir(ctx, lower_index_loads, body); - hlsl_prepend_global_uniform_copy(ctx, body); + LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) + { + if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) + prepend_uniform_copy(ctx, body, var); + } for (i = 0; i < entry_func->parameters.count; ++i) { @@ -5011,9 +5489,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } else { - if (type_has_object_components(var->data_type)) - hlsl_fixme(ctx, &var->loc, "Prepend uniform copies for object components within structs."); - if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT && !var->semantic.name) { @@ -5056,34 +5531,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry { hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); } - lower_ir(ctx, lower_broadcasts, body); - while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); - do - { - progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); - progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); - } - while (progress); - hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); - lower_ir(ctx, lower_narrowing_casts, body); - lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_int_dot, body); - lower_ir(ctx, lower_int_division, body); - lower_ir(ctx, lower_int_modulus, body); - lower_ir(ctx, lower_int_abs, body); - lower_ir(ctx, lower_float_modulus, body); - hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); - do - { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); - progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); - progress |= hlsl_copy_propagation_execute(ctx, body); - 
progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); - } - while (progress); + hlsl_run_const_passes(ctx, body); + remove_unreachable_code(ctx, body); hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL); @@ -5095,12 +5545,23 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); if (profile->major_version >= 4) hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); - hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); + + do + compute_liveness(ctx, entry_func); + while (hlsl_transform_ir(ctx, dce, body, NULL)); + + hlsl_transform_ir(ctx, track_components_usage, body, NULL); sort_synthetic_separated_samplers_first(ctx); - lower_ir(ctx, lower_ternary, body); if (profile->major_version < 4) { + lower_ir(ctx, lower_ternary, body); + + lower_ir(ctx, lower_nonfloat_exprs, body); + /* Constants cast to float must be folded, and new casts to bool also need to be lowered. 
*/ + hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); + lower_ir(ctx, lower_casts_to_bool, body); + lower_ir(ctx, lower_casts_to_int, body); lower_ir(ctx, lower_division, body); lower_ir(ctx, lower_sqrt, body); @@ -5108,6 +5569,12 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_round, body); lower_ir(ctx, lower_ceil, body); lower_ir(ctx, lower_floor, body); + lower_ir(ctx, lower_comparison_operators, body); + lower_ir(ctx, lower_logic_not, body); + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + lower_ir(ctx, lower_slt, body); + else + lower_ir(ctx, lower_cmp, body); } if (profile->major_version < 2) @@ -5117,6 +5584,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, validate_nonconstant_vector_store_derefs, body); + do + compute_liveness(ctx, entry_func); + while (hlsl_transform_ir(ctx, dce, body, NULL)); + /* TODO: move forward, remove when no longer needed */ transform_derefs(ctx, replace_deref_path_with_offset, body); while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index b76b1fce507..16015fa8a81 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -25,10 +25,10 @@ static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->base_type); + assert(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -116,10 +116,10 @@ static int32_t double_to_int(double x) static bool fold_bit_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, 
const struct hlsl_ir_constant *src) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->base_type); + assert(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -158,7 +158,7 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, for (k = 0; k < dst_type->dimx; ++k) { - switch (src->node.data_type->base_type) + switch (src->node.data_type->e.numeric.type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: @@ -200,7 +200,7 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, vkd3d_unreachable(); } - switch (dst_type->base_type) + switch (dst_type->e.numeric.type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: @@ -231,10 +231,10 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->base_type); + assert(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -257,10 +257,10 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->base_type); + assert(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -283,10 +283,10 @@ static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, static bool fold_floor(struct hlsl_ctx *ctx, struct 
hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->base_type); + assert(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -309,11 +309,11 @@ static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; float i; - assert(type == src->node.data_type->base_type); + assert(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -336,10 +336,10 @@ static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->base_type); + assert(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -381,10 +381,10 @@ static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->base_type); + assert(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -415,10 +415,10 @@ static bool 
fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->base_type); + assert(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -439,10 +439,10 @@ static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->base_type); + assert(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -484,10 +484,10 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->base_type); + assert(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -524,10 +524,10 @@ static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == 
src->node.data_type->base_type); + assert(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -550,10 +550,10 @@ static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->base_type); + assert(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -595,11 +595,11 @@ static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(type == src1->node.data_type->e.numeric.type); + assert(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -632,11 +632,11 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(type == src1->node.data_type->e.numeric.type); + assert(type == src2->node.data_type->e.numeric.type); for (k = 0; k < 
dst_type->dimx; ++k) { @@ -659,11 +659,11 @@ static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(type == src1->node.data_type->e.numeric.type); + assert(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -686,11 +686,11 @@ static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(type == src1->node.data_type->e.numeric.type); + assert(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -712,11 +712,11 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(type == src1->node.data_type->e.numeric.type); + assert(type == src2->node.data_type->e.numeric.type); 
assert(src1->node.data_type->dimx == src2->node.data_type->dimx); dst->u[0].f = 0.0f; @@ -740,12 +740,12 @@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - assert(type == src3->node.data_type->base_type); + assert(type == src1->node.data_type->e.numeric.type); + assert(type == src2->node.data_type->e.numeric.type); + assert(type == src3->node.data_type->e.numeric.type); assert(src1->node.data_type->dimx == src2->node.data_type->dimx); assert(src3->node.data_type->dimx == 1); @@ -771,11 +771,11 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct vkd3d_shader_location *loc) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(type == src1->node.data_type->e.numeric.type); + assert(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -841,12 +841,12 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co { unsigned int k; - assert(dst_type->base_type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { - switch 
(src1->node.data_type->base_type) + switch (src1->node.data_type->e.numeric.type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: @@ -877,12 +877,12 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c { unsigned int k; - assert(dst_type->base_type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { - switch (src1->node.data_type->base_type) + switch (src1->node.data_type->e.numeric.type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: @@ -916,12 +916,12 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con { unsigned int k; - assert(dst_type->base_type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { - switch (src1->node.data_type->base_type) + switch (src1->node.data_type->e.numeric.type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: @@ -955,14 +955,14 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c { unsigned int k; - assert(dst_type->base_type == src1->node.data_type->base_type); - assert(src2->node.data_type->base_type == HLSL_TYPE_INT); + assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); + assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); for (k = 0; k < dst_type->dimx; ++k) { unsigned int shift = src2->value.u[k].u % 32; - switch (src1->node.data_type->base_type) + switch (src1->node.data_type->e.numeric.type) { case HLSL_TYPE_INT: dst->u[k].i = src1->value.u[k].i << shift; @@ -983,11 +983,11 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c static 
bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(type == src1->node.data_type->e.numeric.type); + assert(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -1021,11 +1021,11 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(type == src1->node.data_type->e.numeric.type); + assert(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -1060,11 +1060,11 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct vkd3d_shader_location *loc) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(type == src1->node.data_type->e.numeric.type); + assert(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -1102,11 +1102,11 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, 
const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst_type->base_type; + enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(type == src1->node.data_type->e.numeric.type); + assert(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -1139,12 +1139,12 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c { unsigned int k; - assert(dst_type->base_type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { - switch (src1->node.data_type->base_type) + switch (src1->node.data_type->e.numeric.type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: @@ -1175,32 +1175,13 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, { unsigned int k; - assert(dst_type->base_type == src2->node.data_type->base_type); - assert(dst_type->base_type == src3->node.data_type->base_type); + assert(dst_type->e.numeric.type == src2->node.data_type->e.numeric.type); + assert(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); + assert(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL); for (k = 0; k < dst_type->dimx; ++k) - { - switch (src1->node.data_type->base_type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - dst->u[k] = src1->value.u[k].f != 0.0f ? src2->value.u[k] : src3->value.u[k]; - break; + dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k]; - case HLSL_TYPE_DOUBLE: - dst->u[k] = src1->value.u[k].d != 0.0 ? 
src2->value.u[k] : src3->value.u[k]; - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: - dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k]; - break; - - default: - vkd3d_unreachable(); - } - } return true; } @@ -1209,14 +1190,14 @@ static bool fold_rshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c { unsigned int k; - assert(dst_type->base_type == src1->node.data_type->base_type); - assert(src2->node.data_type->base_type == HLSL_TYPE_INT); + assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); + assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); for (k = 0; k < dst_type->dimx; ++k) { unsigned int shift = src2->value.u[k].u % 32; - switch (src1->node.data_type->base_type) + switch (src1->node.data_type->e.numeric.type) { case HLSL_TYPE_INT: dst->u[k].i = src1->value.u[k].i >> shift; @@ -1415,6 +1396,136 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return success; } +static bool constant_is_zero(struct hlsl_ir_constant *const_arg) +{ + struct hlsl_type *data_type = const_arg->node.data_type; + unsigned int k; + + for (k = 0; k < data_type->dimx; ++k) + { + switch (data_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + if (const_arg->value.u[k].f != 0.0f) + return false; + break; + + case HLSL_TYPE_DOUBLE: + if (const_arg->value.u[k].d != 0.0) + return false; + break; + + case HLSL_TYPE_UINT: + case HLSL_TYPE_INT: + case HLSL_TYPE_BOOL: + if (const_arg->value.u[k].u != 0) + return false; + break; + + default: + return false; + } + } + return true; +} + +static bool constant_is_one(struct hlsl_ir_constant *const_arg) +{ + struct hlsl_type *data_type = const_arg->node.data_type; + unsigned int k; + + for (k = 0; k < data_type->dimx; ++k) + { + switch (data_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + if (const_arg->value.u[k].f != 1.0f) + return false; + break; + + case 
HLSL_TYPE_DOUBLE: + if (const_arg->value.u[k].d != 1.0) + return false; + break; + + case HLSL_TYPE_UINT: + case HLSL_TYPE_INT: + case HLSL_TYPE_BOOL: + if (const_arg->value.u[k].u != 1) + return false; + break; + + default: + return false; + } + } + return true; +} + +bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_constant *const_arg = NULL; + struct hlsl_ir_node *mut_arg = NULL; + struct hlsl_ir_node *res_node; + struct hlsl_ir_expr *expr; + unsigned int i; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + + if (instr->data_type->class > HLSL_CLASS_VECTOR) + return false; + + /* Verify that the expression has two operands. */ + for (i = 0; i < ARRAY_SIZE(expr->operands); ++i) + { + if (!!expr->operands[i].node != (i < 2)) + return false; + } + + if (expr->operands[0].node->type == HLSL_IR_CONSTANT) + { + const_arg = hlsl_ir_constant(expr->operands[0].node); + mut_arg = expr->operands[1].node; + } + else if (expr->operands[1].node->type == HLSL_IR_CONSTANT) + { + mut_arg = expr->operands[0].node; + const_arg = hlsl_ir_constant(expr->operands[1].node); + } + else + { + return false; + } + + res_node = NULL; + switch (expr->op) + { + case HLSL_OP2_ADD: + if (constant_is_zero(const_arg)) + res_node = mut_arg; + break; + + case HLSL_OP2_MUL: + if (constant_is_one(const_arg)) + res_node = mut_arg; + break; + + default: + break; + } + + if (res_node) + { + hlsl_replace_node(&expr->node, res_node); + return true; + } + return false; +} + bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_constant_value value; diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index f0bd85338c6..b3b745fc1b2 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -17,9 +17,11 @@ */ #include "vkd3d_shader_private.h" +#include "vkd3d_types.h" bool 
vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) { + memset(program, 0, sizeof(*program)); program->shader_version = *version; return shader_instruction_array_init(&program->instructions, reserve); } @@ -32,6 +34,9 @@ void vsir_program_cleanup(struct vsir_program *program) vkd3d_free((void *)program->block_names[i]); vkd3d_free(program->block_names); shader_instruction_array_destroy(&program->instructions); + shader_signature_cleanup(&program->input_signature); + shader_signature_cleanup(&program->output_signature); + shader_signature_cleanup(&program->patch_constant_signature); } static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) @@ -53,19 +58,6 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i vsir_instruction_init(ins, &location, VKD3DSIH_NOP); } -static void remove_dcl_temps(struct vsir_program *program) -{ - unsigned int i; - - for (i = 0; i < program->instructions.count; ++i) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - - if (ins->handler_idx == VKD3DSIH_DCL_TEMPS) - vkd3d_shader_instruction_make_nop(ins); - } -} - static bool vsir_instruction_init_with_params(struct vsir_program *program, struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) @@ -91,86 +83,270 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, return true; } -static enum vkd3d_result instruction_array_lower_texkills(struct vkd3d_shader_parser *parser) +static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_data_type data_type, + enum vkd3d_shader_opcode *opcode, bool *requires_swap) { - struct vsir_program *program = &parser->program; - struct vkd3d_shader_instruction_array *instructions = &program->instructions; - struct vkd3d_shader_instruction 
*texkill_ins, *ins; - unsigned int components_read = 3 + (program->shader_version.major >= 2); - unsigned int tmp_idx = ~0u; - unsigned int i, k; - - for (i = 0; i < instructions->count; ++i) + switch (rel_op) { - texkill_ins = &instructions->elements[i]; + case VKD3D_SHADER_REL_OP_LT: + case VKD3D_SHADER_REL_OP_GT: + *requires_swap = (rel_op == VKD3D_SHADER_REL_OP_GT); + if (data_type == VKD3D_DATA_FLOAT) + { + *opcode = VKD3DSIH_LTO; + return true; + } + break; - if (texkill_ins->handler_idx != VKD3DSIH_TEXKILL) - continue; + case VKD3D_SHADER_REL_OP_GE: + case VKD3D_SHADER_REL_OP_LE: + *requires_swap = (rel_op == VKD3D_SHADER_REL_OP_LE); + if (data_type == VKD3D_DATA_FLOAT) + { + *opcode = VKD3DSIH_GEO; + return true; + } + break; - if (!shader_instruction_array_insert_at(instructions, i + 1, components_read + 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; + case VKD3D_SHADER_REL_OP_EQ: + *requires_swap = false; + if (data_type == VKD3D_DATA_FLOAT) + { + *opcode = VKD3DSIH_EQO; + return true; + } + break; - if (tmp_idx == ~0u) - tmp_idx = program->temp_count++; + case VKD3D_SHADER_REL_OP_NE: + *requires_swap = false; + if (data_type == VKD3D_DATA_FLOAT) + { + *opcode = VKD3DSIH_NEO; + return true; + } + break; + } + return false; +} - /* tmp = ins->dst[0] < 0 */ +static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program, + struct vkd3d_shader_instruction *ifc, unsigned int *tmp_idx, + struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + size_t pos = ifc - instructions->elements; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_opcode opcode; + bool swap; - ins = &instructions->elements[i + 1]; - if (!vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_LTO, 1, 2)) + if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; 
+ + /* Replace ifc comparison with actual comparison, saving the result in the tmp register. */ + if (!(get_opcode_from_rel_op(ifc->flags, ifc->src[0].reg.data_type, &opcode, &swap))) + { + vkd3d_shader_error(message_context, &ifc->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: opcode for rel_op %u and data type %u.", + ifc->flags, ifc->src[0].reg.data_type); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + + ins = &instructions->elements[pos + 1]; + if (!vsir_instruction_init_with_params(program, ins, &ifc->location, opcode, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].reg.idx[0].offset = *tmp_idx; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; + + ins->src[0] = ifc->src[swap]; + ins->src[1] = ifc->src[!swap]; + + /* Create new if instruction using the previous result. */ + ins = &instructions->elements[pos + 2]; + if (!vsir_instruction_init_with_params(program, ins, &ifc->location, VKD3DSIH_IF, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; + + vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].reg.idx[0].offset = *tmp_idx; + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + + /* Make the original instruction no-op */ + vkd3d_shader_instruction_make_nop(ifc); + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program, + struct vkd3d_shader_instruction *texkill, unsigned int *tmp_idx) +{ + const unsigned int components_read = 3 + (program->shader_version.major >= 2); + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + size_t pos = texkill - instructions->elements; + struct vkd3d_shader_instruction *ins; + unsigned int j; + + if 
(!shader_instruction_array_insert_at(instructions, pos + 1, components_read + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; + + /* tmp = ins->dst[0] < 0 */ + + ins = &instructions->elements[pos + 1]; + if (!vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_LTO, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].reg.idx[0].offset = *tmp_idx; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; + + ins->src[0].reg = texkill->dst[0].reg; + ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; + vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[1].reg.u.immconst_f32[0] = 0.0f; + ins->src[1].reg.u.immconst_f32[1] = 0.0f; + ins->src[1].reg.u.immconst_f32[2] = 0.0f; + ins->src[1].reg.u.immconst_f32[3] = 0.0f; + + /* tmp.x = tmp.x || tmp.y */ + /* tmp.x = tmp.x || tmp.z */ + /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */ + + for (j = 1; j < components_read; ++j) + { + ins = &instructions->elements[pos + 1 + j]; + if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_OR, 1, 2))) return VKD3D_ERROR_OUT_OF_MEMORY; vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; - ins->dst[0].reg.idx[0].offset = tmp_idx; - ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; - - ins->src[0].reg = texkill_ins->dst[0].reg; - vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); - ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; - ins->src[1].reg.u.immconst_f32[0] = 0.0f; - ins->src[1].reg.u.immconst_f32[1] = 0.0f; - ins->src[1].reg.u.immconst_f32[2] = 0.0f; - ins->src[1].reg.u.immconst_f32[3] = 0.0f; - - /* tmp.x = tmp.x || tmp.y */ - /* tmp.x = tmp.x || tmp.z */ - /* tmp.x = tmp.x || 
tmp.w, if sm >= 2.0 */ - - for (k = 1; k < components_read; ++k) - { - ins = &instructions->elements[i + 1 + k]; - if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_OR, 1, 2))) - return VKD3D_ERROR_OUT_OF_MEMORY; - - vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; - ins->dst[0].reg.idx[0].offset = tmp_idx; - ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; - - vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; - ins->src[0].reg.idx[0].offset = tmp_idx; - ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; - ins->src[1].reg.idx[0].offset = tmp_idx; - ins->src[1].swizzle = vkd3d_shader_create_swizzle(k, k, k, k); - } - - /* discard_nz tmp.x */ - - ins = &instructions->elements[i + 1 + components_read]; - if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_DISCARD, 0, 1))) - return VKD3D_ERROR_OUT_OF_MEMORY; - ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; + ins->dst[0].reg.idx[0].offset = *tmp_idx; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; - ins->src[0].reg.idx[0].offset = tmp_idx; + ins->src[0].reg.idx[0].offset = *tmp_idx; ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[1].reg.idx[0].offset = *tmp_idx; + ins->src[1].swizzle = vkd3d_shader_create_swizzle(j, j, j, j); + } - /* Make the original instruction no-op */ - vkd3d_shader_instruction_make_nop(texkill_ins); + /* discard_nz tmp.x */ + + ins = &instructions->elements[pos + 1 + components_read]; + if 
(!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_DISCARD, 0, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; + + vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].reg.idx[0].offset = *tmp_idx; + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + + /* Make the original instruction no-op */ + vkd3d_shader_instruction_make_nop(texkill); + + return VKD3D_OK; +} + +/* The Shader Model 5 Assembly documentation states: "If components of a mad + * instruction are tagged as precise, the hardware must execute a mad instruction + * or the exact equivalent, and it cannot split it into a multiply followed by an add." + * But DXIL.rst states the opposite: "Floating point multiply & add. This operation is + * not fused for "precise" operations." + * Windows drivers seem to conform with the latter, for SM 4-5 and SM 6. */ +static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *program, + struct vkd3d_shader_instruction *mad, unsigned int *tmp_idx) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_instruction *mul_ins, *add_ins; + size_t pos = mad - instructions->elements; + struct vkd3d_shader_dst_param *mul_dst; + + if (!(mad->flags & VKD3DSI_PRECISE_XYZW)) + return VKD3D_OK; + + if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; + + mul_ins = &instructions->elements[pos]; + add_ins = &instructions->elements[pos + 1]; + + mul_ins->handler_idx = VKD3DSIH_MUL; + mul_ins->src_count = 2; + + if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + add_ins->flags = mul_ins->flags & VKD3DSI_PRECISE_XYZW; + + mul_dst = mul_ins->dst; + *add_ins->dst = 
*mul_dst; + + mul_dst->modifiers = 0; + vsir_register_init(&mul_dst->reg, VKD3DSPR_TEMP, mul_ins->src[0].reg.data_type, 1); + mul_dst->reg.dimension = add_ins->dst->reg.dimension; + mul_dst->reg.idx[0].offset = *tmp_idx; + + add_ins->src[0].reg = mul_dst->reg; + add_ins->src[0].swizzle = vsir_swizzle_from_writemask(mul_dst->write_mask); + add_ins->src[0].modifiers = 0; + add_ins->src[1] = mul_ins->src[2]; + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + unsigned int tmp_idx = ~0u, i; + enum vkd3d_result ret; + + for (i = 0; i < instructions->count; ++i) + { + struct vkd3d_shader_instruction *ins = &instructions->elements[i]; + + switch (ins->handler_idx) + { + case VKD3DSIH_IFC: + if ((ret = vsir_program_lower_ifc(program, ins, &tmp_idx, message_context)) < 0) + return ret; + break; + + case VKD3DSIH_TEXKILL: + if ((ret = vsir_program_lower_texkill(program, ins, &tmp_idx)) < 0) + return ret; + break; + + case VKD3DSIH_MAD: + if ((ret = vsir_program_lower_precise_mad(program, ins, &tmp_idx)) < 0) + return ret; + break; + + case VKD3DSIH_DCL_CONSTANT_BUFFER: + case VKD3DSIH_DCL_TEMPS: + vkd3d_shader_instruction_make_nop(ins); + break; + + default: + break; + } } return VKD3D_OK; @@ -227,10 +403,11 @@ static const struct vkd3d_shader_varying_map *find_varying_map( return NULL; } -static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info) +static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) { - struct shader_signature *signature = &parser->shader_desc.output_signature; + const struct vkd3d_shader_location location = {.source_name = 
compile_info->source_name}; + struct shader_signature *signature = &program->output_signature; const struct vkd3d_shader_varying_map_info *varying_map; unsigned int i; @@ -252,7 +429,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars * location with a different mask. */ if (input_mask && input_mask != e->mask) { - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Aborting due to not yet implemented feature: " "Output mask %#x does not match input mask %#x.", e->mask, input_mask); @@ -269,7 +446,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars { if (varying_map->varying_map[i].output_signature_index >= signature->element_count) { - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Aborting due to not yet implemented feature: " "The next stage consumes varyings not written by this stage."); return VKD3D_ERROR_NOT_IMPLEMENTED; @@ -453,7 +630,7 @@ void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) { - vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UINT, 1); + vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); param->reg.dimension = VSIR_DIMENSION_NONE; param->reg.idx[0].offset = label_id; } @@ -464,12 +641,24 @@ static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned src->reg.idx[0].offset = idx; } +static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); + src->reg.idx[0].offset = idx; +} + static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, 
VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); dst->reg.idx[0].offset = idx; } +static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); + dst->reg.idx[0].offset = idx; +} + static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); @@ -554,11 +743,14 @@ static bool control_point_normaliser_is_in_control_point_phase(const struct cont return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; } -static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( +struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( struct vkd3d_shader_instruction_array *instructions) { struct vkd3d_shader_src_param *rel_addr; + if (instructions->outpointid_param) + return instructions->outpointid_param; + if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) return NULL; @@ -566,6 +758,7 @@ static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( rel_addr->swizzle = 0; rel_addr->modifiers = 0; + instructions->outpointid_param = rel_addr; return rel_addr; } @@ -1383,10 +1576,9 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi } } -static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parser *parser) +static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program) { - struct io_normaliser normaliser = {parser->program.instructions}; - struct vsir_program *program = &parser->program; + struct io_normaliser normaliser = {program->instructions}; struct vkd3d_shader_instruction *ins; bool has_control_point_phase; unsigned int i, j; @@ -1394,9 +1586,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse normaliser.phase = VKD3DSIH_INVALID; normaliser.shader_type = program->shader_version.type; normaliser.major = 
program->shader_version.major; - normaliser.input_signature = &parser->shader_desc.input_signature; - normaliser.output_signature = &parser->shader_desc.output_signature; - normaliser.patch_constant_signature = &parser->shader_desc.patch_constant_signature; + normaliser.input_signature = &program->input_signature; + normaliser.output_signature = &program->output_signature; + normaliser.patch_constant_signature = &program->patch_constant_signature; for (i = 0, has_control_point_phase = false; i < program->instructions.count; ++i) { @@ -1439,9 +1631,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse } } - if (!shader_signature_merge(&parser->shader_desc.input_signature, normaliser.input_range_map, false) - || !shader_signature_merge(&parser->shader_desc.output_signature, normaliser.output_range_map, false) - || !shader_signature_merge(&parser->shader_desc.patch_constant_signature, normaliser.pc_range_map, true)) + if (!shader_signature_merge(&program->input_signature, normaliser.input_range_map, false) + || !shader_signature_merge(&program->output_signature, normaliser.output_range_map, false) + || !shader_signature_merge(&program->patch_constant_signature, normaliser.pc_range_map, true)) { program->instructions = normaliser.instructions; return VKD3D_ERROR_OUT_OF_MEMORY; @@ -1668,19 +1860,20 @@ static void remove_dead_code(struct vsir_program *program) } } -static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser *parser) +static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) { unsigned int i; - for (i = 0; i < parser->program.instructions.count; ++i) + for (i = 0; i < program->instructions.count; ++i) { - struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; struct vkd3d_shader_src_param *srcs; switch 
(ins->handler_idx) { case VKD3DSIH_TEX: - if (!(srcs = shader_src_param_allocator_get(&parser->program.instructions.src_params, 3))) + if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) return VKD3D_ERROR_OUT_OF_MEMORY; memset(srcs, 0, sizeof(*srcs) * 3); @@ -1693,6 +1886,7 @@ static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; srcs[1].reg.idx_count = 2; srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; + srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; srcs[2].reg.type = VKD3DSPR_SAMPLER; @@ -1723,7 +1917,7 @@ static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser case VKD3DSIH_TEXREG2AR: case VKD3DSIH_TEXREG2GB: case VKD3DSIH_TEXREG2RGB: - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Aborting due to not yet implemented feature: " "Combined sampler instruction %#x.", ins->handler_idx); return VKD3D_ERROR_NOT_IMPLEMENTED; @@ -1789,10 +1983,10 @@ struct cf_flattener_info struct cf_flattener { - struct vkd3d_shader_parser *parser; + struct vsir_program *program; struct vkd3d_shader_location location; - bool allocation_failed; + enum vkd3d_result status; struct vkd3d_shader_instruction *instructions; size_t instruction_capacity; @@ -1812,13 +2006,20 @@ struct cf_flattener size_t control_flow_info_size; }; +static void cf_flattener_set_error(struct cf_flattener *flattener, enum vkd3d_result error) +{ + if (flattener->status != VKD3D_OK) + return; + flattener->status = error; +} + static struct vkd3d_shader_instruction *cf_flattener_require_space(struct cf_flattener *flattener, size_t count) { if (!vkd3d_array_reserve((void **)&flattener->instructions, &flattener->instruction_capacity, flattener->instruction_count + count, sizeof(*flattener->instructions))) { ERR("Failed to allocate 
instructions.\n"); - flattener->allocation_failed = true; + cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); return NULL; } return &flattener->instructions[flattener->instruction_count]; @@ -1850,9 +2051,9 @@ static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_ { struct vkd3d_shader_src_param *params; - if (!(params = vsir_program_get_src_params(&flattener->parser->program, count))) + if (!(params = vsir_program_get_src_params(flattener->program, count))) { - flattener->allocation_failed = true; + cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); return NULL; } ins->src = params; @@ -1866,10 +2067,10 @@ static void cf_flattener_emit_label(struct cf_flattener *flattener, unsigned int if (!(ins = cf_flattener_require_space(flattener, 1))) return; - if (vsir_instruction_init_label(ins, &flattener->location, label_id, &flattener->parser->program)) + if (vsir_instruction_init_label(ins, &flattener->location, label_id, flattener->program)) ++flattener->instruction_count; else - flattener->allocation_failed = true; + cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); } /* For conditional branches, this returns the false target branch parameter. 
*/ @@ -1947,7 +2148,7 @@ static struct cf_flattener_info *cf_flattener_push_control_flow_level(struct cf_ flattener->control_flow_depth + 1, sizeof(*flattener->control_flow_info))) { ERR("Failed to allocate control flow info structure.\n"); - flattener->allocation_failed = true; + cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); return NULL; } @@ -2014,12 +2215,12 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla flattener->block_names[block_id] = buffer.buffer; } -static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener) +static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener, + struct vkd3d_shader_message_context *message_context) { bool main_block_open, is_hull_shader, after_declarations_section; - struct vkd3d_shader_parser *parser = flattener->parser; struct vkd3d_shader_instruction_array *instructions; - struct vsir_program *program = &parser->program; + struct vsir_program *program = flattener->program; struct vkd3d_shader_instruction *dst_ins; size_t i; @@ -2041,12 +2242,19 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte flattener->location = instruction->location; /* Declarations should occur before the first code block, which in hull shaders is marked by the first - * phase instruction, and in all other shader types begins with the first label instruction. */ - if (!after_declarations_section && !vsir_instruction_is_dcl(instruction) - && instruction->handler_idx != VKD3DSIH_NOP) + * phase instruction, and in all other shader types begins with the first label instruction. + * Declaring an indexable temp with function scope is not considered a declaration, + * because it needs to live inside a function. 
*/ + if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP) { - after_declarations_section = true; - cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener)); + bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP + && instruction->declaration.indexable_temp.has_function_scope; + + if (!vsir_instruction_is_dcl(instruction) || is_function_indexable) + { + after_declarations_section = true; + cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener)); + } } cf_info = flattener->control_flow_depth @@ -2064,7 +2272,8 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte break; case VKD3DSIH_LABEL: - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(message_context, &instruction->location, + VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Aborting due to not yet implemented feature: Label instruction."); return VKD3D_ERROR_NOT_IMPLEMENTED; @@ -2229,8 +2438,10 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X)) { WARN("Unexpected src swizzle %#x.\n", src->swizzle); - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, + vkd3d_shader_error(message_context, &instruction->location, + VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "The swizzle for a switch case value is not scalar X."); + cf_flattener_set_error(flattener, VKD3D_ERROR_INVALID_SHADER); } value = *src->reg.u.immconst_u32; @@ -2358,21 +2569,18 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte ++flattener->instruction_count; } - return flattener->allocation_failed ? 
VKD3D_ERROR_OUT_OF_MEMORY : VKD3D_OK; + return flattener->status; } -static enum vkd3d_result flatten_control_flow_constructs(struct vkd3d_shader_parser *parser) +static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) { - struct vsir_program *program = &parser->program; - struct cf_flattener flattener = {0}; + struct cf_flattener flattener = {.program = program}; enum vkd3d_result result; - flattener.parser = parser; - result = cf_flattener_iterate_instruction_array(&flattener); - - if (result >= 0) + if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0) { - vkd3d_free(parser->program.instructions.elements); + vkd3d_free(program->instructions.elements); program->instructions.elements = flattener.instructions; program->instructions.capacity = flattener.instruction_capacity; program->instructions.count = flattener.instruction_count; @@ -2548,97 +2756,6 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) } } - /* Second subpass: creating new blocks might have broken - * references in PHI instructions, so we use the block map to fix - * them. */ - current_label = 0; - for (i = 0; i < ins_count; ++i) - { - struct vkd3d_shader_instruction *ins = &instructions[i]; - struct vkd3d_shader_src_param *new_src; - unsigned int j, l, new_src_count = 0; - - switch (ins->handler_idx) - { - case VKD3DSIH_LABEL: - current_label = label_from_src_param(&ins->src[0]); - continue; - - case VKD3DSIH_PHI: - break; - - default: - continue; - } - - /* First count how many source parameters we need. 
*/ - for (j = 0; j < ins->src_count; j += 2) - { - unsigned int source_label = label_from_src_param(&ins->src[j + 1]); - size_t k, match_count = 0; - - for (k = 0; k < map_count; ++k) - { - struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; - - if (mapping->switch_label == source_label && mapping->target_label == current_label) - match_count += 1; - } - - new_src_count += (match_count != 0) ? 2 * match_count : 2; - } - - assert(new_src_count >= ins->src_count); - - /* Allocate more source parameters if needed. */ - if (new_src_count == ins->src_count) - { - new_src = ins->src; - } - else - { - if (!(new_src = vsir_program_get_src_params(program, new_src_count))) - { - ERR("Failed to allocate %u source parameters.\n", new_src_count); - goto fail; - } - } - - /* Then do the copy. */ - for (j = 0, l = 0; j < ins->src_count; j += 2) - { - unsigned int source_label = label_from_src_param(&ins->src[j + 1]); - size_t k, match_count = 0; - - for (k = 0; k < map_count; ++k) - { - struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; - - if (mapping->switch_label == source_label && mapping->target_label == current_label) - { - match_count += 1; - - new_src[l] = ins->src[j]; - new_src[l + 1] = ins->src[j + 1]; - new_src[l + 1].reg.idx[0].offset = mapping->if_label; - l += 2; - } - } - - if (match_count == 0) - { - new_src[l] = ins->src[j]; - new_src[l + 1] = ins->src[j + 1]; - l += 2; - } - } - - assert(l == new_src_count); - - ins->src_count = new_src_count; - ins->src = new_src; - } - vkd3d_free(program->instructions.elements); vkd3d_free(block_map); program->instructions.elements = instructions; @@ -2656,145 +2773,139 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; } -static void materialize_ssas_to_temps_process_src_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_src_param *src); +struct ssas_to_temps_alloc +{ + unsigned int *table; + unsigned int next_temp_idx; +}; + +static bool ssas_to_temps_alloc_init(struct 
ssas_to_temps_alloc *alloc, unsigned int ssa_count, unsigned int temp_count) +{ + size_t i = ssa_count * sizeof(*alloc->table); + + if (!(alloc->table = vkd3d_malloc(i))) + { + ERR("Failed to allocate SSA table.\n"); + return false; + } + memset(alloc->table, 0xff, i); + + alloc->next_temp_idx = temp_count; + return true; +} /* This is idempotent: it can be safely applied more than once on the * same register. */ -static void materialize_ssas_to_temps_process_reg(struct vkd3d_shader_parser *parser, struct vkd3d_shader_register *reg) +static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct ssas_to_temps_alloc *alloc, + struct vkd3d_shader_register *reg) { unsigned int i; - if (reg->type == VKD3DSPR_SSA) + if (reg->type == VKD3DSPR_SSA && alloc->table[reg->idx[0].offset] != UINT_MAX) { reg->type = VKD3DSPR_TEMP; - reg->idx[0].offset += parser->program.temp_count; + reg->idx[0].offset = alloc->table[reg->idx[0].offset]; } for (i = 0; i < reg->idx_count; ++i) if (reg->idx[i].rel_addr) - materialize_ssas_to_temps_process_src_param(parser, reg->idx[i].rel_addr); + materialize_ssas_to_temps_process_reg(program, alloc, ®->idx[i].rel_addr->reg); } -static void materialize_ssas_to_temps_process_dst_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_dst_param *dst) +struct ssas_to_temps_block_info { - materialize_ssas_to_temps_process_reg(parser, &dst->reg); -} - -static void materialize_ssas_to_temps_process_src_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_src_param *src) -{ - materialize_ssas_to_temps_process_reg(parser, &src->reg); -} - -static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_source(struct vkd3d_shader_instruction *ins, - unsigned int label) -{ - unsigned int i; - - assert(ins->handler_idx == VKD3DSIH_PHI); - - for (i = 0; i < ins->src_count; i += 2) + struct phi_incoming_to_temp { - if (label_from_src_param(&ins->src[i + 1]) == label) - return &ins->src[i]; - } + struct 
vkd3d_shader_src_param *src; + struct vkd3d_shader_dst_param *dst; + } *incomings; + size_t incoming_capacity; + size_t incoming_count; +}; - vkd3d_unreachable(); +static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *block_info, + size_t count) +{ + size_t i; + + for (i = 0; i < count; ++i) + vkd3d_free(block_info[i].incomings); + + vkd3d_free(block_info); } -static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser *parser, - struct vkd3d_shader_instruction *instruction, const struct vkd3d_shader_location *loc, - const struct vkd3d_shader_dst_param *dest, const struct vkd3d_shader_src_param *cond, - const struct vkd3d_shader_src_param *source, bool invert) -{ - struct vkd3d_shader_src_param *src; - struct vkd3d_shader_dst_param *dst; - - if (!vsir_instruction_init_with_params(&parser->program, instruction, loc, - cond ? VKD3DSIH_MOVC : VKD3DSIH_MOV, 1, cond ? 3 : 1)) - return false; - - dst = instruction->dst; - src = instruction->src; - - dst[0] = *dest; - materialize_ssas_to_temps_process_dst_param(parser, &dst[0]); - - assert(dst[0].write_mask == VKD3DSP_WRITEMASK_0); - assert(dst[0].modifiers == 0); - assert(dst[0].shift == 0); - - if (cond) - { - src[0] = *cond; - src[1 + invert] = *source; - memset(&src[2 - invert], 0, sizeof(src[2 - invert])); - src[2 - invert].reg = dst[0].reg; - materialize_ssas_to_temps_process_src_param(parser, &src[1]); - materialize_ssas_to_temps_process_src_param(parser, &src[2]); - } - else - { - src[0] = *source; - materialize_ssas_to_temps_process_src_param(parser, &src[0]); - } - - return true; -} - -static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *parser) +static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program) { + size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i; + struct ssas_to_temps_block_info *info, *block_info = NULL; struct vkd3d_shader_instruction *instructions = NULL; - struct 
materialize_ssas_to_temps_block_data - { - size_t phi_begin; - size_t phi_count; - } *block_index = NULL; - size_t ins_capacity = 0, ins_count = 0, i; + struct ssas_to_temps_alloc alloc = {0}; unsigned int current_label = 0; - if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count)) - goto fail; - - if (!(block_index = vkd3d_calloc(parser->program.block_count, sizeof(*block_index)))) + if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info)))) { - ERR("Failed to allocate block index.\n"); + ERR("Failed to allocate block info array.\n"); goto fail; } - for (i = 0; i < parser->program.instructions.count; ++i) - { - struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; + if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count)) + goto fail; - switch (ins->handler_idx) + for (i = 0, phi_count = 0, incoming_count = 0; i < program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + unsigned int j, temp_idx; + + /* Only phi src/dst SSA values need be converted here. Structurisation may + * introduce new cases of undominated SSA use, which will be handled later. 
*/ + if (ins->handler_idx != VKD3DSIH_PHI) + continue; + ++phi_count; + + temp_idx = alloc.next_temp_idx++; + + for (j = 0; j < ins->src_count; j += 2) { - case VKD3DSIH_LABEL: - current_label = label_from_src_param(&ins->src[0]); - break; + struct phi_incoming_to_temp *incoming; + unsigned int label; - case VKD3DSIH_PHI: - assert(current_label != 0); - assert(i != 0); - if (block_index[current_label - 1].phi_begin == 0) - block_index[current_label - 1].phi_begin = i; - block_index[current_label - 1].phi_count += 1; - break; + label = label_from_src_param(&ins->src[j + 1]); + assert(label); - default: - current_label = 0; - break; + info = &block_info[label - 1]; + + if (!(vkd3d_array_reserve((void **)&info->incomings, &info->incoming_capacity, info->incoming_count + 1, + sizeof(*info->incomings)))) + goto fail; + + incoming = &info->incomings[info->incoming_count++]; + incoming->src = &ins->src[j]; + incoming->dst = ins->dst; + + alloc.table[ins->dst->reg.idx[0].offset] = temp_idx; + + ++incoming_count; } + + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst->reg); } - for (i = 0; i < parser->program.instructions.count; ++i) + if (!phi_count) + goto done; + + if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + incoming_count - phi_count)) + goto fail; + + for (i = 0; i < program->instructions.count; ++i) { - struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; + struct vkd3d_shader_instruction *mov_ins, *ins = &program->instructions.elements[i]; size_t j; for (j = 0; j < ins->dst_count; ++j) - materialize_ssas_to_temps_process_dst_param(parser, &ins->dst[j]); + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); for (j = 0; j < ins->src_count; ++j) - materialize_ssas_to_temps_process_src_param(parser, &ins->src[j]); + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); switch (ins->handler_idx) { @@ -2803,62 +2914,21 @@ static enum vkd3d_result 
materialize_ssas_to_temps(struct vkd3d_shader_parser *p break; case VKD3DSIH_BRANCH: - { - if (vsir_register_is_label(&ins->src[0].reg)) + case VKD3DSIH_SWITCH_MONOLITHIC: + info = &block_info[current_label - 1]; + + for (j = 0; j < info->incoming_count; ++j) { - const struct materialize_ssas_to_temps_block_data *data = &block_index[label_from_src_param(&ins->src[0]) - 1]; + struct phi_incoming_to_temp *incoming = &info->incomings[j]; - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + data->phi_count)) + mov_ins = &instructions[ins_count++]; + if (!vsir_instruction_init_with_params(program, mov_ins, &ins->location, VKD3DSIH_MOV, 1, 0)) goto fail; - - for (j = data->phi_begin; j < data->phi_begin + data->phi_count; ++j) - { - const struct vkd3d_shader_src_param *source; - - source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); - if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, - &parser->program.instructions.elements[j].dst[0], NULL, source, false)) - goto fail; - - ++ins_count; - } - } - else - { - struct materialize_ssas_to_temps_block_data *data_true = &block_index[label_from_src_param(&ins->src[1]) - 1], - *data_false = &block_index[label_from_src_param(&ins->src[2]) - 1]; - const struct vkd3d_shader_src_param *cond = &ins->src[0]; - - if (!reserve_instructions(&instructions, &ins_capacity, - ins_count + data_true->phi_count + data_false->phi_count)) - goto fail; - - for (j = data_true->phi_begin; j < data_true->phi_begin + data_true->phi_count; ++j) - { - const struct vkd3d_shader_src_param *source; - - source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); - if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, - &parser->program.instructions.elements[j].dst[0], cond, source, false)) - goto fail; - - ++ins_count; - } - - for (j = 
data_false->phi_begin; j < data_false->phi_begin + data_false->phi_count; ++j) - { - const struct vkd3d_shader_src_param *source; - - source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); - if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, - &parser->program.instructions.elements[j].dst[0], cond, source, true)) - goto fail; - - ++ins_count; - } + *mov_ins->dst = *incoming->dst; + mov_ins->src = incoming->src; + mov_ins->src_count = 1; } break; - } case VKD3DSIH_PHI: continue; @@ -2867,148 +2937,28 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p break; } - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) - goto fail; - instructions[ins_count++] = *ins; } - vkd3d_free(parser->program.instructions.elements); - vkd3d_free(block_index); - parser->program.instructions.elements = instructions; - parser->program.instructions.capacity = ins_capacity; - parser->program.instructions.count = ins_count; - parser->program.temp_count += parser->program.ssa_count; - parser->program.ssa_count = 0; + vkd3d_free(program->instructions.elements); + program->instructions.elements = instructions; + program->instructions.capacity = ins_capacity; + program->instructions.count = ins_count; + program->temp_count = alloc.next_temp_idx; +done: + ssas_to_temps_block_info_cleanup(block_info, program->block_count); + vkd3d_free(alloc.table); return VKD3D_OK; fail: vkd3d_free(instructions); - vkd3d_free(block_index); + ssas_to_temps_block_info_cleanup(block_info, program->block_count); + vkd3d_free(alloc.table); return VKD3D_ERROR_OUT_OF_MEMORY; } -static enum vkd3d_result simple_structurizer_run(struct vkd3d_shader_parser *parser) -{ - const unsigned int block_temp_idx = parser->program.temp_count; - struct vkd3d_shader_instruction *instructions = NULL; - const struct vkd3d_shader_location no_loc = {0}; - size_t ins_capacity = 0, ins_count 
= 0, i; - bool first_label_found = false; - - if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count)) - goto fail; - - for (i = 0; i < parser->program.instructions.count; ++i) - { - struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; - - switch (ins->handler_idx) - { - case VKD3DSIH_PHI: - case VKD3DSIH_SWITCH_MONOLITHIC: - vkd3d_unreachable(); - - case VKD3DSIH_LABEL: - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 4)) - goto fail; - - if (!first_label_found) - { - first_label_found = true; - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) - goto fail; - dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); - src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); - ins_count++; - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_LOOP, 0, 0)) - goto fail; - ins_count++; - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_SWITCH, 0, 1)) - goto fail; - src_param_init_temp_uint(&instructions[ins_count].src[0], block_temp_idx); - ins_count++; - } - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_CASE, 0, 1)) - goto fail; - src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); - ins_count++; - break; - - case VKD3DSIH_BRANCH: - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 2)) - goto fail; - - if (vsir_register_is_label(&ins->src[0].reg)) - { - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) - goto fail; - dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); - src_param_init_const_uint(&instructions[ins_count].src[0], 
label_from_src_param(&ins->src[0])); - ins_count++; - } - else - { - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOVC, 1, 3)) - goto fail; - dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); - instructions[ins_count].src[0] = ins->src[0]; - src_param_init_const_uint(&instructions[ins_count].src[1], label_from_src_param(&ins->src[1])); - src_param_init_const_uint(&instructions[ins_count].src[2], label_from_src_param(&ins->src[2])); - ins_count++; - } - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_BREAK, 0, 0)) - goto fail; - ins_count++; - break; - - case VKD3DSIH_RET: - default: - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) - goto fail; - - instructions[ins_count++] = *ins; - break; - } - } - - assert(first_label_found); - - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 3)) - goto fail; - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDSWITCH, 0, 0)) - goto fail; - ins_count++; - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDLOOP, 0, 0)) - goto fail; - ins_count++; - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_RET, 0, 0)) - goto fail; - ins_count++; - - vkd3d_free(parser->program.instructions.elements); - parser->program.instructions.elements = instructions; - parser->program.instructions.capacity = ins_capacity; - parser->program.instructions.count = ins_count; - parser->program.temp_count += 1; - - return VKD3D_OK; - -fail: - vkd3d_free(instructions); - return VKD3D_ERROR_OUT_OF_MEMORY; -} - struct vsir_block_list { struct vsir_block **blocks; @@ -3025,14 +2975,8 @@ static void vsir_block_list_cleanup(struct vsir_block_list *list) vkd3d_free(list->blocks); } -static enum vkd3d_result 
vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) +static enum vkd3d_result vsir_block_list_add_checked(struct vsir_block_list *list, struct vsir_block *block) { - size_t i; - - for (i = 0; i < list->count; ++i) - if (block == list->blocks[i]) - return VKD3D_OK; - if (!vkd3d_array_reserve((void **)&list->blocks, &list->capacity, list->count + 1, sizeof(*list->blocks))) { ERR("Cannot extend block list.\n"); @@ -3044,9 +2988,27 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc return VKD3D_OK; } +static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) +{ + size_t i; + + for (i = 0; i < list->count; ++i) + if (block == list->blocks[i]) + return VKD3D_FALSE; + + return vsir_block_list_add_checked(list, block); +} + +/* It is guaranteed that the relative order is kept. */ +static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t idx) +{ + --list->count; + memmove(&list->blocks[idx], &list->blocks[idx + 1], (list->count - idx) * sizeof(*list->blocks)); +} + struct vsir_block { - unsigned int label; + unsigned int label, order_pos; /* `begin' points to the instruction immediately following the * LABEL that introduces the block. `end' points to the terminator * instruction (either BRANCH or RET). 
They can coincide, meaning @@ -3063,8 +3025,7 @@ static enum vkd3d_result vsir_block_init(struct vsir_block *block, unsigned int if (block_count > SIZE_MAX - (sizeof(*block->dominates) * CHAR_BIT - 1)) return VKD3D_ERROR_OUT_OF_MEMORY; - block_count = align(block_count, sizeof(*block->dominates) * CHAR_BIT); - byte_count = block_count / CHAR_BIT; + byte_count = VKD3D_BITMAP_SIZE(block_count) * sizeof(*block->dominates); assert(label); memset(block, 0, sizeof(*block)); @@ -3089,12 +3050,211 @@ static void vsir_block_cleanup(struct vsir_block *block) vkd3d_free(block->dominates); } +static int block_compare(const void *ptr1, const void *ptr2) +{ + const struct vsir_block *block1 = *(const struct vsir_block **)ptr1; + const struct vsir_block *block2 = *(const struct vsir_block **)ptr2; + + return vkd3d_u32_compare(block1->label, block2->label); +} + +static void vsir_block_list_sort(struct vsir_block_list *list) +{ + qsort(list->blocks, list->count, sizeof(*list->blocks), block_compare); +} + +static bool vsir_block_list_search(struct vsir_block_list *list, struct vsir_block *block) +{ + return !!bsearch(&block, list->blocks, list->count, sizeof(*list->blocks), block_compare); +} + +struct vsir_cfg_structure_list +{ + struct vsir_cfg_structure *structures; + size_t count, capacity; + unsigned int end; +}; + +struct vsir_cfg_structure +{ + enum vsir_cfg_structure_type + { + /* Execute a block of the original VSIR program. */ + STRUCTURE_TYPE_BLOCK, + /* Execute a loop, which is identified by an index. */ + STRUCTURE_TYPE_LOOP, + /* Execute a selection construct. */ + STRUCTURE_TYPE_SELECTION, + /* Execute a `return' or a (possibly) multilevel `break' or + * `continue', targeting a loop by its index. If `condition' + * is non-NULL, then the jump is conditional (this is + * currently not allowed for `return'). 
*/ + STRUCTURE_TYPE_JUMP, + } type; + union + { + struct vsir_block *block; + struct vsir_cfg_structure_loop + { + struct vsir_cfg_structure_list body; + unsigned idx; + bool needs_trampoline; + struct vsir_cfg_structure *outer_loop; + } loop; + struct vsir_cfg_structure_selection + { + struct vkd3d_shader_src_param *condition; + struct vsir_cfg_structure_list if_body; + struct vsir_cfg_structure_list else_body; + bool invert_condition; + } selection; + struct vsir_cfg_structure_jump + { + enum vsir_cfg_jump_type + { + /* NONE is available as an intermediate value, but it + * is not allowed in valid structured programs. */ + JUMP_NONE, + JUMP_BREAK, + JUMP_CONTINUE, + JUMP_RET, + } type; + unsigned int target; + struct vkd3d_shader_src_param *condition; + bool invert_condition; + bool needs_launcher; + } jump; + } u; +}; + +static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type); +static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure); + +static void vsir_cfg_structure_list_cleanup(struct vsir_cfg_structure_list *list) +{ + unsigned int i; + + for (i = 0; i < list->count; ++i) + vsir_cfg_structure_cleanup(&list->structures[i]); + vkd3d_free(list->structures); +} + +static struct vsir_cfg_structure *vsir_cfg_structure_list_append(struct vsir_cfg_structure_list *list, + enum vsir_cfg_structure_type type) +{ + struct vsir_cfg_structure *ret; + + if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + 1, + sizeof(*list->structures))) + return NULL; + + ret = &list->structures[list->count++]; + + vsir_cfg_structure_init(ret, type); + + return ret; +} + +static enum vkd3d_result vsir_cfg_structure_list_append_from_region(struct vsir_cfg_structure_list *list, + struct vsir_cfg_structure *begin, size_t size) +{ + if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + size, + sizeof(*list->structures))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + 
memcpy(&list->structures[list->count], begin, size * sizeof(*begin)); + + list->count += size; + + return VKD3D_OK; +} + +static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type) +{ + memset(structure, 0, sizeof(*structure)); + structure->type = type; +} + +static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure) +{ + switch (structure->type) + { + case STRUCTURE_TYPE_LOOP: + vsir_cfg_structure_list_cleanup(&structure->u.loop.body); + break; + + case STRUCTURE_TYPE_SELECTION: + vsir_cfg_structure_list_cleanup(&structure->u.selection.if_body); + vsir_cfg_structure_list_cleanup(&structure->u.selection.else_body); + break; + + default: + break; + } +} + +struct vsir_cfg_emit_target +{ + struct vkd3d_shader_instruction *instructions; + size_t ins_capacity, ins_count; + unsigned int jump_target_temp_idx; + unsigned int temp_count; +}; + struct vsir_cfg { + struct vkd3d_shader_message_context *message_context; struct vsir_program *program; + size_t function_begin; + size_t function_end; struct vsir_block *blocks; struct vsir_block *entry; size_t block_count; + struct vkd3d_string_buffer debug_buffer; + + struct vsir_block_list *loops; + size_t loops_count, loops_capacity; + size_t *loops_by_header; + + struct vsir_block_list order; + struct cfg_loop_interval + { + /* `begin' is the position of the first block of the loop in + * the topological sort; `end' is the position of the first + * block after the loop. In other words, `begin' is where a + * `continue' instruction would jump and `end' is where a + * `break' instruction would jump. */ + unsigned int begin, end; + /* Each loop interval can be natural or synthetic. Natural + * intervals are added to represent loops given by CFG back + * edges. Synthetic intervals do not correspond to loops in + * the input CFG, but are added to leverage their `break' + * instruction in order to execute forward edges. 
+ * + * For a synthetic loop interval it's not really important + * which one is the `begin' block, since we don't need to + * execute `continue' for them. So we have some leeway for + * moving it provided that these conditions are met: 1. the + * interval must contain all `break' instructions that target + * it, which in practice means that `begin' can be moved + * backward and not forward; 2. intervals must remain properly + * nested (for each pair of intervals, either one contains the + * other or they are disjoint). + * + * Subject to these conditions, we try to reuse the same loop + * as much as possible (if many forward edges target the same + * block), but we still try to keep `begin' as forward as + * possible, to keep the loop scope as small as possible. */ + bool synthetic; + /* The number of jump instructions (both conditional and + * unconditional) that target this loop. */ + unsigned int target_count; + } *loop_intervals; + size_t loop_interval_count, loop_interval_capacity; + + struct vsir_cfg_structure_list structured_program; + + struct vsir_cfg_emit_target *target; }; static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3104,7 +3264,44 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) for (i = 0; i < cfg->block_count; ++i) vsir_block_cleanup(&cfg->blocks[i]); + for (i = 0; i < cfg->loops_count; ++i) + vsir_block_list_cleanup(&cfg->loops[i]); + + vsir_block_list_cleanup(&cfg->order); + + vsir_cfg_structure_list_cleanup(&cfg->structured_program); + vkd3d_free(cfg->blocks); + vkd3d_free(cfg->loops); + vkd3d_free(cfg->loops_by_header); + vkd3d_free(cfg->loop_intervals); + + if (TRACE_ON()) + vkd3d_string_buffer_cleanup(&cfg->debug_buffer); +} + +static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsigned int begin, + unsigned int end, bool synthetic) +{ + struct cfg_loop_interval *interval; + + if (!vkd3d_array_reserve((void **)&cfg->loop_intervals, &cfg->loop_interval_capacity, + cfg->loop_interval_count + 1, 
sizeof(*cfg->loop_intervals))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + interval = &cfg->loop_intervals[cfg->loop_interval_count++]; + + interval->begin = begin; + interval->end = end; + interval->synthetic = synthetic; + interval->target_count = 0; + + return VKD3D_OK; +} + +static bool vsir_block_dominates(struct vsir_block *b1, struct vsir_block *b2) +{ + return bitmap_is_set(b1->dominates, b2->label - 1); } static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_block *block, @@ -3162,22 +3359,124 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) TRACE("}\n"); } -static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program) +static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list); + +static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure *structure) +{ + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + TRACE("%sblock %u\n", cfg->debug_buffer.buffer, structure->u.block->label); + break; + + case STRUCTURE_TYPE_LOOP: + TRACE("%s%u : loop {\n", cfg->debug_buffer.buffer, structure->u.loop.idx); + + vsir_cfg_structure_list_dump(cfg, &structure->u.loop.body); + + TRACE("%s} # %u%s\n", cfg->debug_buffer.buffer, structure->u.loop.idx, + structure->u.loop.needs_trampoline ? 
", tramp" : ""); + break; + + case STRUCTURE_TYPE_SELECTION: + TRACE("%sif {\n", cfg->debug_buffer.buffer); + + vsir_cfg_structure_list_dump(cfg, &structure->u.selection.if_body); + + if (structure->u.selection.else_body.count == 0) + { + TRACE("%s}\n", cfg->debug_buffer.buffer); + } + else + { + TRACE("%s} else {\n", cfg->debug_buffer.buffer); + + vsir_cfg_structure_list_dump(cfg, &structure->u.selection.else_body); + + TRACE("%s}\n", cfg->debug_buffer.buffer); + } + break; + + case STRUCTURE_TYPE_JUMP: + { + const char *type_str; + + switch (structure->u.jump.type) + { + case JUMP_RET: + TRACE("%sret\n", cfg->debug_buffer.buffer); + return; + + case JUMP_BREAK: + type_str = "break"; + break; + + case JUMP_CONTINUE: + type_str = "continue"; + break; + + default: + vkd3d_unreachable(); + } + + TRACE("%s%s%s %u%s\n", cfg->debug_buffer.buffer, type_str, + structure->u.jump.condition ? "c" : "", structure->u.jump.target, + structure->u.jump.needs_launcher ? " # launch" : ""); + break; + } + + default: + vkd3d_unreachable(); + } +} + +static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) +{ + unsigned int i; + + vkd3d_string_buffer_printf(&cfg->debug_buffer, " "); + + for (i = 0; i < list->count; ++i) + vsir_cfg_structure_dump(cfg, &list->structures[i]); + + vkd3d_string_buffer_truncate(&cfg->debug_buffer, cfg->debug_buffer.content_size - 2); +} + +static void vsir_cfg_dump_structured_program(struct vsir_cfg *cfg) +{ + unsigned int i; + + for (i = 0; i < cfg->structured_program.count; ++i) + vsir_cfg_structure_dump(cfg, &cfg->structured_program.structures[i]); +} + +static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program, + struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target, + size_t *pos) { struct vsir_block *current_block = NULL; enum vkd3d_result ret; size_t i; memset(cfg, 0, sizeof(*cfg)); + cfg->message_context = message_context; cfg->program = 
program; cfg->block_count = program->block_count; + cfg->target = target; + cfg->function_begin = *pos; + + vsir_block_list_init(&cfg->order); if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks)))) return VKD3D_ERROR_OUT_OF_MEMORY; - for (i = 0; i < program->instructions.count; ++i) + if (TRACE_ON()) + vkd3d_string_buffer_init(&cfg->debug_buffer); + + for (i = *pos; i < program->instructions.count; ++i) { struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; + bool finish = false; switch (instruction->handler_idx) { @@ -3209,11 +3508,24 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program current_block = NULL; break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + assert(!current_block); + finish = true; + break; + default: break; } + + if (finish) + break; } + *pos = i; + cfg->function_end = *pos; + for (i = 0; i < cfg->block_count; ++i) { struct vsir_block *block = &cfg->blocks[i]; @@ -3285,12 +3597,8 @@ static void vsir_cfg_compute_dominators_recurse(struct vsir_block *current, stru static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) { - struct vkd3d_string_buffer buf; size_t i, j; - if (TRACE_ON()) - vkd3d_string_buffer_init(&buf); - for (i = 0; i < cfg->block_count; ++i) { struct vsir_block *block = &cfg->blocks[i]; @@ -3302,7 +3610,7 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) if (TRACE_ON()) { - vkd3d_string_buffer_printf(&buf, "Block %u dominates:", block->label); + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block %u dominates:", block->label); for (j = 0; j < cfg->block_count; j++) { struct vsir_block *block2 = &cfg->blocks[j]; @@ -3310,103 +3618,1649 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) if (block2->label == 0) continue; - if (bitmap_is_set(block->dominates, j)) - vkd3d_string_buffer_printf(&buf, " %u", block2->label); + if (vsir_block_dominates(block, 
block2)) + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); } - TRACE("%s\n", buf.buffer); - vkd3d_string_buffer_clear(&buf); + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); + } + } +} + +/* A back edge is an edge X -> Y for which block Y dominates block + * X. All the other edges are forward edges, and it is required that + * the input CFG is reducible, i.e., it is acyclic once you strip away + * the back edges. + * + * Each back edge X -> Y defines a loop: block Y is the header block, + * block X is the back edge block, and the loop consists of all the + * blocks which are dominated by the header block and have a path to + * the back edge block that doesn't pass through the header block + * (including the header block itself). It can be proved that all the + * blocks in such a path (connecting a loop block to the back edge + * block without passing through the header block) belong to the same + * loop. + * + * If the input CFG is reducible its loops are properly nested (i.e., + * each two loops are either disjoint or one is contained in the + * other), provided that each block has at most one incoming back + * edge. If this condition does not hold, a synthetic block can be + * introduced as the only back edge block for the given header block, + * with all the previous back edges now being forward edges to the + * synthetic block. This is not currently implemented (but it is + * rarely found in practice anyway).
*/ +static enum vkd3d_result vsir_cfg_scan_loop(struct vsir_block_list *loop, struct vsir_block *block, + struct vsir_block *header) +{ + enum vkd3d_result ret; + size_t i; + + if ((ret = vsir_block_list_add(loop, block)) < 0) + return ret; + + if (ret == VKD3D_FALSE || block == header) + return VKD3D_OK; + + for (i = 0; i < block->predecessors.count; ++i) + { + if ((ret = vsir_cfg_scan_loop(loop, block->predecessors.blocks[i], header)) < 0) + return ret; + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) +{ + size_t i, j, k; + + if (!(cfg->loops_by_header = vkd3d_calloc(cfg->block_count, sizeof(*cfg->loops_by_header)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + memset(cfg->loops_by_header, 0xff, cfg->block_count * sizeof(*cfg->loops_by_header)); + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + + if (block->label == 0) + continue; + + for (j = 0; j < block->successors.count; ++j) + { + struct vsir_block *header = block->successors.blocks[j]; + struct vsir_block_list *loop; + enum vkd3d_result ret; + + /* Is this a back edge? 
*/ + if (!vsir_block_dominates(header, block)) + continue; + + if (!vkd3d_array_reserve((void **)&cfg->loops, &cfg->loops_capacity, cfg->loops_count + 1, sizeof(*cfg->loops))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + loop = &cfg->loops[cfg->loops_count]; + vsir_block_list_init(loop); + + if ((ret = vsir_cfg_scan_loop(loop, block, header)) < 0) + return ret; + + vsir_block_list_sort(loop); + + if (TRACE_ON()) + { + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop:", block->label, header->label); + + for (k = 0; k < loop->count; ++k) + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", loop->blocks[k]->label); + + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); + } + + if (cfg->loops_by_header[header->label - 1] != SIZE_MAX) + { + FIXME("Block %u is header to more than one loop, this is not implemented.\n", header->label); + vkd3d_shader_error(cfg->message_context, &header->begin->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Block %u is header to more than one loop, this is not implemented.", header->label); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + + cfg->loops_by_header[header->label - 1] = cfg->loops_count; + + ++cfg->loops_count; } } - if (TRACE_ON()) - vkd3d_string_buffer_cleanup(&buf); + return VKD3D_OK; } -enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info) +struct vsir_cfg_node_sorter { - struct vkd3d_shader_instruction_array *instructions = &parser->program.instructions; - enum vkd3d_result result = VKD3D_OK; - - remove_dcl_temps(&parser->program); - - if ((result = instruction_array_lower_texkills(parser)) < 0) - return result; - - if (parser->shader_desc.is_dxil) + struct vsir_cfg *cfg; + struct vsir_cfg_node_sorter_stack_item { - struct vsir_cfg cfg; + struct vsir_block_list *loop; + unsigned int seen_count; + unsigned int begin; + } *stack; + size_t stack_count, stack_capacity; + struct 
vsir_block_list available_blocks; +}; - if ((result = lower_switch_to_if_ladder(&parser->program)) < 0) - return result; +/* Topologically sort the blocks according to the forward edges. By + * definition if the input CFG is reducible then its forward edges + * form a DAG, so a topological sorting exists. In order to compute it + * we keep an array with the incoming degree for each block and an + * available list of all the blocks whose incoming degree has reached + * zero. At each step we pick a block from the available list and + * strip it away from the graph, updating the incoming degrees and + * available list. + * + * In principle at each step we can pick whatever node we want from + * the available list, and will get a topological sort + * anyway. However, we use these two criteria to give to the computed + * order additional properties: + * + * 1. we keep track of which loops we're into, and pick blocks + * belonging to the current innermost loop, so that loops are kept + * contiguous in the order; this can always be done when the input + * CFG is reducible; + * + * 2. subject to the requirement above, we always pick the most + * recently added block to the available list, because this tends + * to keep related blocks and require fewer control flow + * primitives. 
+ */ +static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) +{ + struct vsir_cfg_node_sorter sorter = { .cfg = cfg }; + unsigned int *in_degrees = NULL; + enum vkd3d_result ret; + size_t i; - if ((result = materialize_ssas_to_temps(parser)) < 0) - return result; + if (!(in_degrees = vkd3d_calloc(cfg->block_count, sizeof(*in_degrees)))) + return VKD3D_ERROR_OUT_OF_MEMORY; - if ((result = vsir_cfg_init(&cfg, &parser->program)) < 0) - return result; + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; - vsir_cfg_compute_dominators(&cfg); - - if ((result = simple_structurizer_run(parser)) < 0) + if (block->label == 0) { - vsir_cfg_cleanup(&cfg); - return result; + in_degrees[i] = UINT_MAX; + continue; } - vsir_cfg_cleanup(&cfg); + in_degrees[i] = block->predecessors.count; + + /* Do not count back edges. */ + if (cfg->loops_by_header[i] != SIZE_MAX) + { + assert(in_degrees[i] > 0); + in_degrees[i] -= 1; + } + + if (in_degrees[i] == 0 && block != cfg->entry) + { + WARN("Unexpected entry point %u.\n", block->label); + vkd3d_shader_error(cfg->message_context, &block->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "Block %u is unreachable from the entry point.", block->label); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + } + + if (in_degrees[cfg->entry->label - 1] != 0) + { + WARN("Entry point has %u incoming forward edges.\n", in_degrees[cfg->entry->label - 1]); + vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "The entry point block has %u incoming forward edges.", in_degrees[cfg->entry->label - 1]); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + + vsir_block_list_init(&sorter.available_blocks); + + if ((ret = vsir_block_list_add_checked(&sorter.available_blocks, cfg->entry)) < 0) + goto fail; + + while (sorter.available_blocks.count != 0) + { + struct vsir_cfg_node_sorter_stack_item *inner_stack_item = NULL; + 
struct vsir_block *block; + size_t new_seen_count; + + if (sorter.stack_count != 0) + inner_stack_item = &sorter.stack[sorter.stack_count - 1]; + + for (i = sorter.available_blocks.count - 1; ; --i) + { + if (i == SIZE_MAX) + { + ERR("Couldn't find any viable next block, is the input CFG reducible?\n"); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + + block = sorter.available_blocks.blocks[i]; + + if (!inner_stack_item || vsir_block_list_search(inner_stack_item->loop, block)) + break; + } + + /* If the node is a loop header, open the loop. */ + if (sorter.cfg->loops_by_header[block->label - 1] != SIZE_MAX) + { + struct vsir_block_list *loop = &sorter.cfg->loops[sorter.cfg->loops_by_header[block->label - 1]]; + + if (loop) + { + if (!vkd3d_array_reserve((void **)&sorter.stack, &sorter.stack_capacity, + sorter.stack_count + 1, sizeof(*sorter.stack))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + inner_stack_item = &sorter.stack[sorter.stack_count++]; + inner_stack_item->loop = loop; + inner_stack_item->seen_count = 0; + inner_stack_item->begin = sorter.cfg->order.count; + } + } + + vsir_block_list_remove_index(&sorter.available_blocks, i); + block->order_pos = cfg->order.count; + if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0) + goto fail; + + /* Close loops: since each loop is a strict subset of any + * outer loop, we just need to track how many blocks we've + * seen; when I close a loop I mark the same number of seen + * blocks for the next outer loop. 
*/ + new_seen_count = 1; + while (sorter.stack_count != 0) + { + inner_stack_item = &sorter.stack[sorter.stack_count - 1]; + + inner_stack_item->seen_count += new_seen_count; + + assert(inner_stack_item->seen_count <= inner_stack_item->loop->count); + if (inner_stack_item->seen_count != inner_stack_item->loop->count) + break; + + if ((ret = vsir_cfg_add_loop_interval(cfg, inner_stack_item->begin, + cfg->order.count, false)) < 0) + goto fail; + + new_seen_count = inner_stack_item->loop->count; + --sorter.stack_count; + } + + /* Remove (forward) edges and make new nodes available. */ + for (i = 0; i < block->successors.count; ++i) + { + struct vsir_block *successor = block->successors.blocks[i]; + + if (vsir_block_dominates(successor, block)) + continue; + + assert(in_degrees[successor->label - 1] > 0); + --in_degrees[successor->label - 1]; + + if (in_degrees[successor->label - 1] == 0) + { + if ((ret = vsir_block_list_add_checked(&sorter.available_blocks, successor)) < 0) + goto fail; + } + } + } + + if (cfg->order.count != cfg->block_count) + { + /* There is a cycle of forward edges. 
*/ + WARN("The control flow graph is not reducible.\n"); + vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "The control flow graph is not reducible."); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + + assert(sorter.stack_count == 0); + + vkd3d_free(in_degrees); + vkd3d_free(sorter.stack); + vsir_block_list_cleanup(&sorter.available_blocks); + + if (TRACE_ON()) + { + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order:"); + + for (i = 0; i < cfg->order.count; ++i) + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", cfg->order.blocks[i]->label); + + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); + } + + return VKD3D_OK; + +fail: + vkd3d_free(in_degrees); + vkd3d_free(sorter.stack); + vsir_block_list_cleanup(&sorter.available_blocks); + + return ret; +} + +/* Sort loop intervals first by ascending begin time and then by + * descending end time, so that inner intervals appear after outer + * ones and disjoint intervals appear in their proper order. 
*/ +static int compare_loop_intervals(const void *ptr1, const void *ptr2) +{ + const struct cfg_loop_interval *interval1 = ptr1; + const struct cfg_loop_interval *interval2 = ptr2; + + if (interval1->begin != interval2->begin) + return vkd3d_u32_compare(interval1->begin, interval2->begin); + + return -vkd3d_u32_compare(interval1->end, interval2->end); +} + +static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_cfg *cfg) +{ + enum vkd3d_result ret; + size_t i, j, k; + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + + if (block->label == 0) + continue; + + for (j = 0; j < block->successors.count; ++j) + { + struct vsir_block *successor = block->successors.blocks[j]; + struct cfg_loop_interval *extend = NULL; + unsigned int begin; + enum + { + ACTION_DO_NOTHING, + ACTION_CREATE_NEW, + ACTION_EXTEND, + } action = ACTION_CREATE_NEW; + + /* We've already constructed loop intervals for the back + * edges, there's nothing more to do. */ + if (vsir_block_dominates(successor, block)) + continue; + + assert(block->order_pos < successor->order_pos); + + /* Jumping from a block to the following one is always + * possible, so nothing to do. */ + if (block->order_pos + 1 == successor->order_pos) + continue; + + /* Let's look for a loop interval that already breaks at + * `successor' and either contains or can be extended to + * contain `block'. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (interval->end != successor->order_pos) + continue; + + if (interval->begin <= block->order_pos) + { + action = ACTION_DO_NOTHING; + break; + } + + if (interval->synthetic) + { + action = ACTION_EXTEND; + extend = interval; + break; + } + } + + if (action == ACTION_DO_NOTHING) + continue; + + /* Ok, we have to decide where the new or replacing + * interval has to begin. These are the rules: 1. it must + * begin before `block'; 2.
intervals must be properly + * nested; 3. the new interval should begin as late as + * possible, to limit control flow depth and extension. */ + begin = block->order_pos; + + /* Our candidate interval is always [begin, + * successor->order_pos), and we move `begin' backward + * until the candidate interval contains all the intervals + * whose endpoint lies in the candidate interval + * itself. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (begin < interval->end && interval->end < successor->order_pos) + begin = min(begin, interval->begin); + } + + /* Now we have to care about the intervals whose begin + * point lies in the candidate interval. We cannot move + * the candidate interval endpoint, because it is + * important that the loop break target matches + * `successor'. So we have to move that interval's begin + * point to the begin point of the candidate interval, + * i.e. `begin'. But what if the interval we should extend + * backward is not synthetic? This cannot happen, + * fortunately, because it would mean that there is a jump + * entering a loop via a block which is not the loop + * header, so the CFG would not be reducible. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (interval->begin < successor->order_pos && successor->order_pos < interval->end) + { + if (interval->synthetic) + interval->begin = min(begin, interval->begin); + assert(begin >= interval->begin); + } + } + + if (action == ACTION_EXTEND) + extend->begin = begin; + else if ((ret = vsir_cfg_add_loop_interval(cfg, begin, successor->order_pos, true)) < 0) + return ret; + } + } + + qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals); + + if (TRACE_ON()) + for (i = 0; i < cfg->loop_interval_count; ++i) + TRACE("%s loop interval %u - %u\n", cfg->loop_intervals[i].synthetic ?
"Synthetic" : "Natural", + cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); + + return VKD3D_OK; +} + +struct vsir_cfg_edge_action +{ + enum vsir_cfg_jump_type jump_type; + unsigned int target; + struct vsir_block *successor; +}; + +static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block *block, + struct vsir_block *successor, struct vsir_cfg_edge_action *action) +{ + unsigned int i; + + action->target = UINT_MAX; + action->successor = successor; + + if (successor->order_pos <= block->order_pos) + { + /* The successor is before the current block, so we have to + * use `continue'. The target loop is the innermost that + * contains the current block and has the successor as + * `continue' target. */ + for (i = 0; i < cfg->loop_interval_count; ++i) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[i]; + + if (interval->begin == successor->order_pos && block->order_pos < interval->end) + action->target = i; + + if (interval->begin > successor->order_pos) + break; + } + + assert(action->target != UINT_MAX); + action->jump_type = JUMP_CONTINUE; } else { - if (parser->program.shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + /* The successor is after the current block, so we have to use + * `break', or possibly just jump to the following block. The + * target loop is the outermost that contains the current + * block and has the successor as `break' target. 
*/ + for (i = 0; i < cfg->loop_interval_count; ++i) { - if ((result = remap_output_signature(parser, compile_info)) < 0) - return result; + struct cfg_loop_interval *interval = &cfg->loop_intervals[i]; + + if (interval->begin <= block->order_pos && interval->end == successor->order_pos) + { + action->target = i; + break; + } } - if (parser->program.shader_version.type == VKD3D_SHADER_TYPE_HULL) + if (action->target == UINT_MAX) { - if ((result = instruction_array_flatten_hull_shader_phases(instructions)) < 0) - return result; + assert(successor->order_pos == block->order_pos + 1); + action->jump_type = JUMP_NONE; + } + else + { + action->jump_type = JUMP_BREAK; + } + } +} - if ((result = instruction_array_normalise_hull_shader_control_point_io(instructions, - &parser->shader_desc.input_signature)) < 0) - return result; +static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) +{ + unsigned int i, stack_depth = 1, open_interval_idx = 0; + struct vsir_cfg_structure_list **stack = NULL; + + /* It's enough to allocate up to the maximum interval stacking + * depth (plus one for the full program), but this is simpler. */ + if (!(stack = vkd3d_calloc(cfg->loop_interval_count + 1, sizeof(*stack)))) + goto fail; + cfg->structured_program.end = cfg->order.count; + stack[0] = &cfg->structured_program; + + for (i = 0; i < cfg->order.count; ++i) + { + struct vsir_block *block = cfg->order.blocks[i]; + struct vsir_cfg_structure *structure; + + assert(stack_depth > 0); + + /* Open loop intervals. 
*/ + while (open_interval_idx < cfg->loop_interval_count) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[open_interval_idx]; + + if (interval->begin != i) + break; + + if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_LOOP))) + goto fail; + structure->u.loop.idx = open_interval_idx++; + + structure->u.loop.body.end = interval->end; + stack[stack_depth++] = &structure->u.loop.body; } - if ((result = shader_normalise_io_registers(parser)) < 0) - return result; + /* Execute the block. */ + if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_BLOCK))) + goto fail; + structure->u.block = block; - if ((result = instruction_array_normalise_flat_constants(&parser->program)) < 0) - return result; + /* Generate between zero and two jump instructions. */ + switch (block->end->handler_idx) + { + case VKD3DSIH_BRANCH: + { + struct vsir_cfg_edge_action action_true, action_false; + bool invert_condition = false; - remove_dead_code(&parser->program); + if (vsir_register_is_label(&block->end->src[0].reg)) + { + unsigned int target = label_from_src_param(&block->end->src[0]); + struct vsir_block *successor = &cfg->blocks[target - 1]; - if ((result = normalise_combined_samplers(parser)) < 0) - return result; + vsir_cfg_compute_edge_action(cfg, block, successor, &action_true); + action_false = action_true; + } + else + { + unsigned int target = label_from_src_param(&block->end->src[1]); + struct vsir_block *successor = &cfg->blocks[target - 1]; + + vsir_cfg_compute_edge_action(cfg, block, successor, &action_true); + + target = label_from_src_param(&block->end->src[2]); + successor = &cfg->blocks[target - 1]; + + vsir_cfg_compute_edge_action(cfg, block, successor, &action_false); + } + + /* This will happen if the branch is unconditional, + * but also if it's conditional with the same target + * in both branches, which can happen in some corner + * cases, e.g. 
when converting switch instructions to + * selection ladders. */ + if (action_true.successor == action_false.successor) + { + assert(action_true.jump_type == action_false.jump_type); + } + else + { + /* At most one branch can just fall through to the + * next block, in which case we make sure it's the + * false branch. */ + if (action_true.jump_type == JUMP_NONE) + { + invert_condition = true; + } + else if (stack_depth >= 2) + { + struct vsir_cfg_structure_list *inner_loop_frame = stack[stack_depth - 2]; + struct vsir_cfg_structure *inner_loop = &inner_loop_frame->structures[inner_loop_frame->count - 1]; + + assert(inner_loop->type == STRUCTURE_TYPE_LOOP); + + /* Otherwise, if one of the branches is + * continuing the inner loop we're inside, + * make sure it's the false branch (because it + * will be optimized out later). */ + if (action_true.jump_type == JUMP_CONTINUE && action_true.target == inner_loop->u.loop.idx) + invert_condition = true; + } + + if (invert_condition) + { + struct vsir_cfg_edge_action tmp = action_true; + action_true = action_false; + action_false = tmp; + } + + assert(action_true.jump_type != JUMP_NONE); + + if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) + goto fail; + structure->u.jump.type = action_true.jump_type; + structure->u.jump.target = action_true.target; + structure->u.jump.condition = &block->end->src[0]; + structure->u.jump.invert_condition = invert_condition; + } + + if (action_false.jump_type != JUMP_NONE) + { + if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) + goto fail; + structure->u.jump.type = action_false.jump_type; + structure->u.jump.target = action_false.target; + } + break; + } + + case VKD3DSIH_RET: + if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) + goto fail; + structure->u.jump.type = JUMP_RET; + break; + + default: + vkd3d_unreachable(); + } + + /* Close loop
intervals. */ + while (stack_depth > 0) + { + if (stack[stack_depth - 1]->end != i + 1) + break; + + --stack_depth; + } } - if ((result = flatten_control_flow_constructs(parser)) < 0) - return result; + assert(stack_depth == 0); + assert(open_interval_idx == cfg->loop_interval_count); if (TRACE_ON()) - vkd3d_shader_trace(&parser->program); + vsir_cfg_dump_structured_program(cfg); - if (!parser->failed && (result = vsir_validate(parser)) < 0) - return result; + vkd3d_free(stack); - if (parser->failed) - result = VKD3D_ERROR_INVALID_SHADER; + return VKD3D_OK; - return result; +fail: + vkd3d_free(stack); + + return VKD3D_ERROR_OUT_OF_MEMORY; +} + +static void vsir_cfg_remove_trailing_continue(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *list, unsigned int target) +{ + struct vsir_cfg_structure *last = &list->structures[list->count - 1]; + + if (last->type == STRUCTURE_TYPE_JUMP && last->u.jump.type == JUMP_CONTINUE + && !last->u.jump.condition && last->u.jump.target == target) + { + --list->count; + assert(cfg->loop_intervals[target].target_count > 0); + --cfg->loop_intervals[target].target_count; + } +} + +static struct vsir_cfg_structure *vsir_cfg_get_trailing_break(struct vsir_cfg_structure_list *list) +{ + struct vsir_cfg_structure *structure; + size_t count = list->count; + + if (count == 0) + return NULL; + + structure = &list->structures[count - 1]; + + if (structure->type != STRUCTURE_TYPE_JUMP || structure->u.jump.type != JUMP_BREAK + || structure->u.jump.condition) + return NULL; + + return structure; +} + +/* When the last instruction in both branches of a selection construct + * is an unconditional break, any of them can be moved after the + * selection construct. If they break the same loop both of them can + * be moved out, otherwise we can choose which one: we choose the one + * that breaks the innermost loop, because we hope to eventually + * remove the loop itself. 
+ * + * In principle a similar movement could be done when the last + * instructions are continue and continue, or continue and break. But + * in practice I don't think those situations can happen given the + * previous passes we do on the program, so we don't care. */ +static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *list) +{ + struct vsir_cfg_structure *selection, *if_break, *else_break, *new_break; + unsigned int if_target, else_target, max_target; + size_t pos = list->count - 1; + + selection = &list->structures[pos]; + assert(selection->type == STRUCTURE_TYPE_SELECTION); + + if_break = vsir_cfg_get_trailing_break(&selection->u.selection.if_body); + else_break = vsir_cfg_get_trailing_break(&selection->u.selection.else_body); + + if (!if_break || !else_break) + return VKD3D_OK; + + if_target = if_break->u.jump.target; + else_target = else_break->u.jump.target; + max_target = max(if_target, else_target); + + if (!(new_break = vsir_cfg_structure_list_append(list, STRUCTURE_TYPE_JUMP))) + return VKD3D_ERROR_OUT_OF_MEMORY; + new_break->u.jump.type = JUMP_BREAK; + new_break->u.jump.target = max_target; + ++cfg->loop_intervals[max_target].target_count; + + /* Pointer `selection' could have been invalidated by the append + * operation. */ + selection = &list->structures[pos]; + assert(selection->type == STRUCTURE_TYPE_SELECTION); + + if (if_target == max_target) + { + --selection->u.selection.if_body.count; + assert(cfg->loop_intervals[if_target].target_count > 0); + --cfg->loop_intervals[if_target].target_count; + } + + if (else_target == max_target) + { + --selection->u.selection.else_body.count; + assert(cfg->loop_intervals[else_target].target_count > 0); + --cfg->loop_intervals[else_target].target_count; + } + + /* If a branch becomes empty, make it the else branch, so we save a block. 
*/ + if (selection->u.selection.if_body.count == 0) + { + struct vsir_cfg_structure_list tmp; + + selection->u.selection.invert_condition = !selection->u.selection.invert_condition; + tmp = selection->u.selection.if_body; + selection->u.selection.if_body = selection->u.selection.else_body; + selection->u.selection.else_body = tmp; + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections_recursively(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *list) +{ + struct vsir_cfg_structure *trailing; + + if (list->count == 0) + return VKD3D_OK; + + trailing = &list->structures[list->count - 1]; + + if (trailing->type != STRUCTURE_TYPE_SELECTION) + return VKD3D_OK; + + vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.if_body); + vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.else_body); + + return vsir_cfg_move_breaks_out_of_selections(cfg, list); +} + +static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *list) +{ + enum vkd3d_result ret; + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump; + + if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition) + continue; + + vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION); + new_selection.u.selection.condition = structure->u.jump.condition; + new_selection.u.selection.invert_condition = structure->u.jump.invert_condition; + + if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body, + STRUCTURE_TYPE_JUMP))) + return VKD3D_ERROR_OUT_OF_MEMORY; + new_jump->u.jump.type = structure->u.jump.type; + new_jump->u.jump.target = structure->u.jump.target; + + /* Move the rest of the structure list in the else branch + * rather than leaving it after the selection construct. 
The + * reason is that this is more conducive to further + * optimization, because all the conditional `break's appear + * as the last instruction of a branch of a cascade of + * selection constructs at the end of the structure list we're + * processing, instead of being buried in the middle of the + * structure list itself. */ + if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body, + &list->structures[i + 1], list->count - i - 1)) < 0) + return ret; + + *structure = new_selection; + list->count = i + 1; + + if ((ret = vsir_cfg_synthesize_selections(cfg, &structure->u.selection.else_body)) < 0) + return ret; + + if ((ret = vsir_cfg_move_breaks_out_of_selections(cfg, list)) < 0) + return ret; + + break; + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_append_loop(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *new_list, struct vsir_cfg_structure *loop) +{ + struct vsir_cfg_structure_list *loop_body = &loop->u.loop.body; + unsigned int target, loop_idx = loop->u.loop.idx; + struct vsir_cfg_structure *trailing_break; + enum vkd3d_result ret; + + trailing_break = vsir_cfg_get_trailing_break(loop_body); + + /* If the loop's last instruction is not a break, we cannot remove + * the loop itself. */ + if (!trailing_break) + { + if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) + return ret; + memset(loop, 0, sizeof(*loop)); + return VKD3D_OK; + } + + target = trailing_break->u.jump.target; + assert(cfg->loop_intervals[target].target_count > 0); + + /* If the loop is not targeted by any jump, we can remove it. The + * trailing `break' then targets another loop, so we have to keep + * it. 
*/ + if (cfg->loop_intervals[loop_idx].target_count == 0) + { + if ((ret = vsir_cfg_structure_list_append_from_region(new_list, + &loop_body->structures[0], loop_body->count)) < 0) + return ret; + loop_body->count = 0; + return VKD3D_OK; + } + + /* If the loop is targeted only by its own trailing `break' + * instruction, then we can remove it together with the `break' + * itself. */ + if (target == loop_idx && cfg->loop_intervals[loop_idx].target_count == 1) + { + --cfg->loop_intervals[loop_idx].target_count; + if ((ret = vsir_cfg_structure_list_append_from_region(new_list, + &loop_body->structures[0], loop_body->count - 1)) < 0) + return ret; + loop_body->count = 0; + return VKD3D_OK; + } + + if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) + return ret; + memset(loop, 0, sizeof(*loop)); + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) +{ + struct vsir_cfg_structure_list old_list = *list, *new_list = list; + enum vkd3d_result ret; + size_t i; + + memset(new_list, 0, sizeof(*new_list)); + + for (i = 0; i < old_list.count; ++i) + { + struct vsir_cfg_structure *loop = &old_list.structures[i], *selection; + struct vsir_cfg_structure_list *loop_body; + + if (loop->type != STRUCTURE_TYPE_LOOP) + { + if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) + goto out; + memset(loop, 0, sizeof(*loop)); + continue; + } + + loop_body = &loop->u.loop.body; + + if (loop_body->count == 0) + { + if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) + goto out; + memset(loop, 0, sizeof(*loop)); + continue; + } + + vsir_cfg_remove_trailing_continue(cfg, loop_body, loop->u.loop.idx); + + if ((ret = vsir_cfg_optimize_recurse(cfg, loop_body)) < 0) + goto out; + + if ((ret = vsir_cfg_synthesize_selections(cfg, loop_body)) < 0) + goto out; + + if ((ret = vsir_cfg_append_loop(cfg, new_list, loop)) < 0) + goto out; + + /* 
If the last pushed instruction is a selection and one of the branches terminates with a + * `break', start pushing to the other branch, in the hope of eventually pushing a `break' + * there too and being able to remove a loop. */ + if (new_list->count == 0) + continue; + + selection = &new_list->structures[new_list->count - 1]; + + if (selection->type == STRUCTURE_TYPE_SELECTION) + { + if (vsir_cfg_get_trailing_break(&selection->u.selection.if_body)) + new_list = &selection->u.selection.else_body; + else if (vsir_cfg_get_trailing_break(&selection->u.selection.else_body)) + new_list = &selection->u.selection.if_body; + } + } + + ret = vsir_cfg_move_breaks_out_of_selections_recursively(cfg, list); + +out: + vsir_cfg_structure_list_cleanup(&old_list); + + return ret; +} + +static void vsir_cfg_count_targets(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) +{ + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i]; + + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + break; + + case STRUCTURE_TYPE_LOOP: + vsir_cfg_count_targets(cfg, &structure->u.loop.body); + break; + + case STRUCTURE_TYPE_SELECTION: + vsir_cfg_count_targets(cfg, &structure->u.selection.if_body); + vsir_cfg_count_targets(cfg, &structure->u.selection.else_body); + break; + + case STRUCTURE_TYPE_JUMP: + if (structure->u.jump.type == JUMP_BREAK || structure->u.jump.type == JUMP_CONTINUE) + ++cfg->loop_intervals[structure->u.jump.target].target_count; + break; + } + } +} + +/* Trampolines are code gadgets used to emulate multilevel jumps (which are not natively supported + * by SPIR-V). A trampoline is inserted just after a loop and checks whether control has reached the + * intended site (i.e., we just jumped out of the target block) or if other levels of jumping are + * needed. For each jump a trampoline is required for all the loops between the jump itself and the + * target loop, excluding the target loop itself.
*/ +static void vsir_cfg_mark_trampolines(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list, + struct vsir_cfg_structure *loop) +{ + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i]; + + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + break; + + case STRUCTURE_TYPE_LOOP: + structure->u.loop.outer_loop = loop; + vsir_cfg_mark_trampolines(cfg, &structure->u.loop.body, structure); + break; + + case STRUCTURE_TYPE_SELECTION: + vsir_cfg_mark_trampolines(cfg, &structure->u.selection.if_body, loop); + vsir_cfg_mark_trampolines(cfg, &structure->u.selection.else_body, loop); + break; + + case STRUCTURE_TYPE_JUMP: + { + struct vsir_cfg_structure *l; + if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) + break; + for (l = loop; l && l->u.loop.idx != structure->u.jump.target; l = l->u.loop.outer_loop) + { + assert(l->type == STRUCTURE_TYPE_LOOP); + l->u.loop.needs_trampoline = true; + } + break; + } + } + } +} + +/* Launchers are the counterpart of trampolines. A launcher is inserted just before a jump, and + * writes in a well-known variable what is the target of the jump. Trampolines will then read that + * variable to decide how to redirect the jump to its intended target. A launcher is needed each + * time the innermost loop containing the jump itself has a trampoline (independently of whether the + * jump is targeting that loop or not). 
*/ +static void vsir_cfg_mark_launchers(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list, + struct vsir_cfg_structure *loop) +{ + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i]; + + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + break; + + case STRUCTURE_TYPE_LOOP: + vsir_cfg_mark_launchers(cfg, &structure->u.loop.body, structure); + break; + + case STRUCTURE_TYPE_SELECTION: + vsir_cfg_mark_launchers(cfg, &structure->u.selection.if_body, loop); + vsir_cfg_mark_launchers(cfg, &structure->u.selection.else_body, loop); + break; + + case STRUCTURE_TYPE_JUMP: + if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) + break; + assert(loop && loop->type == STRUCTURE_TYPE_LOOP); + if (loop->u.loop.needs_trampoline) + structure->u.jump.needs_launcher = true; + break; + } + } +} + +static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) +{ + enum vkd3d_result ret; + + vsir_cfg_count_targets(cfg, &cfg->structured_program); + + ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program); + + /* Trampolines and launchers cannot be marked with the same pass, + * because a jump might have to be marked as launcher even when it + * targets its innermost loop, if other jumps in the same loop + * need a trampoline anyway. So launchers can be discovered only + * once all the trampolines are known. 
*/ + vsir_cfg_mark_trampolines(cfg, &cfg->structured_program, NULL); + vsir_cfg_mark_launchers(cfg, &cfg->structured_program, NULL); + + if (TRACE_ON()) + vsir_cfg_dump_structured_program(cfg); + + return ret; +} + +static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *list, unsigned int loop_idx); + +static enum vkd3d_result vsir_cfg_structure_list_emit_block(struct vsir_cfg *cfg, + struct vsir_block *block) +{ + struct vsir_cfg_emit_target *target = cfg->target; + + if (!reserve_instructions(&target->instructions, &target->ins_capacity, + target->ins_count + (block->end - block->begin))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + memcpy(&target->instructions[target->ins_count], block->begin, + (char *)block->end - (char *)block->begin); + + target->ins_count += block->end - block->begin; + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_structure_list_emit_loop(struct vsir_cfg *cfg, + struct vsir_cfg_structure_loop *loop, unsigned int loop_idx) +{ + struct vsir_cfg_emit_target *target = cfg->target; + const struct vkd3d_shader_location no_loc = {0}; + enum vkd3d_result ret; + + if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_LOOP); + + if ((ret = vsir_cfg_structure_list_emit(cfg, &loop->body, loop->idx)) < 0) + return ret; + + if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 5)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); + + /* Add a trampoline to implement multilevel jumping depending on the stored + * jump_target value. */ + if (loop->needs_trampoline) + { + /* If the multilevel jump is a `continue' and the target is the loop we're inside + * right now, then we can finally do the `continue'. 
*/ + const unsigned int outer_continue_target = loop_idx << 1 | 1; + /* If the multilevel jump is a `continue' to any other target, or if it is a `break' + * and the target is not the loop we just finished emitting, then it means that + * we have to reach an outer loop, so we keep breaking. */ + const unsigned int inner_break_target = loop->idx << 1; + + if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], + &no_loc, VKD3DSIH_IEQ, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count); + src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx); + src_param_init_const_uint(&target->instructions[target->ins_count].src[1], outer_continue_target); + + ++target->ins_count; + + if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], + &no_loc, VKD3DSIH_CONTINUEP, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count); + + ++target->ins_count; + ++target->temp_count; + + if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], + &no_loc, VKD3DSIH_IEQ, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count); + src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx); + src_param_init_const_uint(&target->instructions[target->ins_count].src[1], inner_break_target); + + ++target->ins_count; + + if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], + &no_loc, VKD3DSIH_BREAKP, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + + src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], 
target->temp_count); + + ++target->ins_count; + ++target->temp_count; + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_structure_list_emit_selection(struct vsir_cfg *cfg, + struct vsir_cfg_structure_selection *selection, unsigned int loop_idx) +{ + struct vsir_cfg_emit_target *target = cfg->target; + const struct vkd3d_shader_location no_loc = {0}; + enum vkd3d_result ret; + + if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], + &no_loc, VKD3DSIH_IF, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + target->instructions[target->ins_count].src[0] = *selection->condition; + + if (selection->invert_condition) + target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + + ++target->ins_count; + + if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->if_body, loop_idx)) < 0) + return ret; + + if (selection->else_body.count != 0) + { + if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ELSE); + + if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->else_body, loop_idx)) < 0) + return ret; + } + + if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDIF); + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, + struct vsir_cfg_structure_jump *jump, unsigned int loop_idx) +{ + struct vsir_cfg_emit_target *target = cfg->target; + const struct vkd3d_shader_location no_loc = {0}; + /* Encode the jump target as the loop index plus a bit to remember whether + * we're breaking 
or continueing. */ + unsigned int jump_target = jump->target << 1; + enum vkd3d_shader_opcode opcode; + + switch (jump->type) + { + case JUMP_CONTINUE: + /* If we're continueing the loop we're directly inside, then we can emit a + * `continue'. Otherwise we first have to break all the loops between here + * and the loop to continue, recording our intention to continue + * in the lowest bit of jump_target. */ + if (jump->target == loop_idx) + { + opcode = jump->condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; + break; + } + jump_target |= 1; + /* fall through */ + + case JUMP_BREAK: + opcode = jump->condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK; + break; + + case JUMP_RET: + assert(!jump->condition); + opcode = VKD3DSIH_RET; + break; + + default: + vkd3d_unreachable(); + } + + if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (jump->needs_launcher) + { + if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], + &no_loc, VKD3DSIH_MOV, 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + dst_param_init_temp_uint(&target->instructions[target->ins_count].dst[0], target->jump_target_temp_idx); + src_param_init_const_uint(&target->instructions[target->ins_count].src[0], jump_target); + + ++target->ins_count; + } + + if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], + &no_loc, opcode, 0, !!jump->condition)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (jump->invert_condition) + target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + + if (jump->condition) + target->instructions[target->ins_count].src[0] = *jump->condition; + + ++target->ins_count; + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *list, unsigned int loop_idx) +{ + enum vkd3d_result ret; + size_t i; + + for (i = 0; i < list->count; 
++i) + { + struct vsir_cfg_structure *structure = &list->structures[i]; + + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + if ((ret = vsir_cfg_structure_list_emit_block(cfg, structure->u.block)) < 0) + return ret; + break; + + case STRUCTURE_TYPE_LOOP: + if ((ret = vsir_cfg_structure_list_emit_loop(cfg, &structure->u.loop, loop_idx)) < 0) + return ret; + break; + + case STRUCTURE_TYPE_SELECTION: + if ((ret = vsir_cfg_structure_list_emit_selection(cfg, &structure->u.selection, + loop_idx)) < 0) + return ret; + break; + + case STRUCTURE_TYPE_JUMP: + if ((ret = vsir_cfg_structure_list_emit_jump(cfg, &structure->u.jump, + loop_idx)) < 0) + return ret; + break; + + default: + vkd3d_unreachable(); + } + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) +{ + return vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX); +} + +static enum vkd3d_result vsir_program_structurize_function(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target, + size_t *pos) +{ + enum vkd3d_result ret; + struct vsir_cfg cfg; + + if ((ret = vsir_cfg_init(&cfg, program, message_context, target, pos)) < 0) + return ret; + + vsir_cfg_compute_dominators(&cfg); + + if ((ret = vsir_cfg_compute_loops(&cfg)) < 0) + goto out; + + if ((ret = vsir_cfg_sort_nodes(&cfg)) < 0) + goto out; + + if ((ret = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) + goto out; + + if ((ret = vsir_cfg_build_structured_program(&cfg)) < 0) + goto out; + + if ((ret = vsir_cfg_optimize(&cfg)) < 0) + goto out; + + ret = vsir_cfg_emit_structured_program(&cfg); + +out: + vsir_cfg_cleanup(&cfg); + + return ret; +} + +static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) +{ + struct vsir_cfg_emit_target target = {0}; + enum vkd3d_result ret; + size_t i; + + target.jump_target_temp_idx = 
program->temp_count; + target.temp_count = program->temp_count + 1; + + if (!reserve_instructions(&target.instructions, &target.ins_capacity, program->instructions.count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < program->instructions.count;) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + switch (ins->handler_idx) + { + case VKD3DSIH_LABEL: + assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); + TRACE("Structurizing a non-hull shader.\n"); + if ((ret = vsir_program_structurize_function(program, message_context, + &target, &i)) < 0) + goto fail; + assert(i == program->instructions.count); + break; + + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); + TRACE("Structurizing phase %u of a hull shader.\n", ins->handler_idx); + target.instructions[target.ins_count++] = *ins; + ++i; + if ((ret = vsir_program_structurize_function(program, message_context, + &target, &i)) < 0) + goto fail; + break; + + default: + if (!reserve_instructions(&target.instructions, &target.ins_capacity, target.ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + target.instructions[target.ins_count++] = *ins; + ++i; + break; + } + } + + vkd3d_free(program->instructions.elements); + program->instructions.elements = target.instructions; + program->instructions.capacity = target.ins_capacity; + program->instructions.count = target.ins_count; + program->temp_count = target.temp_count; + + return VKD3D_OK; + +fail: + vkd3d_free(target.instructions); + + return ret; +} + +static void register_map_undominated_use(struct vkd3d_shader_register *reg, struct ssas_to_temps_alloc *alloc, + struct vsir_block *block, struct vsir_block **origin_blocks) +{ + unsigned int i; + + if (register_is_ssa(reg)) + { + i = reg->idx[0].offset; + if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block)) + 
alloc->table[i] = alloc->next_temp_idx++; + } + + for (i = 0; i < reg->idx_count; ++i) + if (reg->idx[i].rel_addr) + register_map_undominated_use(&reg->idx[i].rel_addr->reg, alloc, block, origin_blocks); +} + +/* Drivers are not necessarily optimised to handle very large numbers of temps. For example, + * using them only where necessary fixes stuttering issues in Horizon Zero Dawn on RADV. + * This can also result in the backend emitting less code because temps typically need an + * access chain and a load/store. Conversion of phi SSA values to temps should eliminate all + * undominated SSA use, but structurisation may create new occurrences. */ +static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg) +{ + struct vsir_program *program = cfg->program; + struct ssas_to_temps_alloc alloc = {0}; + struct vsir_block **origin_blocks; + unsigned int j; + size_t i; + + if (!(origin_blocks = vkd3d_calloc(program->ssa_count, sizeof(*origin_blocks)))) + { + ERR("Failed to allocate origin block array.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count)) + { + vkd3d_free(origin_blocks); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + struct vkd3d_shader_instruction *ins; + + if (block->label == 0) + continue; + + for (ins = block->begin; ins <= block->end; ++ins) + { + for (j = 0; j < ins->dst_count; ++j) + { + if (register_is_ssa(&ins->dst[j].reg)) + origin_blocks[ins->dst[j].reg.idx[0].offset] = block; + } + } + } + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + struct vkd3d_shader_instruction *ins; + + if (block->label == 0) + continue; + + for (ins = block->begin; ins <= block->end; ++ins) + { + for (j = 0; j < ins->src_count; ++j) + register_map_undominated_use(&ins->src[j].reg, &alloc, block, origin_blocks); + } + } + + if
(alloc.next_temp_idx == program->temp_count) + goto done; + + TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count); + + for (i = cfg->function_begin; i < cfg->function_end; ++i) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + for (j = 0; j < ins->dst_count; ++j) + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); + + for (j = 0; j < ins->src_count; ++j) + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); + } + + program->temp_count = alloc.next_temp_idx; +done: + vkd3d_free(origin_blocks); + vkd3d_free(alloc.table); + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_function( + struct vsir_program *program, struct vkd3d_shader_message_context *message_context, + size_t *pos) +{ + enum vkd3d_result ret; + struct vsir_cfg cfg; + + if ((ret = vsir_cfg_init(&cfg, program, message_context, NULL, pos)) < 0) + return ret; + + vsir_cfg_compute_dominators(&cfg); + + ret = vsir_cfg_materialize_undominated_ssas_to_temps(&cfg); + + vsir_cfg_cleanup(&cfg); + + return ret; +} + +static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) +{ + enum vkd3d_result ret; + size_t i; + + for (i = 0; i < program->instructions.count;) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + switch (ins->handler_idx) + { + case VKD3DSIH_LABEL: + assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); + TRACE("Materializing undominated SSAs in a non-hull shader.\n"); + if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( + program, message_context, &i)) < 0) + return ret; + assert(i == program->instructions.count); + break; + + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + 
assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); + TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->handler_idx); + ++i; + if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( + program, message_context, &i)) < 0) + return ret; + break; + + default: + ++i; + break; + } + } + + return VKD3D_OK; } struct validation_context { - struct vkd3d_shader_parser *parser; + struct vkd3d_shader_message_context *message_context; const struct vsir_program *program; size_t instruction_idx; + struct vkd3d_shader_location null_location; bool invalid_instruction_idx; + enum vkd3d_result status; bool dcl_temps_found; enum vkd3d_shader_opcode phase; enum cf_type @@ -3452,16 +5306,21 @@ static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *c if (ctx->invalid_instruction_idx) { - vkd3d_shader_parser_error(ctx->parser, error, "%s", buf.buffer); + vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); ERR("VSIR validation error: %s\n", buf.buffer); } else { - vkd3d_shader_parser_error(ctx->parser, error, "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); + const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; + vkd3d_shader_error(ctx->message_context, &ins->location, error, + "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); } vkd3d_string_buffer_cleanup(&buf); + + if (!ctx->status) + ctx->status = VKD3D_ERROR_INVALID_SHADER; } static void vsir_validate_src_param(struct validation_context *ctx, @@ -3515,10 +5374,10 @@ static void vsir_validate_register(struct validation_context *ctx, if (reg->idx[0].rel_addr) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register."); - if (reg->idx[0].offset >= ctx->parser->program.temp_count) + if (reg->idx[0].offset >= 
ctx->program->temp_count) { validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "TEMP register index %u exceeds the maximum count %u.", - reg->idx[0].offset, ctx->parser->program.temp_count); + reg->idx[0].offset, ctx->program->temp_count); break; } @@ -3606,7 +5465,7 @@ static void vsir_validate_register(struct validation_context *ctx, validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid precision %#x for a LABEL register.", reg->precision); - if (reg->data_type != VKD3D_DATA_UINT) + if (reg->data_type != VKD3D_DATA_UNUSED) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a LABEL register.", reg->data_type); @@ -3708,7 +5567,7 @@ static void vsir_validate_dst_param(struct validation_context *ctx, switch (dst->reg.type) { case VKD3DSPR_SSA: - if (dst->reg.idx[0].offset < ctx->parser->program.ssa_count) + if (dst->reg.idx[0].offset < ctx->program->ssa_count) { struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; @@ -3761,7 +5620,7 @@ static void vsir_validate_src_param(struct validation_context *ctx, switch (src->reg.type) { case VKD3DSPR_SSA: - if (src->reg.idx[0].offset < ctx->parser->program.ssa_count) + if (src->reg.idx[0].offset < ctx->program->ssa_count) { struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; unsigned int i; @@ -3852,7 +5711,6 @@ static void vsir_validate_instruction(struct validation_context *ctx) size_t i; instruction = &ctx->program->instructions.elements[ctx->instruction_idx]; - ctx->parser->location = instruction->location; for (i = 0; i < instruction->dst_count; ++i) vsir_validate_dst_param(ctx, &instruction->dst[i]); @@ -3884,11 +5742,74 @@ static void vsir_validate_instruction(struct validation_context *ctx) ctx->dcl_temps_found = false; return; + case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: + /* Exclude non-finite values. 
*/ + if (!(instruction->declaration.max_tessellation_factor >= 1.0f + && instruction->declaration.max_tessellation_factor <= 64.0f)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Max tessellation factor %f is invalid.", + instruction->declaration.max_tessellation_factor); + return; + + case VKD3DSIH_DCL_INPUT_PRIMITIVE: + if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED + || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.", + instruction->declaration.primitive_type.type); + return; + + case VKD3DSIH_DCL_VERTICES_OUT: + if (instruction->declaration.count > 1024) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output vertex count %u is invalid.", + instruction->declaration.count); + return; + + case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: + if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED + || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u is invalid.", + instruction->declaration.primitive_type.type); + return; + + case VKD3DSIH_DCL_GS_INSTANCES: + if (!instruction->declaration.count || instruction->declaration.count > 32) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.", + instruction->declaration.count); + return; + + case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: + if (!instruction->declaration.count || instruction->declaration.count > 32) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Output control point count %u is invalid.", + instruction->declaration.count); + return; + + case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: + if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID + || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) + validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); + return; + + case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: + if (!instruction->declaration.tessellator_output_primitive + || instruction->declaration.tessellator_output_primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Tessellator output primitive %#x is invalid.", instruction->declaration.tessellator_output_primitive); + return; + + case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: + if (!instruction->declaration.tessellator_partitioning + || instruction->declaration.tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Tessellator partitioning %#x is invalid.", instruction->declaration.tessellator_partitioning); + return; + default: break; } - if (version->type == VKD3D_SHADER_TYPE_HULL && ctx->phase == VKD3DSIH_INVALID) + /* Only DCL instructions may occur outside hull shader phases. 
*/ + if (!vsir_instruction_is_dcl(instruction) && version->type == VKD3D_SHADER_TYPE_HULL + && ctx->phase == VKD3DSIH_INVALID) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Instruction %#x appear before any phase instruction in a hull shader.", instruction->handler_idx); @@ -4180,7 +6101,8 @@ static void vsir_validate_instruction(struct validation_context *ctx) unsigned int value_idx = 2 * i; unsigned int label_idx = 2 * i + 1; - if (!register_is_constant(&instruction->src[value_idx].reg) && !register_is_ssa(&instruction->src[value_idx].reg)) + if (!register_is_constant_or_undef(&instruction->src[value_idx].reg) + && !register_is_ssa(&instruction->src[value_idx].reg)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid value register for incoming %zu of type %#x in PHI instruction, " "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); @@ -4203,17 +6125,20 @@ static void vsir_validate_instruction(struct validation_context *ctx) } } -enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) +enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, + const char *source_name, struct vkd3d_shader_message_context *message_context) { struct validation_context ctx = { - .parser = parser, - .program = &parser->program, + .message_context = message_context, + .program = program, + .null_location = {.source_name = source_name}, + .status = VKD3D_OK, .phase = VKD3DSIH_INVALID, }; unsigned int i; - if (!(parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) + if (!(config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) return VKD3D_OK; if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) @@ -4222,7 +6147,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas)))) goto fail; - for (ctx.instruction_idx = 0; ctx.instruction_idx < 
parser->program.instructions.count; ++ctx.instruction_idx) + for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count; ++ctx.instruction_idx) vsir_validate_instruction(&ctx); ctx.invalid_instruction_idx = true; @@ -4247,7 +6172,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) vkd3d_free(ctx.temps); vkd3d_free(ctx.ssas); - return VKD3D_OK; + return ctx.status; fail: vkd3d_free(ctx.blocks); @@ -4256,3 +6181,72 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; } + +enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + enum vkd3d_result result = VKD3D_OK; + + if ((result = vsir_program_lower_instructions(program, message_context)) < 0) + return result; + + if (program->shader_version.major >= 6) + { + if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0) + return result; + + if ((result = lower_switch_to_if_ladder(program)) < 0) + return result; + + if ((result = vsir_program_structurize(program, message_context)) < 0) + return result; + + if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) + return result; + + if ((result = vsir_program_materialize_undominated_ssas_to_temps(program, message_context)) < 0) + return result; + } + else + { + if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + { + if ((result = vsir_program_remap_output_signature(program, compile_info, message_context)) < 0) + return result; + } + + if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) + { + if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0) + return result; + + if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions, + &program->input_signature)) < 0) + return result; + } + + if ((result = vsir_program_normalise_io_registers(program)) < 0) + return 
result; + + if ((result = instruction_array_normalise_flat_constants(program)) < 0) + return result; + + remove_dead_code(program); + + if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) + return result; + + if (compile_info->target_type != VKD3D_SHADER_TARGET_GLSL + && (result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) + return result; + } + + if (TRACE_ON()) + vkd3d_shader_trace(program); + + if ((result = vsir_program_validate(program, config_flags, + compile_info->source_name, message_context)) < 0) + return result; + + return result; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index 6fb61eff6c3..be50d3b9020 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -20,7 +20,6 @@ %{ -#include "preproc.h" #include "preproc.tab.h" #undef ERROR /* defined in wingdi.h */ @@ -66,7 +65,7 @@ static void update_location(struct preproc_ctx *ctx); %s LINE NEWLINE \r?\n -WS [ \t] +WS [ \t\r] IDENTIFIER [A-Za-z_][A-Za-z0-9_]* INT_SUFFIX [uUlL]{0,2} diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 5c87ff15503..984a4f894f6 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -45,6 +45,8 @@ static spv_target_env spv_target_env_from_vkd3d(enum vkd3d_shader_spirv_environm return SPV_ENV_OPENGL_4_5; case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: return SPV_ENV_VULKAN_1_0; + case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1: + return SPV_ENV_VULKAN_1_1; default: ERR("Invalid environment %#x.\n", environment); return SPV_ENV_VULKAN_1_0; @@ -223,9 +225,10 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d } } -#define VKD3D_SPIRV_VERSION 0x00010000 +#define VKD3D_SPIRV_VERSION_1_0 0x00010000 +#define VKD3D_SPIRV_VERSION_1_3 0x00010300 #define VKD3D_SPIRV_GENERATOR_ID 18 -#define 
VKD3D_SPIRV_GENERATOR_VERSION 11 +#define VKD3D_SPIRV_GENERATOR_VERSION 12 #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) struct vkd3d_spirv_stream @@ -358,6 +361,7 @@ struct vkd3d_spirv_builder uint32_t type_sampler_id; uint32_t type_bool_id; uint32_t type_void_id; + uint32_t scope_subgroup_id; struct vkd3d_spirv_stream debug_stream; /* debug instructions */ struct vkd3d_spirv_stream annotation_stream; /* decoration instructions */ @@ -1524,6 +1528,19 @@ static uint32_t vkd3d_spirv_build_op_logical_equal(struct vkd3d_spirv_builder *b SpvOpLogicalEqual, result_type, operand0, operand1); } +static uint32_t vkd3d_spirv_build_op_logical_or(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand0, uint32_t operand1) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpLogicalOr, result_type, operand0, operand1); +} + +static uint32_t vkd3d_spirv_build_op_logical_not(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand) +{ + return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpLogicalNot, result_type, operand); +} + static uint32_t vkd3d_spirv_build_op_convert_utof(struct vkd3d_spirv_builder *builder, uint32_t result_type, uint32_t unsigned_value) { @@ -1725,6 +1742,63 @@ static void vkd3d_spirv_build_op_memory_barrier(struct vkd3d_spirv_builder *buil SpvOpMemoryBarrier, memory_id, memory_semantics_id); } +static uint32_t vkd3d_spirv_build_op_scope_subgroup(struct vkd3d_spirv_builder *builder) +{ + return vkd3d_spirv_get_op_constant(builder, vkd3d_spirv_get_op_type_int(builder, 32, 0), SpvScopeSubgroup); +} + +static uint32_t vkd3d_spirv_get_op_scope_subgroup(struct vkd3d_spirv_builder *builder) +{ + return vkd3d_spirv_build_once(builder, &builder->scope_subgroup_id, vkd3d_spirv_build_op_scope_subgroup); +} + +static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot(struct vkd3d_spirv_builder *builder, + uint32_t 
result_type, uint32_t val_id) +{ + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpGroupNonUniformBallot, + result_type, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); +} + +static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot_bit_count(struct vkd3d_spirv_builder *builder, + uint32_t result_type, SpvGroupOperation group_op, uint32_t val_id) +{ + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); + return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformBallotBitCount, + result_type, vkd3d_spirv_get_op_scope_subgroup(builder), group_op, val_id); +} + +static uint32_t vkd3d_spirv_build_op_group_nonuniform_elect(struct vkd3d_spirv_builder *builder) +{ + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniform); + return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpGroupNonUniformElect, + vkd3d_spirv_get_op_type_bool(builder), vkd3d_spirv_get_op_scope_subgroup(builder)); +} + +static uint32_t vkd3d_spirv_build_op_group_nonuniform_broadcast(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t val_id, uint32_t lane_id) +{ + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); + return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformBroadcast, result_type, + vkd3d_spirv_get_op_scope_subgroup(builder), val_id, lane_id); +} + +static uint32_t vkd3d_spirv_build_op_group_nonuniform_shuffle(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t val_id, uint32_t lane_id) +{ + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformShuffle); + return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformShuffle, result_type, + vkd3d_spirv_get_op_scope_subgroup(builder), val_id, lane_id); +} + +static uint32_t 
vkd3d_spirv_build_op_group_nonuniform_broadcast_first(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t val_id) +{ + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpGroupNonUniformBroadcastFirst, + result_type, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); +} + static uint32_t vkd3d_spirv_build_op_glsl_std450_tr1(struct vkd3d_spirv_builder *builder, enum GLSLstd450 op, uint32_t result_type, uint32_t operand) { @@ -1825,6 +1899,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder { switch (data_type) { + case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_FLOAT: case VKD3D_DATA_SNORM: case VKD3D_DATA_UNORM: @@ -1832,6 +1907,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder break; case VKD3D_DATA_INT: case VKD3D_DATA_UINT: + case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */ return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); break; case VKD3D_DATA_DOUBLE: @@ -1900,7 +1976,7 @@ static void vkd3d_spirv_builder_free(struct vkd3d_spirv_builder *builder) } static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, - struct vkd3d_shader_code *spirv, const char *entry_point) + struct vkd3d_shader_code *spirv, const char *entry_point, enum vkd3d_shader_spirv_environment environment) { uint64_t capability_mask = builder->capability_mask; struct vkd3d_spirv_stream stream; @@ -1911,7 +1987,8 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, vkd3d_spirv_stream_init(&stream); vkd3d_spirv_build_word(&stream, SpvMagicNumber); - vkd3d_spirv_build_word(&stream, VKD3D_SPIRV_VERSION); + vkd3d_spirv_build_word(&stream, (environment == VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1) + ? 
VKD3D_SPIRV_VERSION_1_3 : VKD3D_SPIRV_VERSION_1_0); vkd3d_spirv_build_word(&stream, VKD3D_SPIRV_GENERATOR_MAGIC); vkd3d_spirv_build_word(&stream, builder->current_id); /* bound */ vkd3d_spirv_build_word(&stream, 0); /* schema, reserved */ @@ -1940,6 +2017,9 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStorageImageArrayDynamicIndexing) || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityShaderNonUniformEXT)) vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_descriptor_indexing"); + if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityFragmentShaderPixelInterlockEXT) + || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityFragmentShaderSampleInterlockEXT)) + vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_fragment_shader_interlock"); if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStencilExportEXT)) vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_shader_stencil_export"); if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityShaderViewportIndexLayerEXT)) @@ -2346,6 +2426,7 @@ struct spirv_compiler unsigned int output_control_point_count; bool use_vocp; + bool use_invocation_interlock; bool emit_point_size; enum vkd3d_shader_opcode phase; @@ -2427,14 +2508,13 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) vkd3d_free(compiler); } -static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, - struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, +static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *program, + const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location, - uint64_t config_flags) + struct vkd3d_shader_message_context *message_context, 
uint64_t config_flags) { - const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; - const struct shader_signature *output_signature = &shader_desc->output_signature; + const struct shader_signature *patch_constant_signature = &program->patch_constant_signature; + const struct shader_signature *output_signature = &program->output_signature; const struct vkd3d_shader_interface_info *shader_interface; const struct vkd3d_shader_descriptor_offset_info *offset_info; const struct vkd3d_shader_spirv_target_info *target_info; @@ -2447,7 +2527,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve memset(compiler, 0, sizeof(*compiler)); compiler->message_context = message_context; - compiler->location = *location; + compiler->location.source_name = compile_info->source_name; compiler->config_flags = config_flags; if ((target_info = vkd3d_find_struct(compile_info->next, SPIRV_TARGET_INFO))) @@ -2456,6 +2536,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve { case VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5: case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: + case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1: break; default: WARN("Invalid target environment %#x.\n", target_info->environment); @@ -2545,7 +2626,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve rb_init(&compiler->symbol_table, vkd3d_symbol_compare); - compiler->shader_type = shader_version->type; + compiler->shader_type = program->shader_version.type; if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) { @@ -2608,6 +2689,11 @@ static bool spirv_compiler_is_opengl_target(const struct spirv_compiler *compile return spirv_compiler_get_target_environment(compiler) == VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5; } +static bool spirv_compiler_is_spirv_min_1_3_target(const struct spirv_compiler *compiler) +{ + return 
spirv_compiler_get_target_environment(compiler) == VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1; +} + static bool spirv_compiler_is_target_extension_supported(const struct spirv_compiler *compiler, enum vkd3d_shader_spirv_extension extension) { @@ -3126,6 +3212,12 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s case VKD3DSPR_OUTSTENCILREF: snprintf(buffer, buffer_size, "oStencilRef"); break; + case VKD3DSPR_WAVELANECOUNT: + snprintf(buffer, buffer_size, "vWaveLaneCount"); + break; + case VKD3DSPR_WAVELANEINDEX: + snprintf(buffer, buffer_size, "vWaveLaneIndex"); + break; default: FIXME("Unhandled register %#x.\n", reg->type); snprintf(buffer, buffer_size, "unrecognized_%#x", reg->type); @@ -3372,7 +3464,7 @@ struct vkd3d_shader_register_info bool is_aggregate; }; -static bool spirv_compiler_get_register_info(const struct spirv_compiler *compiler, +static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_info *register_info) { struct vkd3d_symbol reg_symbol, *symbol; @@ -3398,7 +3490,8 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil vkd3d_symbol_make_register(®_symbol, reg); if (!(entry = rb_get(&compiler->symbol_table, ®_symbol))) { - FIXME("Unrecognized register (%s).\n", debug_vkd3d_symbol(®_symbol)); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE, + "Unrecognized register (%s).\n", debug_vkd3d_symbol(®_symbol)); memset(register_info, 0, sizeof(*register_info)); return false; } @@ -3548,8 +3641,9 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, ®->idx[0]); } + /* Alignment is supported only in the Kernel execution model and is an optimisation only. 
*/ if (reg->alignment) - WARN("Ignoring alignment %u.\n", reg->alignment); + TRACE("Ignoring alignment %u.\n", reg->alignment); if (index_count) { @@ -3736,6 +3830,70 @@ static uint32_t spirv_compiler_emit_bool_to_double(struct spirv_compiler *compil return vkd3d_spirv_build_op_select(builder, type_id, val_id, true_id, false_id); } +/* Based on the implementation in the OpenGL Mathematics library. */ +static uint32_t half_to_float(uint16_t value) +{ + uint32_t s = (value & 0x8000u) << 16; + uint32_t e = (value >> 10) & 0x1fu; + uint32_t m = value & 0x3ffu; + + if (!e) + { + if (!m) + { + /* Plus or minus zero */ + return s; + } + else + { + /* Denormalized number -- renormalize it */ + + while (!(m & 0x400u)) + { + m <<= 1; + --e; + } + + ++e; + m &= ~0x400u; + } + } + else if (e == 31u) + { + /* Positive or negative infinity for zero 'm'. + * Nan for non-zero 'm' -- preserve sign and significand bits */ + return s | 0x7f800000u | (m << 13); + } + + /* Normalized number */ + e += 127u - 15u; + m <<= 13; + + /* Assemble s, e and m. */ + return s | (e << 23) | m; +} + +static uint32_t convert_raw_constant32(enum vkd3d_data_type data_type, unsigned int uint_value) +{ + int16_t i; + + /* TODO: native 16-bit support. */ + if (data_type != VKD3D_DATA_UINT16 && data_type != VKD3D_DATA_HALF) + return uint_value; + + if (data_type == VKD3D_DATA_HALF) + return half_to_float(uint_value); + + /* Values in DXIL have no signedness, so it is ambiguous whether 16-bit constants should or + * should not be sign-extended when 16-bit execution is not supported. The AMD RX 580 Windows + * driver has no 16-bit support, and sign-extends all 16-bit constant ints to 32 bits. These + * results differ from SM 5. The RX 6750 XT supports 16-bit execution, so constants are not + * extended, and results match SM 5. It seems best to replicate the sign-extension, and if + * execution is 16-bit, the values will be truncated. 
*/ + i = uint_value; + return (int32_t)i; +} + static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, uint32_t swizzle, uint32_t write_mask) { @@ -3748,14 +3906,15 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile if (reg->dimension == VSIR_DIMENSION_SCALAR) { for (i = 0; i < component_count; ++i) - values[i] = *reg->u.immconst_u32; + values[i] = convert_raw_constant32(reg->data_type, reg->u.immconst_u32[0]); } else { for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) { if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) - values[j++] = reg->u.immconst_u32[vsir_swizzle_get_component(swizzle, i)]; + values[j++] = convert_raw_constant32(reg->data_type, + reg->u.immconst_u32[vsir_swizzle_get_component(swizzle, i)]); } } @@ -3899,6 +4058,13 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil switch (icb->data_type) { + case VKD3D_DATA_HALF: + case VKD3D_DATA_UINT16: + /* Scalar only. 
*/ + for (i = 0; i < element_count; ++i) + elements[i] = vkd3d_spirv_get_op_constant(builder, elem_type_id, + convert_raw_constant32(icb->data_type, icb->data[i])); + break; case VKD3D_DATA_FLOAT: case VKD3D_DATA_INT: case VKD3D_DATA_UINT: @@ -3998,7 +4164,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, struct vkd3d_shader_register_info reg_info; unsigned int component_count; uint32_t type_id, val_id; - uint32_t write_mask32; + uint32_t val_write_mask; if (reg->type == VKD3DSPR_IMMCONST) return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); @@ -4018,17 +4184,17 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); return vkd3d_spirv_get_op_undef(builder, type_id); } - assert(reg_info.component_type != VKD3D_SHADER_COMPONENT_DOUBLE); spirv_compiler_emit_dereference_register(compiler, reg, ®_info); - write_mask32 = data_type_is_64_bit(reg->data_type) ? vsir_write_mask_32_from_64(write_mask) : write_mask; + val_write_mask = (data_type_is_64_bit(reg->data_type) && !component_type_is_64_bit(reg_info.component_type)) + ? vsir_write_mask_32_from_64(write_mask) : write_mask; /* Intermediate value (no storage class). */ if (reg_info.storage_class == SpvStorageClassMax) { val_id = reg_info.id; } - else if (vsir_write_mask_component_count(write_mask32) == 1) + else if (vsir_write_mask_component_count(val_write_mask) == 1) { return spirv_compiler_emit_load_scalar(compiler, reg, swizzle, write_mask, ®_info); } @@ -4041,7 +4207,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, swizzle = data_type_is_64_bit(reg->data_type) ? 
vsir_swizzle_32_from_64(swizzle) : swizzle; val_id = spirv_compiler_emit_swizzle(compiler, - val_id, reg_info.write_mask, reg_info.component_type, swizzle, write_mask32); + val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask); if (component_type != reg_info.component_type) { @@ -4087,7 +4253,7 @@ static uint32_t spirv_compiler_emit_abs(struct spirv_compiler *compiler, uint32_t type_id; type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_glsl_std450_fabs(builder, type_id, val_id); FIXME("Unhandled data type %#x.\n", reg->data_type); @@ -4101,7 +4267,7 @@ static uint32_t spirv_compiler_emit_neg(struct spirv_compiler *compiler, uint32_t type_id; type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_fnegate(builder, type_id, val_id); else if (data_type_is_integer(reg->data_type)) return vkd3d_spirv_build_op_snegate(builder, type_id, val_id); @@ -4285,7 +4451,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, } type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_glsl_std450_nclamp(builder, type_id, val_id, zero_id, one_id); FIXME("Unhandled data type %#x.\n", reg->data_type); @@ -4322,11 +4488,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp { unsigned int component_count = vsir_write_mask_component_count(dst->write_mask); struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id, val_id; + uint32_t type_id, dst_type_id, 
val_id; + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); if (component_count > 1) { - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); val_id = vkd3d_spirv_build_op_composite_construct(builder, type_id, component_ids, component_count); } @@ -4334,6 +4500,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp { val_id = *component_ids; } + + dst_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, component_count); + if (dst_type_id != type_id) + val_id = vkd3d_spirv_build_op_bitcast(builder, dst_type_id, val_id); + spirv_compiler_emit_store_dst(compiler, dst, val_id); } @@ -4433,6 +4604,10 @@ static void spirv_compiler_decorate_builtin(struct spirv_compiler *compiler, case SpvBuiltInCullDistance: vkd3d_spirv_enable_capability(builder, SpvCapabilityCullDistance); break; + case SpvBuiltInSubgroupSize: + case SpvBuiltInSubgroupLocalInvocationId: + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniform); + break; default: break; } @@ -4622,6 +4797,9 @@ vkd3d_register_builtins[] = {VKD3DSPR_DEPTHOUTLE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, {VKD3DSPR_OUTSTENCILREF, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}}, + + {VKD3DSPR_WAVELANECOUNT, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}}, + {VKD3DSPR_WAVELANEINDEX, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupLocalInvocationId}}, }; static void spirv_compiler_emit_register_execution_mode(struct spirv_compiler *compiler, @@ -5670,9 +5848,26 @@ static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler flags &= ~VKD3DSGF_ENABLE_INT64; } + if (flags & VKD3DSGF_ENABLE_WAVE_INTRINSICS) + { + if (!(compiler->features & VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS)) + { + WARN("Unsupported wave ops.\n"); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, + "The target environment does not support wave 
ops."); + } + else if (!spirv_compiler_is_spirv_min_1_3_target(compiler)) + { + WARN("Wave ops enabled but environment does not support SPIR-V 1.3 or greater.\n"); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, + "The target environment uses wave ops but does not support SPIR-V 1.3 or greater."); + } + flags &= ~VKD3DSGF_ENABLE_WAVE_INTRINSICS; + } + if (flags & ~(VKD3DSGF_REFACTORING_ALLOWED | VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS)) FIXME("Unhandled global flags %#"PRIx64".\n", (uint64_t)flags); - else + else if (flags) WARN("Unhandled global flags %#"PRIx64".\n", (uint64_t)flags); } @@ -5734,8 +5929,9 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil vsir_register_init(®, VKD3DSPR_IDXTEMP, VKD3D_DATA_FLOAT, 1); reg.idx[0].offset = temp->register_idx; + /* Alignment is supported only in the Kernel execution model and is an optimisation only. */ if (temp->alignment) - WARN("Ignoring alignment %u.\n", temp->alignment); + TRACE("Ignoring alignment %u.\n", temp->alignment); function_location = spirv_compiler_get_current_function_location(compiler); vkd3d_spirv_begin_function_stream_insertion(builder, function_location); @@ -6272,9 +6468,24 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); - if (d->uav_flags & VKD3DSUF_GLOBALLY_COHERENT) + /* ROVs are implicitly globally coherent. 
*/ + if (d->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW)) vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationCoherent, NULL, 0); + if (d->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) + { + if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL) + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, + "Rasteriser-ordered views are only supported in fragment shaders."); + else if (!spirv_compiler_is_target_extension_supported(compiler, + VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK)) + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, + "Cannot enable fragment shader interlock. " + "The target environment does not support fragment shader interlock."); + else + compiler->use_invocation_interlock = true; + } + if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) { assert(structure_stride); /* counters are valid only for structured buffers */ @@ -6324,20 +6535,26 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp } static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) + const struct vkd3d_shader_register *reg, unsigned int alignment, unsigned int size, + unsigned int structure_stride, bool zero_init) { - uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id; + uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id, init_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const SpvStorageClass storage_class = SpvStorageClassWorkgroup; struct vkd3d_symbol reg_symbol; + /* Alignment is supported only in the Kernel execution model. 
*/ + if (alignment) + TRACE("Ignoring alignment %u.\n", alignment); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); length_id = spirv_compiler_get_constant_uint(compiler, size); array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); pointer_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, array_type_id); + init_id = zero_init ? vkd3d_spirv_get_op_constant_null(builder, array_type_id) : 0; var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, - pointer_type_id, storage_class, 0); + pointer_type_id, storage_class, init_id); spirv_compiler_emit_register_debug_name(builder, var_id, reg); @@ -6352,8 +6569,8 @@ static void spirv_compiler_emit_dcl_tgsm_raw(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_tgsm_raw *tgsm_raw = &instruction->declaration.tgsm_raw; - spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, - tgsm_raw->byte_count / 4, 0); + spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, tgsm_raw->alignment, + tgsm_raw->byte_count / 4, 0, tgsm_raw->zero_init); } static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compiler, @@ -6361,8 +6578,8 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi { const struct vkd3d_shader_tgsm_structured *tgsm_structured = &instruction->declaration.tgsm_structured; unsigned int stride = tgsm_structured->byte_stride / 4; - spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, - tgsm_structured->structure_count * stride, stride); + spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, tgsm_structured->alignment, + tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init); } static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, @@ -6871,7 +7088,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler 
*compiler, assert(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); - if (dst->reg.data_type == VKD3D_DATA_FLOAT) + if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) { val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); } @@ -6880,7 +7097,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. */ val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); } - else if (dst->reg.data_type == VKD3D_DATA_UINT) + else if (dst->reg.data_type == VKD3D_DATA_UINT16 || dst->reg.data_type == VKD3D_DATA_UINT) { val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); } @@ -6909,6 +7126,15 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil SpvOp op = SpvOpMax; unsigned int i; + if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->handler_idx == VKD3DSIH_COUNTBITS) + { + /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ + FIXME("Unsupported 64-bit source for bit count.\n"); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, + "64-bit source for bit count is not supported."); + return VKD3D_ERROR_INVALID_SHADER; + } + if (src->reg.data_type == VKD3D_DATA_BOOL) { if (dst->reg.data_type == VKD3D_DATA_BOOL) @@ -6997,6 +7223,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( } glsl_insts[] = { + {VKD3DSIH_ABS, GLSLstd450FAbs}, {VKD3DSIH_ACOS, GLSLstd450Acos}, {VKD3DSIH_ASIN, GLSLstd450Asin}, {VKD3DSIH_ATAN, GLSLstd450Atan}, @@ -7049,6 +7276,16 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp unsigned int i, component_count; enum GLSLstd450 glsl_inst; + if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI + || instruction->handler_idx == VKD3DSIH_FIRSTBIT_LO || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI)) + { + /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ + FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->handler_idx); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, + "64-bit source for handler %#x is not supported.", instruction->handler_idx); + return; + } + glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); if (glsl_inst == GLSLstd450Bad) { @@ -7093,8 +7330,8 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, struct vkd3d_shader_register_info dst_reg_info, src_reg_info; const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; + unsigned int i, component_count, write_mask; uint32_t components[VKD3D_VEC4_SIZE]; - unsigned int i, component_count; if (register_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA || dst->modifiers || src->modifiers) @@ -7145,7 +7382,13 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, } general_implementation: - val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); + write_mask = dst->write_mask; + if (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) + write_mask = vsir_write_mask_64_from_32(write_mask); + else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type)) + write_mask = vsir_write_mask_32_from_64(write_mask); + + val_id = spirv_compiler_emit_load_src(compiler, src, write_mask); if (dst->reg.data_type != src->reg.data_type) { val_id = vkd3d_spirv_build_op_bitcast(builder, vkd3d_spirv_get_type_id_for_data_type(builder, @@ -7171,8 +7414,15 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); if (src[0].reg.data_type != VKD3D_DATA_BOOL) - condition_id = spirv_compiler_emit_int_to_bool(compiler, - VKD3D_SHADER_CONDITIONAL_OP_NZ, src[0].reg.data_type, component_count, condition_id); + { + if 
(instruction->handler_idx == VKD3DSIH_CMP) + condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, + vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), condition_id, + spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count)); + else + condition_id = spirv_compiler_emit_int_to_bool(compiler, + VKD3D_SHADER_CONDITIONAL_OP_NZ, src[0].reg.data_type, component_count, condition_id); + } val_id = vkd3d_spirv_build_op_select(builder, type_id, condition_id, src1_id, src2_id); spirv_compiler_emit_store_dst(compiler, dst, val_id); @@ -7335,7 +7585,7 @@ static void spirv_compiler_emit_imad(struct spirv_compiler *compiler, unsigned int i, component_count; component_count = vsir_write_mask_component_count(dst->write_mask); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, component_count); + type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, component_count); for (i = 0; i < ARRAY_SIZE(src_ids); ++i) src_ids[i] = spirv_compiler_emit_load_src(compiler, &src[i], dst->write_mask); @@ -7684,6 +7934,56 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, result_id); } +static void spirv_compiler_emit_orderedness_instruction(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, src0_id, src1_id, val_id; + + type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); + src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); + src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); + /* OpOrdered and OpUnordered are only available in Kernel mode. 
*/ + src0_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src0_id); + src1_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src1_id); + val_id = vkd3d_spirv_build_op_logical_or(builder, type_id, src0_id, src1_id); + if (instruction->handler_idx == VKD3DSIH_ORD) + val_id = vkd3d_spirv_build_op_logical_not(builder, type_id, val_id); + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t src0_id, src1_id, type_id, result_id; + unsigned int component_count; + SpvOp op; + + switch (instruction->handler_idx) + { + case VKD3DSIH_SLT: op = SpvOpFOrdLessThan; break; + case VKD3DSIH_SGE: op = SpvOpFOrdGreaterThanEqual; break; + default: + vkd3d_unreachable(); + } + + component_count = vsir_write_mask_component_count(dst->write_mask); + + src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); + src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); + result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, type_id, src0_id, src1_id); + + result_id = spirv_compiler_emit_bool_to_float(compiler, component_count, result_id, false); + spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, result_id); +} + static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction, uint32_t target_block_id) { @@ -7702,11 +8002,31 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co return merge_block_id; } +static void spirv_compiler_end_invocation_interlock(struct 
spirv_compiler *compiler) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + + if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilitySampleRateShading)) + { + spirv_compiler_emit_execution_mode(compiler, SpvExecutionModeSampleInterlockOrderedEXT, NULL, 0); + vkd3d_spirv_enable_capability(builder, SpvCapabilityFragmentShaderSampleInterlockEXT); + } + else + { + spirv_compiler_emit_execution_mode(compiler, SpvExecutionModePixelInterlockOrderedEXT, NULL, 0); + vkd3d_spirv_enable_capability(builder, SpvCapabilityFragmentShaderPixelInterlockEXT); + } + vkd3d_spirv_build_op(&builder->function_stream, SpvOpEndInvocationInterlockEXT); +} + static void spirv_compiler_emit_return(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + if (compiler->use_invocation_interlock) + spirv_compiler_end_invocation_interlock(compiler); + if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (is_in_default_phase(compiler) || is_in_control_point_phase(compiler))) spirv_compiler_emit_shader_epilogue_invocation(compiler); @@ -7790,8 +8110,9 @@ static void spirv_compiler_emit_discard(struct spirv_compiler *compiler, * a mismatch between the VSIR structure and the SPIR-V one, which would cause problems if * structurisation is necessary. Therefore we emit it as a function call. 
*/ condition_id = spirv_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); - condition_id = spirv_compiler_emit_int_to_bool(compiler, - instruction->flags, src->reg.data_type, 1, condition_id); + if (src->reg.data_type != VKD3D_DATA_BOOL) + condition_id = spirv_compiler_emit_int_to_bool(compiler, + instruction->flags, src->reg.data_type, 1, condition_id); void_id = vkd3d_spirv_get_op_type_void(builder); vkd3d_spirv_build_op_function_call(builder, void_id, spirv_compiler_get_discard_function_id(compiler), &condition_id, 1); @@ -8570,7 +8891,6 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, reg_info.id, coordinate_id); constituents[j++] = vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); } - assert(dst->reg.data_type == VKD3D_DATA_UINT); spirv_compiler_emit_store_dst_components(compiler, dst, VKD3D_SHADER_COMPONENT_UINT, constituents); } @@ -8678,8 +8998,8 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; uint32_t base_coordinate_id, component_idx; - const struct vkd3d_shader_src_param *data; struct vkd3d_shader_register_info reg_info; + struct vkd3d_shader_src_param data; unsigned int component_count; if (!spirv_compiler_get_register_info(compiler, &dst->reg, ®_info)) @@ -8691,9 +9011,9 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - data = &src[instruction->src_count - 1]; - assert(data->reg.data_type == VKD3D_DATA_UINT); - val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); + data = src[instruction->src_count - 1]; + data.reg.data_type = VKD3D_DATA_UINT; + val_id = 
spirv_compiler_emit_load_src(compiler, &data, dst->write_mask); component_count = vsir_write_mask_component_count(dst->write_mask); for (component_idx = 0; component_idx < component_count; ++component_idx) @@ -8944,6 +9264,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil const struct vkd3d_shader_dst_param *resource; uint32_t coordinate_id, sample_id, pointer_id; struct vkd3d_shader_register_info reg_info; + SpvMemorySemanticsMask memory_semantic; struct vkd3d_shader_image image; unsigned int structure_stride; uint32_t coordinate_mask; @@ -9035,12 +9356,23 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil val_id = spirv_compiler_emit_load_src_with_type(compiler, &src[1], VKD3DSP_WRITEMASK_0, component_type); + if (instruction->flags & VKD3DARF_VOLATILE) + { + WARN("Ignoring 'volatile' attribute.\n"); + spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG, + "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->handler_idx); + } + + memory_semantic = (instruction->flags & VKD3DARF_SEQ_CST) + ? 
SpvMemorySemanticsSequentiallyConsistentMask + : SpvMemorySemanticsMaskNone; + operands[i++] = pointer_id; operands[i++] = spirv_compiler_get_constant_uint(compiler, scope); - operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone); + operands[i++] = spirv_compiler_get_constant_uint(compiler, memory_semantic); if (instruction->src_count >= 3) { - operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone); + operands[i++] = spirv_compiler_get_constant_uint(compiler, memory_semantic); operands[i++] = spirv_compiler_emit_load_src_with_type(compiler, &src[2], VKD3DSP_WRITEMASK_0, component_type); } operands[i++] = val_id; @@ -9110,6 +9442,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; uint32_t type_id, lod_id, val_id, miplevel_count_id; + enum vkd3d_shader_component_type component_type; uint32_t constituents[VKD3D_VEC4_SIZE]; unsigned int i, size_component_count; struct vkd3d_shader_image image; @@ -9146,10 +9479,16 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, val_id = vkd3d_spirv_build_op_composite_construct(builder, type_id, constituents, i + 2); + component_type = VKD3D_SHADER_COMPONENT_FLOAT; + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); if (instruction->flags == VKD3DSI_RESINFO_UINT) { - val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + /* SSA registers must match the specified result type. 
*/ + if (!register_is_ssa(&dst->reg)) + val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + else + component_type = VKD3D_SHADER_COMPONENT_UINT; } else { @@ -9158,7 +9497,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, val_id = vkd3d_spirv_build_op_convert_utof(builder, type_id, val_id); } val_id = spirv_compiler_emit_swizzle(compiler, val_id, VKD3DSP_WRITEMASK_ALL, - VKD3D_SHADER_COMPONENT_FLOAT, src[1].swizzle, dst->write_mask); + component_type, src[1].swizzle, dst->write_mask); spirv_compiler_emit_store_dst(compiler, dst, val_id); } @@ -9468,6 +9807,192 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, vkd3d_spirv_build_op_end_primitive(builder); } +static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode handler_idx) +{ + switch (handler_idx) + { + case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: + return SpvOpGroupNonUniformAllEqual; + case VKD3DSIH_WAVE_ALL_TRUE: + return SpvOpGroupNonUniformAll; + case VKD3DSIH_WAVE_ANY_TRUE: + return SpvOpGroupNonUniformAny; + default: + vkd3d_unreachable(); + } +} + +static void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, val_id; + SpvOp op; + + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformVote); + + op = map_wave_bool_op(instruction->handler_idx); + type_id = vkd3d_spirv_get_op_type_bool(builder); + val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); + val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, + type_id, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); + + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + +static uint32_t spirv_compiler_emit_group_nonuniform_ballot(struct spirv_compiler 
*compiler, + const struct vkd3d_shader_src_param *src) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, val_id; + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, VKD3D_VEC4_SIZE); + val_id = spirv_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); + val_id = vkd3d_spirv_build_op_group_nonuniform_ballot(builder, type_id, val_id); + + return val_id; +} + +static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_dst_param *dst = instruction->dst; + uint32_t val_id; + + val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + +static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode handler_idx, bool is_float) +{ + switch (handler_idx) + { + case VKD3DSIH_WAVE_ACTIVE_BIT_AND: + return SpvOpGroupNonUniformBitwiseAnd; + case VKD3DSIH_WAVE_ACTIVE_BIT_OR: + return SpvOpGroupNonUniformBitwiseOr; + case VKD3DSIH_WAVE_ACTIVE_BIT_XOR: + return SpvOpGroupNonUniformBitwiseXor; + case VKD3DSIH_WAVE_OP_ADD: + return is_float ? SpvOpGroupNonUniformFAdd : SpvOpGroupNonUniformIAdd; + case VKD3DSIH_WAVE_OP_IMAX: + return SpvOpGroupNonUniformSMax; + case VKD3DSIH_WAVE_OP_IMIN: + return SpvOpGroupNonUniformSMin; + case VKD3DSIH_WAVE_OP_MAX: + return SpvOpGroupNonUniformFMax; + case VKD3DSIH_WAVE_OP_MIN: + return SpvOpGroupNonUniformFMin; + case VKD3DSIH_WAVE_OP_MUL: + return is_float ? 
SpvOpGroupNonUniformFMul : SpvOpGroupNonUniformIMul; + case VKD3DSIH_WAVE_OP_UMAX: + return SpvOpGroupNonUniformUMax; + case VKD3DSIH_WAVE_OP_UMIN: + return SpvOpGroupNonUniformUMin; + default: + vkd3d_unreachable(); + } +} + +static void spirv_compiler_emit_wave_alu_op(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, val_id; + SpvOp op; + + op = map_wave_alu_op(instruction->handler_idx, data_type_is_floating_point(src->reg.data_type)); + + type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, + vsir_write_mask_component_count(dst->write_mask)); + val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); + + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformArithmetic); + val_id = vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, op, type_id, + vkd3d_spirv_get_op_scope_subgroup(builder), + (instruction->flags & VKD3DSI_WAVE_PREFIX) ? SpvGroupOperationExclusiveScan : SpvGroupOperationReduce, + val_id); + + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void spirv_compiler_emit_wave_bit_count(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + SpvGroupOperation group_op; + uint32_t type_id, val_id; + + group_op = (instruction->handler_idx == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? 
SpvGroupOperationExclusiveScan + : SpvGroupOperationReduce; + + val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + val_id = vkd3d_spirv_build_op_group_nonuniform_ballot_bit_count(builder, type_id, group_op, val_id); + + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void spirv_compiler_emit_wave_is_first_lane(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + uint32_t val_id; + + val_id = vkd3d_spirv_build_op_group_nonuniform_elect(builder); + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void spirv_compiler_emit_wave_read_lane_at(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, lane_id, val_id; + + type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, + vsir_write_mask_component_count(dst->write_mask)); + val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); + lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); + + /* TODO: detect values loaded from a const buffer? */ + if (register_is_constant_or_undef(&src[1].reg)) + { + /* Uniform lane_id only. */ + val_id = vkd3d_spirv_build_op_group_nonuniform_broadcast(builder, type_id, val_id, lane_id); + } + else + { + /* WaveReadLaneAt supports non-uniform lane ids, so if lane_id is not constant it may not be uniform. 
*/ + val_id = vkd3d_spirv_build_op_group_nonuniform_shuffle(builder, type_id, val_id, lane_id); + } + + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void spirv_compiler_emit_wave_read_lane_first(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, val_id; + + type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, + vsir_write_mask_component_count(dst->write_mask)); + val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); + val_id = vkd3d_spirv_build_op_group_nonuniform_broadcast_first(builder, type_id, val_id); + + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + /* This function is called after declarations are processed. */ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) { @@ -9475,6 +10000,11 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) if (compiler->emit_point_size) spirv_compiler_emit_point_size(compiler); + + /* Maybe in the future we can try to shrink the size of the interlocked + * section. 
*/ + if (compiler->use_invocation_interlock) + vkd3d_spirv_build_op(&compiler->spirv_builder.function_stream, SpvOpBeginInvocationInterlockEXT); } static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, @@ -9482,6 +10012,8 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, { int ret = VKD3D_OK; + compiler->location = instruction->location; + switch (instruction->handler_idx) { case VKD3DSIH_DCL_GLOBAL_FLAGS: @@ -9549,6 +10081,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, break; case VKD3DSIH_DMOVC: case VKD3DSIH_MOVC: + case VKD3DSIH_CMP: spirv_compiler_emit_movc(compiler, instruction); break; case VKD3DSIH_SWAPC: @@ -9587,6 +10120,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_ISFINITE: spirv_compiler_emit_isfinite(compiler, instruction); break; + case VKD3DSIH_ABS: case VKD3DSIH_ACOS: case VKD3DSIH_ASIN: case VKD3DSIH_ATAN: @@ -9669,6 +10203,14 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_ULT: spirv_compiler_emit_comparison_instruction(compiler, instruction); break; + case VKD3DSIH_ORD: + case VKD3DSIH_UNO: + spirv_compiler_emit_orderedness_instruction(compiler, instruction); + break; + case VKD3DSIH_SLT: + case VKD3DSIH_SGE: + spirv_compiler_emit_float_comparison_instruction(compiler, instruction); + break; case VKD3DSIH_BFI: case VKD3DSIH_IBFE: case VKD3DSIH_UBFE: @@ -9795,8 +10337,41 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_CUT_STREAM: spirv_compiler_emit_cut_stream(compiler, instruction); break; + case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: + case VKD3DSIH_WAVE_ALL_TRUE: + case VKD3DSIH_WAVE_ANY_TRUE: + spirv_compiler_emit_wave_bool_op(compiler, instruction); + break; + case VKD3DSIH_WAVE_ACTIVE_BALLOT: + spirv_compiler_emit_wave_active_ballot(compiler, instruction); + break; + case VKD3DSIH_WAVE_ACTIVE_BIT_AND: + case 
VKD3DSIH_WAVE_ACTIVE_BIT_OR: + case VKD3DSIH_WAVE_ACTIVE_BIT_XOR: + case VKD3DSIH_WAVE_OP_ADD: + case VKD3DSIH_WAVE_OP_IMAX: + case VKD3DSIH_WAVE_OP_IMIN: + case VKD3DSIH_WAVE_OP_MAX: + case VKD3DSIH_WAVE_OP_MIN: + case VKD3DSIH_WAVE_OP_MUL: + case VKD3DSIH_WAVE_OP_UMAX: + case VKD3DSIH_WAVE_OP_UMIN: + spirv_compiler_emit_wave_alu_op(compiler, instruction); + break; + case VKD3DSIH_WAVE_ALL_BIT_COUNT: + case VKD3DSIH_WAVE_PREFIX_BIT_COUNT: + spirv_compiler_emit_wave_bit_count(compiler, instruction); + break; + case VKD3DSIH_WAVE_IS_FIRST_LANE: + spirv_compiler_emit_wave_is_first_lane(compiler, instruction); + break; + case VKD3DSIH_WAVE_READ_LANE_AT: + spirv_compiler_emit_wave_read_lane_at(compiler, instruction); + break; + case VKD3DSIH_WAVE_READ_LANE_FIRST: + spirv_compiler_emit_wave_read_lane_first(compiler, instruction); + break; case VKD3DSIH_DCL: - case VKD3DSIH_DCL_CONSTANT_BUFFER: case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: case VKD3DSIH_DCL_INPUT_SGV: @@ -9892,20 +10467,19 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c } } -static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, - struct vkd3d_shader_code *spirv) +static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct vsir_program *program, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *spirv) { const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - struct vkd3d_shader_desc *shader_desc = &parser->shader_desc; struct vkd3d_shader_instruction_array instructions; - struct vsir_program *program = &parser->program; + enum vkd3d_shader_spirv_environment environment; enum vkd3d_result result = VKD3D_OK; unsigned 
int i; - if ((result = vkd3d_shader_normalise(parser, compile_info)) < 0) + if ((result = vsir_program_normalise(program, compiler->config_flags, + compile_info, compiler->message_context)) < 0) return result; if (program->temp_count) @@ -9915,21 +10489,18 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, spirv_compiler_emit_descriptor_declarations(compiler); - compiler->location.column = 0; - compiler->location.line = 1; - if (program->block_count && !spirv_compiler_init_blocks(compiler, program->block_count)) return VKD3D_ERROR_OUT_OF_MEMORY; instructions = program->instructions; memset(&program->instructions, 0, sizeof(program->instructions)); - compiler->input_signature = shader_desc->input_signature; - compiler->output_signature = shader_desc->output_signature; - compiler->patch_constant_signature = shader_desc->patch_constant_signature; - memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); - memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); - memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); + compiler->input_signature = program->input_signature; + compiler->output_signature = program->output_signature; + compiler->patch_constant_signature = program->patch_constant_signature; + memset(&program->input_signature, 0, sizeof(program->input_signature)); + memset(&program->output_signature, 0, sizeof(program->output_signature)); + memset(&program->patch_constant_signature, 0, sizeof(program->patch_constant_signature)); compiler->use_vocp = program->use_vocp; compiler->block_names = program->block_names; compiler->block_name_count = program->block_name_count; @@ -9942,7 +10513,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, for (i = 0; i < instructions.count && result >= 0; ++i) { - compiler->location.line = i + 1; result = spirv_compiler_handle_instruction(compiler, &instructions.elements[i]); } @@ 
-9985,12 +10555,12 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, if (compiler->strip_debug) vkd3d_spirv_stream_clear(&builder->debug_stream); - if (!vkd3d_spirv_compile_module(builder, spirv, spirv_compiler_get_entry_point_name(compiler))) + environment = spirv_compiler_get_target_environment(compiler); + if (!vkd3d_spirv_compile_module(builder, spirv, spirv_compiler_get_entry_point_name(compiler), environment)) return VKD3D_ERROR; - if (TRACE_ON() || parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION) + if (TRACE_ON() || compiler->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION) { - enum vkd3d_shader_spirv_environment environment = spirv_compiler_get_target_environment(compiler); struct vkd3d_string_buffer buffer; if (TRACE_ON()) @@ -10018,7 +10588,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, if (compile_info->target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT) { struct vkd3d_shader_code text; - enum vkd3d_shader_spirv_environment environment = spirv_compiler_get_target_environment(compiler); if (vkd3d_spirv_binary_to_text(spirv, environment, compiler->formatting, &text) != VKD3D_OK) return VKD3D_ERROR; vkd3d_shader_free_shader_code(spirv); @@ -10028,7 +10597,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, return VKD3D_OK; } -int spirv_compile(struct vkd3d_shader_parser *parser, +int spirv_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) @@ -10036,14 +10605,14 @@ int spirv_compile(struct vkd3d_shader_parser *parser, struct spirv_compiler *spirv_compiler; int ret; - if (!(spirv_compiler = spirv_compiler_create(&parser->program.shader_version, &parser->shader_desc, - compile_info, scan_descriptor_info, message_context, 
&parser->location, parser->config_flags))) + if (!(spirv_compiler = spirv_compiler_create(program, compile_info, + scan_descriptor_info, message_context, config_flags))) { ERR("Failed to create SPIR-V compiler.\n"); return VKD3D_ERROR; } - ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); + ret = spirv_compiler_generate_spirv(spirv_compiler, program, compile_info, out); spirv_compiler_destroy(spirv_compiler); return ret; diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index 3be4e40ab0c..b562e815a81 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -719,14 +719,9 @@ static const enum vkd3d_data_type data_type_table[] = /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, }; -static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) -{ - return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); -} - static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) { - const struct vkd3d_shader_version *version = &sm4->p.program.shader_version; + const struct vkd3d_shader_version *version = &sm4->p.program->shader_version; return version->major >= 5 && version->minor >= 1; } @@ -811,7 +806,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui icb->element_count = icb_size / VKD3D_VEC4_SIZE; icb->is_null = false; memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); - shader_instruction_array_add_icb(&priv->p.program.instructions, icb); + shader_instruction_array_add_icb(&priv->p.program->instructions, icb); ins->declaration.icb = icb; } @@ -933,6 +928,7 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) { struct vkd3d_shader_index_range *index_range = &ins->declaration.index_range; + struct vsir_program *program = priv->p.program; 
unsigned int i, register_idx, register_count; const struct shader_signature *signature; enum vkd3d_shader_register_type type; @@ -954,32 +950,32 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins case VKD3DSPR_INCONTROLPOINT: io_masks = priv->input_register_masks; ranges = &priv->input_index_ranges; - signature = &priv->p.shader_desc.input_signature; + signature = &program->input_signature; break; case VKD3DSPR_OUTPUT: if (sm4_parser_is_in_fork_or_join_phase(priv)) { io_masks = priv->patch_constant_register_masks; ranges = &priv->patch_constant_index_ranges; - signature = &priv->p.shader_desc.patch_constant_signature; + signature = &program->patch_constant_signature; } else { io_masks = priv->output_register_masks; ranges = &priv->output_index_ranges; - signature = &priv->p.shader_desc.output_signature; + signature = &program->output_signature; } break; case VKD3DSPR_COLOROUT: case VKD3DSPR_OUTCONTROLPOINT: io_masks = priv->output_register_masks; ranges = &priv->output_index_ranges; - signature = &priv->p.shader_desc.output_signature; + signature = &program->output_signature; break; case VKD3DSPR_PATCHCONST: io_masks = priv->patch_constant_register_masks; ranges = &priv->patch_constant_index_ranges; - signature = &priv->p.shader_desc.patch_constant_signature; + signature = &program->patch_constant_signature; break; default: @@ -1057,16 +1053,17 @@ static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction } static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) { enum vkd3d_sm4_input_primitive_type primitive_type; + struct vsir_program *program = sm4->p.program; primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> 
VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) { ins->declaration.primitive_type.type = VKD3D_PT_PATCH; ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; - priv->p.program.input_control_point_count = ins->declaration.primitive_type.patch_vertex_count; + program->input_control_point_count = ins->declaration.primitive_type.patch_vertex_count; } else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) { @@ -1075,7 +1072,7 @@ static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction else { ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type].vkd3d_type; - priv->p.program.input_control_point_count = input_primitive_type_table[primitive_type].control_point_count; + program->input_control_point_count = input_primitive_type_table[primitive_type].control_point_count; } if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) @@ -1083,11 +1080,13 @@ static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction } static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) { + struct vsir_program *program = sm4->p.program; + ins->declaration.count = *tokens; if (opcode == VKD3D_SM4_OP_DCL_TEMPS) - priv->p.program.temp_count = max(priv->p.program.temp_count, *tokens); + program->temp_count = max(program->temp_count, *tokens); } static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1113,7 +1112,7 @@ static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, u if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], 
VKD3D_DATA_FLOAT, dst)) { struct signature_element *e = vsir_signature_find_element_for_reg( - &priv->p.shader_desc.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); e->interpolation_mode = ins->flags; } @@ -1128,7 +1127,7 @@ static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, dst)) { struct signature_element *e = vsir_signature_find_element_for_reg( - &priv->p.shader_desc.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); e->interpolation_mode = ins->flags; } @@ -1183,15 +1182,17 @@ static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, } static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) { + struct vsir_program *program = sm4->p.program; + ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; if (opcode == VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT) - priv->p.program.input_control_point_count = ins->declaration.count; + program->input_control_point_count = ins->declaration.count; else - priv->p.program.output_control_point_count = ins->declaration.count; + program->output_control_point_count = ins->declaration.count; } static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1263,6 +1264,7 @@ static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, u 
ins->declaration.tgsm_raw.byte_count = *tokens; if (ins->declaration.tgsm_raw.byte_count % 4) FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); + ins->declaration.tgsm_raw.zero_init = false; } static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1274,6 +1276,7 @@ static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction ins->declaration.tgsm_structured.structure_count = *tokens; if (ins->declaration.tgsm_structured.byte_stride % 4) FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); + ins->declaration.tgsm_structured.zero_init = false; } static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1743,21 +1746,12 @@ static enum vkd3d_data_type map_data_type(char t) } } -static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) -{ - struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); - - vsir_program_cleanup(&parser->program); - free_shader_desc(&parser->shader_desc); - vkd3d_free(sm4); -} - static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) { if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) { - struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(&priv->p.program, 1); + struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(priv->p.program, 1); if (!(reg_idx->rel_addr = rel_addr)) { @@ -2035,7 +2029,7 @@ static bool register_is_control_point_input(const struct vkd3d_shader_register * { return reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT || (reg->type == VKD3DSPR_INPUT && (priv->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE - || priv->p.program.shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY)); + || priv->p.program->shader_version.type == 
VKD3D_SHADER_TYPE_GEOMETRY)); } static uint32_t mask_from_swizzle(uint32_t swizzle) @@ -2359,7 +2353,7 @@ static void shader_sm4_read_instruction_modifier(uint32_t modifier, struct vkd3d static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_instruction *ins) { const struct vkd3d_sm4_opcode_info *opcode_info; - struct vsir_program *program = &sm4->p.program; + struct vsir_program *program = sm4->p.program; uint32_t opcode_token, opcode, previous_token; struct vkd3d_shader_dst_param *dst_params; struct vkd3d_shader_src_param *src_params; @@ -2498,13 +2492,8 @@ fail: return; } -static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = -{ - .parser_destroy = shader_sm4_destroy, -}; - -static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, - size_t byte_code_size, const char *source_name, const struct shader_signature *output_signature, +static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_program *program, + const uint32_t *byte_code, size_t byte_code_size, const char *source_name, struct vkd3d_shader_message_context *message_context) { struct vkd3d_shader_version version; @@ -2563,9 +2552,9 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t version.minor = VKD3D_SM4_VERSION_MINOR(version_token); /* Estimate instruction count to avoid reallocation in most shaders. 
*/ - if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops, - token_count / 7u + 20)) + if (!vsir_program_init(program, &version, token_count / 7u + 20)) return false; + vkd3d_shader_parser_init(&sm4->p, program, message_context, source_name); sm4->ptr = sm4->start; init_sm4_lookup_tables(&sm4->lookup); @@ -2644,94 +2633,88 @@ static void shader_sm4_validate_default_phase_index_ranges(struct vkd3d_shader_s return; } -int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) +int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, + struct vkd3d_shader_message_context *message_context, struct vsir_program *program) { struct vkd3d_shader_instruction_array *instructions; - struct vkd3d_shader_desc *shader_desc; + struct vkd3d_shader_sm4_parser sm4 = {0}; + struct dxbc_shader_desc dxbc_desc = {0}; struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_sm4_parser *sm4; int ret; - if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) - { - ERR("Failed to allocate parser.\n"); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - - shader_desc = &sm4->p.shader_desc; - shader_desc->is_dxil = false; + dxbc_desc.is_dxil = false; if ((ret = shader_extract_from_dxbc(&compile_info->source, - message_context, compile_info->source_name, shader_desc)) < 0) + message_context, compile_info->source_name, &dxbc_desc)) < 0) { WARN("Failed to extract shader, vkd3d result %d.\n", ret); - vkd3d_free(sm4); return ret; } - if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, - compile_info->source_name, &shader_desc->output_signature, message_context)) + if (!shader_sm4_init(&sm4, program, dxbc_desc.byte_code, dxbc_desc.byte_code_size, + compile_info->source_name, message_context)) { WARN("Failed to initialise shader parser.\n"); - free_shader_desc(shader_desc); - 
vkd3d_free(sm4); + free_dxbc_shader_desc(&dxbc_desc); return VKD3D_ERROR_INVALID_ARGUMENT; } + program->input_signature = dxbc_desc.input_signature; + program->output_signature = dxbc_desc.output_signature; + program->patch_constant_signature = dxbc_desc.patch_constant_signature; + memset(&dxbc_desc, 0, sizeof(dxbc_desc)); + /* DXBC stores used masks inverted for output signatures, for some reason. * We return them un-inverted. */ - uninvert_used_masks(&shader_desc->output_signature); - if (sm4->p.program.shader_version.type == VKD3D_SHADER_TYPE_HULL) - uninvert_used_masks(&shader_desc->patch_constant_signature); + uninvert_used_masks(&program->output_signature); + if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) + uninvert_used_masks(&program->patch_constant_signature); - if (!shader_sm4_parser_validate_signature(sm4, &shader_desc->input_signature, - sm4->input_register_masks, "Input") - || !shader_sm4_parser_validate_signature(sm4, &shader_desc->output_signature, - sm4->output_register_masks, "Output") - || !shader_sm4_parser_validate_signature(sm4, &shader_desc->patch_constant_signature, - sm4->patch_constant_register_masks, "Patch constant")) + if (!shader_sm4_parser_validate_signature(&sm4, &program->input_signature, + sm4.input_register_masks, "Input") + || !shader_sm4_parser_validate_signature(&sm4, &program->output_signature, + sm4.output_register_masks, "Output") + || !shader_sm4_parser_validate_signature(&sm4, &program->patch_constant_signature, + sm4.patch_constant_register_masks, "Patch constant")) { - shader_sm4_destroy(&sm4->p); + vsir_program_cleanup(program); return VKD3D_ERROR_INVALID_SHADER; } - instructions = &sm4->p.program.instructions; - while (sm4->ptr != sm4->end) + instructions = &program->instructions; + while (sm4.ptr != sm4.end) { if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) { ERR("Failed to allocate instructions.\n"); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, 
"Out of memory."); - shader_sm4_destroy(&sm4->p); + vkd3d_shader_parser_error(&sm4.p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); + vsir_program_cleanup(program); return VKD3D_ERROR_OUT_OF_MEMORY; } ins = &instructions->elements[instructions->count]; - shader_sm4_read_instruction(sm4, ins); + shader_sm4_read_instruction(&sm4, ins); if (ins->handler_idx == VKD3DSIH_INVALID) { WARN("Encountered unrecognized or invalid instruction.\n"); - shader_sm4_destroy(&sm4->p); + vsir_program_cleanup(program); return VKD3D_ERROR_OUT_OF_MEMORY; } ++instructions->count; } - if (sm4->p.program.shader_version.type == VKD3D_SHADER_TYPE_HULL - && !sm4->has_control_point_phase && !sm4->p.failed) - shader_sm4_validate_default_phase_index_ranges(sm4); + if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL + && !sm4.has_control_point_phase && !sm4.p.failed) + shader_sm4_validate_default_phase_index_ranges(&sm4); - if (!sm4->p.failed) - vsir_validate(&sm4->p); + if (!sm4.p.failed) + vkd3d_shader_parser_validate(&sm4.p, config_flags); - if (sm4->p.failed) + if (sm4.p.failed) { WARN("Failed to parse shader.\n"); - shader_sm4_destroy(&sm4->p); + vsir_program_cleanup(program); return VKD3D_ERROR_INVALID_SHADER; } - *parser = &sm4->p; - return VKD3D_OK; } @@ -2739,7 +2722,7 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc static bool type_is_integer(const struct hlsl_type *type) { - switch (type->base_type) + switch (type->e.numeric.type) { case HLSL_TYPE_BOOL: case HLSL_TYPE_INT: @@ -2928,7 +2911,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, put_u32(&buffer, 0); /* name */ put_u32(&buffer, usage_idx); put_u32(&buffer, usage); - switch (var->data_type->base_type) + switch (var->data_type->e.numeric.type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: @@ -2989,31 +2972,39 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) { switch (type->class) { - case HLSL_CLASS_ARRAY: - return 
sm4_class(type->e.array.type); case HLSL_CLASS_MATRIX: assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) return D3D_SVC_MATRIX_COLUMNS; else return D3D_SVC_MATRIX_ROWS; - case HLSL_CLASS_OBJECT: - return D3D_SVC_OBJECT; case HLSL_CLASS_SCALAR: return D3D_SVC_SCALAR; - case HLSL_CLASS_STRUCT: - return D3D_SVC_STRUCT; case HLSL_CLASS_VECTOR: return D3D_SVC_VECTOR; - default: - ERR("Invalid class %#x.\n", type->class); - vkd3d_unreachable(); + + case HLSL_CLASS_ARRAY: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_STRUCT: + case HLSL_CLASS_PASS: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_VOID: + break; } + vkd3d_unreachable(); } static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) { - switch (type->base_type) + switch (type->e.numeric.type) { case HLSL_TYPE_BOOL: return D3D_SVT_BOOL; @@ -3024,68 +3015,8 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) return D3D_SVT_FLOAT; case HLSL_TYPE_INT: return D3D_SVT_INT; - case HLSL_TYPE_PIXELSHADER: - return D3D_SVT_PIXELSHADER; - case HLSL_TYPE_SAMPLER: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3D_SVT_SAMPLER1D; - case HLSL_SAMPLER_DIM_2D: - return D3D_SVT_SAMPLER2D; - case HLSL_SAMPLER_DIM_3D: - return D3D_SVT_SAMPLER3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3D_SVT_SAMPLERCUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3D_SVT_SAMPLER; - default: - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_STRING: - return D3D_SVT_STRING; - case HLSL_TYPE_TEXTURE: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3D_SVT_TEXTURE1D; - case HLSL_SAMPLER_DIM_2D: - return D3D_SVT_TEXTURE2D; - case HLSL_SAMPLER_DIM_2DMS: - return 
D3D_SVT_TEXTURE2DMS; - case HLSL_SAMPLER_DIM_3D: - return D3D_SVT_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3D_SVT_TEXTURECUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3D_SVT_TEXTURE; - default: - vkd3d_unreachable(); - } - break; case HLSL_TYPE_UINT: return D3D_SVT_UINT; - case HLSL_TYPE_VERTEXSHADER: - return D3D_SVT_VERTEXSHADER; - case HLSL_TYPE_VOID: - return D3D_SVT_VOID; - case HLSL_TYPE_UAV: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3D_SVT_RWTEXTURE1D; - case HLSL_SAMPLER_DIM_2D: - return D3D_SVT_RWTEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3D_SVT_RWTEXTURE3D; - case HLSL_SAMPLER_DIM_1DARRAY: - return D3D_SVT_RWTEXTURE1DARRAY; - case HLSL_SAMPLER_DIM_2DARRAY: - return D3D_SVT_RWTEXTURE2DARRAY; - default: - vkd3d_unreachable(); - } default: vkd3d_unreachable(); } @@ -3096,8 +3027,8 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); const char *name = array_type->name ? 
array_type->name : ""; const struct hlsl_profile_info *profile = ctx->profile; - unsigned int field_count = 0, array_size = 0; - size_t fields_offset = 0, name_offset = 0; + unsigned int array_size = 0; + size_t name_offset = 0; size_t i; if (type->bytecode_offset) @@ -3111,32 +3042,47 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b if (array_type->class == HLSL_CLASS_STRUCT) { - field_count = array_type->e.record.field_count; + unsigned int field_count = 0; + size_t fields_offset = 0; - for (i = 0; i < field_count; ++i) + for (i = 0; i < array_type->e.record.field_count; ++i) { struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) + continue; + field->name_bytecode_offset = put_string(buffer, field->name); write_sm4_type(ctx, buffer, field->type); + ++field_count; } fields_offset = bytecode_align(buffer); - for (i = 0; i < field_count; ++i) + for (i = 0; i < array_type->e.record.field_count; ++i) { struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) + continue; + put_u32(buffer, field->name_bytecode_offset); put_u32(buffer, field->type->bytecode_offset); - put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); + put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float)); } + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID)); + put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); + put_u32(buffer, vkd3d_make_u32(array_size, field_count)); + put_u32(buffer, fields_offset); + } + else + { + assert(array_type->class <= HLSL_CLASS_LAST_NUMERIC); + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); + put_u32(buffer, vkd3d_make_u32(array_type->dimy, array_type->dimx)); + put_u32(buffer, vkd3d_make_u32(array_size, 0)); + put_u32(buffer, 1); } - - type->bytecode_offset = 
put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type))); - put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, field_count)); - put_u32(buffer, fields_offset); if (profile->major_version >= 5) { @@ -3150,20 +3096,21 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) { - if (type->class == HLSL_CLASS_ARRAY) - return sm4_resource_type(type->e.array.type); - - switch (type->base_type) + switch (type->class) { - case HLSL_TYPE_SAMPLER: + case HLSL_CLASS_ARRAY: + return sm4_resource_type(type->e.array.type); + case HLSL_CLASS_SAMPLER: return D3D_SIT_SAMPLER; - case HLSL_TYPE_TEXTURE: + case HLSL_CLASS_TEXTURE: return D3D_SIT_TEXTURE; - case HLSL_TYPE_UAV: + case HLSL_CLASS_UAV: return D3D_SIT_UAV_RWTYPED; default: - vkd3d_unreachable(); + break; } + + vkd3d_unreachable(); } static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) @@ -3171,7 +3118,7 @@ static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type if (type->class == HLSL_CLASS_ARRAY) return sm4_resource_format(type->e.array.type); - switch (type->e.resource.format->base_type) + switch (type->e.resource.format->e.numeric.type) { case HLSL_TYPE_DOUBLE: return D3D_RETURN_TYPE_DOUBLE; @@ -3328,7 +3275,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un extern_resources[*count].name = name; extern_resources[*count].data_type = component_type; - extern_resources[*count].is_user_packed = false; + extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; extern_resources[*count].regset = regset; extern_resources[*count].id = var->regs[regset].id + regset_offset; @@ -3428,10 +3375,10 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) if (profile->major_version >= 5) { - put_u32(&buffer, TAG_RD11); + put_u32(&buffer, 
hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ - put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ + put_u32(&buffer, (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t)); /* size of binding desc */ put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ @@ -3448,6 +3395,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) const struct extern_resource *resource = &extern_resources[i]; uint32_t flags = 0; + if (hlsl_version_ge(ctx, 5, 1)) + hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource reflection."); + if (resource->is_user_packed) flags |= D3D_SIF_USERPACKED; @@ -3480,6 +3430,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) if (!cbuffer->reg.allocated) continue; + if (hlsl_version_ge(ctx, 5, 1)) + hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource reflection."); + if (cbuffer->reservation.reg_type) flags |= D3D_SIF_USERPACKED; @@ -3523,8 +3476,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_uniform && var->buffer == cbuffer - && var->data_type->class != HLSL_CLASS_OBJECT) + if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) ++var_count; } @@ -3558,8 +3510,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_uniform && var->buffer == cbuffer - && var->data_type->class != HLSL_CLASS_OBJECT) + if (var->is_uniform && var->buffer == cbuffer && 
var->data_type->reg_size[HLSL_REGSET_NUMERIC]) { uint32_t flags = 0; @@ -3586,8 +3537,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) j = 0; LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_uniform && var->buffer == cbuffer - && var->data_type->class != HLSL_CLASS_OBJECT) + if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) { const unsigned int var_size = (profile->major_version >= 5 ? 10 : 6); size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); @@ -4598,7 +4548,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node enum hlsl_sampler_dim dim) { const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); - bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE + bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; @@ -4756,11 +4706,11 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl const struct hlsl_ir_node *dst = &load->node; struct sm4_instruction instr; - assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); + assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; - if (dst->data_type->base_type == HLSL_TYPE_UINT) + if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; sm4_dst_from_node(&instr.dsts[0], dst); @@ -4785,11 +4735,11 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir return; } - 
assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); + assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_RESINFO; - if (dst->data_type->base_type == HLSL_TYPE_UINT) + if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; sm4_dst_from_node(&instr.dsts[0], dst); @@ -4804,7 +4754,7 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir static bool type_is_float(const struct hlsl_type *type) { - return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; + return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; } static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, @@ -4841,11 +4791,11 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex /* Narrowing casts were already lowered. 
*/ assert(src_type->dimx == dst_type->dimx); - switch (dst_type->base_type) + switch (dst_type->e.numeric.type) { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: - switch (src_type->base_type) + switch (src_type->e.numeric.type) { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: @@ -4874,7 +4824,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex break; case HLSL_TYPE_INT: - switch (src_type->base_type) + switch (src_type->e.numeric.type) { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: @@ -4900,7 +4850,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex break; case HLSL_TYPE_UINT: - switch (src_type->base_type) + switch (src_type->e.numeric.type) { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: @@ -4970,7 +4920,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex switch (expr->op) { case HLSL_OP1_ABS: - switch (dst_type->base_type) + switch (dst_type->e.numeric.type) { case HLSL_TYPE_FLOAT: write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_ABS); @@ -5051,12 +5001,12 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex break; case HLSL_OP1_LOGIC_NOT: - assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); break; case HLSL_OP1_NEG: - switch (dst_type->base_type) + switch (dst_type->e.numeric.type) { case HLSL_TYPE_FLOAT: write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_NEG); @@ -5109,7 +5059,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex break; case HLSL_OP2_ADD: - switch (dst_type->base_type) + switch (dst_type->e.numeric.type) { case HLSL_TYPE_FLOAT: write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); @@ -5141,7 +5091,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex break; case HLSL_OP2_DIV: - switch 
(dst_type->base_type) + switch (dst_type->e.numeric.type) { case HLSL_TYPE_FLOAT: write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); @@ -5157,7 +5107,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex break; case HLSL_OP2_DOT: - switch (dst_type->base_type) + switch (dst_type->e.numeric.type) { case HLSL_TYPE_FLOAT: switch (arg1->data_type->dimx) @@ -5189,9 +5139,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex { const struct hlsl_type *src_type = arg1->data_type; - assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - switch (src_type->base_type) + switch (src_type->e.numeric.type) { case HLSL_TYPE_FLOAT: write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); @@ -5215,9 +5165,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex { const struct hlsl_type *src_type = arg1->data_type; - assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - switch (src_type->base_type) + switch (src_type->e.numeric.type) { case HLSL_TYPE_FLOAT: write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); @@ -5244,9 +5194,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex { const struct hlsl_type *src_type = arg1->data_type; - assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - switch (src_type->base_type) + switch (src_type->e.numeric.type) { case HLSL_TYPE_FLOAT: write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); @@ -5270,23 +5220,23 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex } case HLSL_OP2_LOGIC_AND: - assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); break; case HLSL_OP2_LOGIC_OR: - 
assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); break; case HLSL_OP2_LSHIFT: assert(type_is_integer(dst_type)); - assert(dst_type->base_type != HLSL_TYPE_BOOL); + assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); break; case HLSL_OP2_MAX: - switch (dst_type->base_type) + switch (dst_type->e.numeric.type) { case HLSL_TYPE_FLOAT: write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); @@ -5306,7 +5256,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex break; case HLSL_OP2_MIN: - switch (dst_type->base_type) + switch (dst_type->e.numeric.type) { case HLSL_TYPE_FLOAT: write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); @@ -5326,7 +5276,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex break; case HLSL_OP2_MOD: - switch (dst_type->base_type) + switch (dst_type->e.numeric.type) { case HLSL_TYPE_UINT: write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); @@ -5338,7 +5288,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex break; case HLSL_OP2_MUL: - switch (dst_type->base_type) + switch (dst_type->e.numeric.type) { case HLSL_TYPE_FLOAT: write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); @@ -5360,9 +5310,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex { const struct hlsl_type *src_type = arg1->data_type; - assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - switch (src_type->base_type) + switch (src_type->e.numeric.type) { case HLSL_TYPE_FLOAT: write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); @@ -5384,12 +5334,12 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct 
hlsl_ir_ex case HLSL_OP2_RSHIFT: assert(type_is_integer(dst_type)); - assert(dst_type->base_type != HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, + assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); + write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, &expr->node, arg1, arg2); break; - case HLSL_OP3_MOVC: + case HLSL_OP3_TERNARY: write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); break; @@ -5445,7 +5395,8 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju case HLSL_IR_JUMP_DISCARD_NZ: { - instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; + instr.opcode = VKD3D_SM4_OP_DISCARD; + instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); instr.src_count = 1; @@ -5486,7 +5437,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo instr.dst_count = 1; assert(hlsl_is_numeric_type(type)); - if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) + if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) { struct hlsl_constant_value value; @@ -5746,18 +5697,12 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc { if (instr->data_type) { - if (instr->data_type->class == HLSL_CLASS_MATRIX) + if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) { - hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered."); + hlsl_fixme(tpf->ctx, &instr->loc, "Class %#x should have been lowered or removed.", + instr->data_type->class); break; } - else if (instr->data_type->class == HLSL_CLASS_OBJECT) - { - hlsl_fixme(tpf->ctx, &instr->loc, "Object copy."); - break; - } - - assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); 
if (!instr->reg.allocated) { @@ -5854,13 +5799,21 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { if (cbuffer->reg.allocated) + { + if (hlsl_version_ge(ctx, 5, 1)) + hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource definition."); + write_sm4_dcl_constant_buffer(&tpf, cbuffer); + } } for (i = 0; i < extern_resources_count; ++i) { const struct extern_resource *resource = &extern_resources[i]; + if (hlsl_version_ge(ctx, 5, 1)) + hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource declaration."); + if (resource->regset == HLSL_REGSET_SAMPLERS) write_sm4_dcl_samplers(&tpf, resource); else if (resource->regset == HLSL_REGSET_TEXTURES) diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index 4f400d19f6f..14a3fa778e5 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -71,8 +71,16 @@ void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer) void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer) { - buffer->buffer[0] = '\0'; - buffer->content_size = 0; + vkd3d_string_buffer_truncate(buffer, 0); +} + +void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size) +{ + if (size < buffer->content_size) + { + buffer->buffer[size] = '\0'; + buffer->content_size = size; + } } static bool vkd3d_string_buffer_resize(struct vkd3d_string_buffer *buffer, int rc) @@ -224,6 +232,16 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *cache, struct cache->buffers[cache->count++] = buffer; } +void vkd3d_shader_code_from_string_buffer(struct vkd3d_shader_code *code, struct vkd3d_string_buffer *buffer) +{ + code->code = buffer->buffer; + code->size = buffer->content_size; + + buffer->buffer = NULL; + buffer->buffer_size = 0; + buffer->content_size = 0; +} + void vkd3d_shader_message_context_init(struct 
vkd3d_shader_message_context *context, enum vkd3d_shader_log_level log_level) { @@ -520,7 +538,7 @@ static const struct vkd3d_debug_option vkd3d_shader_config_options[] = {"force_validation", VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION}, /* force validation of internal shader representations */ }; -static uint64_t vkd3d_shader_init_config_flags(void) +uint64_t vkd3d_shader_init_config_flags(void) { uint64_t config_flags; const char *config; @@ -534,18 +552,14 @@ static uint64_t vkd3d_shader_init_config_flags(void) return config_flags; } -bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, - struct vkd3d_shader_message_context *message_context, const char *source_name, - const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, - unsigned int instruction_reserve) +void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vsir_program *program, + struct vkd3d_shader_message_context *message_context, const char *source_name) { parser->message_context = message_context; parser->location.source_name = source_name; parser->location.line = 1; parser->location.column = 0; - parser->ops = ops; - parser->config_flags = vkd3d_shader_init_config_flags(); - return vsir_program_init(&parser->program, version, instruction_reserve); + parser->program = program; } void VKD3D_PRINTF_FUNC(3, 4) vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, @@ -641,7 +655,15 @@ static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_sig struct vkd3d_shader_signature_element *d = &signature->elements[i]; struct signature_element *e = &src->elements[i]; - d->semantic_name = e->semantic_name; + if (!(d->semantic_name = vkd3d_strdup(e->semantic_name))) + { + for (unsigned int j = 0; j < i; ++j) + { + vkd3d_free((void *)signature->elements[j].semantic_name); + } + vkd3d_free(signature->elements); + return false; + } d->semantic_index = e->semantic_index; d->stream_index = e->stream_index; d->sysval_semantic = 
e->sysval_semantic; @@ -1375,9 +1397,9 @@ static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_des vkd3d_free(scan_descriptor_info->descriptors); } -static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info, +static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context, - struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1, struct vkd3d_shader_parser *parser) + struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) { struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; @@ -1408,27 +1430,27 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info descriptor_info1 = &local_descriptor_info1; } - vkd3d_shader_scan_context_init(&context, &parser->program.shader_version, compile_info, + vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, descriptor_info1, combined_sampler_info, message_context); if (TRACE_ON()) - vkd3d_shader_trace(&parser->program); + vkd3d_shader_trace(program); - for (i = 0; i < parser->program.instructions.count; ++i) + for (i = 0; i < program->instructions.count; ++i) { - instruction = &parser->program.instructions.elements[i]; + instruction = &program->instructions.elements[i]; if ((ret = vkd3d_shader_scan_instruction(&context, instruction)) < 0) break; } - for (i = 0; i < ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) + for (i = 0; i < ARRAY_SIZE(program->flat_constant_count); ++i) { - unsigned int size = parser->shader_desc.flat_constant_count[i].external; struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; + unsigned int size = program->flat_constant_count[i]; struct vkd3d_shader_descriptor_info1 *d; - if 
(parser->shader_desc.flat_constant_count[i].external) + if (size) { if ((d = vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &reg, &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) @@ -1438,11 +1460,11 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info if (!ret && signature_info) { - if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) + if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &program->input_signature) || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, - &parser->shader_desc.output_signature) + &program->output_signature) || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, - &parser->shader_desc.patch_constant_signature)) + &program->patch_constant_signature)) { ret = VKD3D_ERROR_OUT_OF_MEMORY; } @@ -1470,60 +1492,6 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info return ret; } -static int scan_dxbc(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context) -{ - struct vkd3d_shader_parser *parser; - int ret; - - if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) - { - WARN("Failed to initialise shader parser.\n"); - return ret; - } - - ret = scan_with_parser(compile_info, message_context, NULL, parser); - vkd3d_shader_parser_destroy(parser); - - return ret; -} - -static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context) -{ - struct vkd3d_shader_parser *parser; - int ret; - - if ((ret = vkd3d_shader_sm1_parser_create(compile_info, message_context, &parser)) < 0) - { - WARN("Failed to initialise shader parser.\n"); - return ret; - } - - ret = scan_with_parser(compile_info, message_context, NULL, parser); - vkd3d_shader_parser_destroy(parser); - -
return ret; -} - -static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context) -{ - struct vkd3d_shader_parser *parser; - int ret; - - if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) - { - WARN("Failed to initialise shader parser.\n"); - return ret; - } - - ret = scan_with_parser(compile_info, message_context, NULL, parser); - vkd3d_shader_parser_destroy(parser); - - return ret; -} - int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) { struct vkd3d_shader_message_context message_context; @@ -1543,29 +1511,45 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char vkd3d_shader_dump_shader(compile_info); - switch (compile_info->source_type) + if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) { - case VKD3D_SHADER_SOURCE_DXBC_TPF: - ret = scan_dxbc(compile_info, &message_context); - break; + FIXME("HLSL support not implemented.\n"); + ret = VKD3D_ERROR_NOT_IMPLEMENTED; + } + else + { + uint64_t config_flags = vkd3d_shader_init_config_flags(); + struct vsir_program program; - case VKD3D_SHADER_SOURCE_HLSL: - FIXME("HLSL support not implemented.\n"); - ret = VKD3D_ERROR_NOT_IMPLEMENTED; - break; + switch (compile_info->source_type) + { + case VKD3D_SHADER_SOURCE_D3D_BYTECODE: + ret = d3dbc_parse(compile_info, config_flags, &message_context, &program); + break; - case VKD3D_SHADER_SOURCE_D3D_BYTECODE: - ret = scan_d3dbc(compile_info, &message_context); - break; + case VKD3D_SHADER_SOURCE_DXBC_TPF: + ret = tpf_parse(compile_info, config_flags, &message_context, &program); + break; - case VKD3D_SHADER_SOURCE_DXBC_DXIL: - ret = scan_dxil(compile_info, &message_context); - break; + case VKD3D_SHADER_SOURCE_DXBC_DXIL: + ret = dxil_parse(compile_info, config_flags, &message_context, &program); + break; - default: - ERR("Unsupported source type %#x.\n", compile_info->source_type); - ret = 
VKD3D_ERROR_INVALID_ARGUMENT; - break; + default: + ERR("Unsupported source type %#x.\n", compile_info->source_type); + ret = VKD3D_ERROR_INVALID_ARGUMENT; + break; + } + + if (ret < 0) + { + WARN("Failed to parse shader.\n"); + } + else + { + ret = vsir_program_scan(&program, compile_info, &message_context, NULL); + vsir_program_cleanup(&program); + } } vkd3d_shader_message_context_trace_messages(&message_context); @@ -1575,12 +1559,11 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char return ret; } -static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, + struct vkd3d_shader_message_context *message_context) { struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; - struct vkd3d_glsl_generator *glsl_generator; struct vkd3d_shader_compile_info scan_info; int ret; @@ -1589,30 +1572,22 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, switch (compile_info->target_type) { case VKD3D_SHADER_TARGET_D3D_ASM: - ret = vkd3d_dxbc_binary_to_text(&parser->program, compile_info, out, VSIR_ASM_D3D); + ret = d3d_asm_compile(program, compile_info, out, VSIR_ASM_FLAG_NONE); break; case VKD3D_SHADER_TARGET_GLSL: - if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) + if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) return ret; - if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->program.shader_version, - message_context, &parser->location))) - { - ERR("Failed to create GLSL generator.\n"); - vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - return VKD3D_ERROR; - } - - ret = 
vkd3d_glsl_generator_generate(glsl_generator, &parser->program, out); - vkd3d_glsl_generator_destroy(glsl_generator); + ret = glsl_compile(program, config_flags, compile_info, out, message_context); vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); break; case VKD3D_SHADER_TARGET_SPIRV_BINARY: case VKD3D_SHADER_TARGET_SPIRV_TEXT: - if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) + if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) return ret; - ret = spirv_compile(parser, &scan_descriptor_info, compile_info, out, message_context); + ret = spirv_compile(program, config_flags, &scan_descriptor_info, + compile_info, out, message_context); vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); break; @@ -1624,24 +1599,6 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, return ret; } -static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -{ - struct vkd3d_shader_parser *parser; - int ret; - - if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) - { - WARN("Failed to initialise shader parser.\n"); - return ret; - } - - ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); - - vkd3d_shader_parser_destroy(parser); - return ret; -} - static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { @@ -1657,42 +1614,6 @@ static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, return ret; } -static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -{ - struct vkd3d_shader_parser *parser; - int ret; - - if ((ret = 
vkd3d_shader_sm1_parser_create(compile_info, message_context, &parser)) < 0) - { - WARN("Failed to initialise shader parser.\n"); - return ret; - } - - ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); - - vkd3d_shader_parser_destroy(parser); - return ret; -} - -static int compile_dxbc_dxil(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -{ - struct vkd3d_shader_parser *parser; - int ret; - - if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) - { - WARN("Failed to initialise shader parser.\n"); - return ret; - } - - ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); - - vkd3d_shader_parser_destroy(parser); - return ret; -} - int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, char **messages) { @@ -1713,26 +1634,44 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, vkd3d_shader_dump_shader(compile_info); - switch (compile_info->source_type) + if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) { - case VKD3D_SHADER_SOURCE_DXBC_TPF: - ret = compile_dxbc_tpf(compile_info, out, &message_context); - break; + ret = compile_hlsl(compile_info, out, &message_context); + } + else + { + uint64_t config_flags = vkd3d_shader_init_config_flags(); + struct vsir_program program; - case VKD3D_SHADER_SOURCE_HLSL: - ret = compile_hlsl(compile_info, out, &message_context); - break; + switch (compile_info->source_type) + { + case VKD3D_SHADER_SOURCE_D3D_BYTECODE: + ret = d3dbc_parse(compile_info, config_flags, &message_context, &program); + break; - case VKD3D_SHADER_SOURCE_D3D_BYTECODE: - ret = compile_d3d_bytecode(compile_info, out, &message_context); - break; + case VKD3D_SHADER_SOURCE_DXBC_TPF: + ret = tpf_parse(compile_info, config_flags, &message_context, &program); + break; - case 
VKD3D_SHADER_SOURCE_DXBC_DXIL: - ret = compile_dxbc_dxil(compile_info, out, &message_context); - break; + case VKD3D_SHADER_SOURCE_DXBC_DXIL: + ret = dxil_parse(compile_info, config_flags, &message_context, &program); + break; - default: - vkd3d_unreachable(); + default: + ERR("Unsupported source type %#x.\n", compile_info->source_type); + ret = VKD3D_ERROR_INVALID_ARGUMENT; + break; + } + + if (ret < 0) + { + WARN("Failed to parse shader.\n"); + } + else + { + ret = vsir_program_compile(&program, config_flags, compile_info, out, &message_context); + vsir_program_cleanup(&program); + } } vkd3d_shader_message_context_trace_messages(&message_context); @@ -1830,6 +1769,10 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu void shader_signature_cleanup(struct shader_signature *signature) { + for (unsigned int i = 0; i < signature->element_count; ++i) + { + vkd3d_free((void *)signature->elements[i].semantic_name); + } vkd3d_free(signature->elements); signature->elements = NULL; } @@ -1887,6 +1830,10 @@ void vkd3d_shader_free_shader_signature(struct vkd3d_shader_signature *signature { TRACE("signature %p.\n", signature); + for (unsigned int i = 0; i < signature->element_count; ++i) + { + vkd3d_free((void *)signature->elements[i].semantic_name); + } vkd3d_free(signature->elements); signature->elements = NULL; } @@ -1937,13 +1884,18 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( VKD3D_SHADER_TARGET_SPIRV_TEXT, #endif VKD3D_SHADER_TARGET_D3D_ASM, -#if 0 +#ifdef VKD3D_SHADER_UNSUPPORTED_GLSL VKD3D_SHADER_TARGET_GLSL, #endif }; static const enum vkd3d_shader_target_type hlsl_types[] = { + VKD3D_SHADER_TARGET_SPIRV_BINARY, +#ifdef HAVE_SPIRV_TOOLS + VKD3D_SHADER_TARGET_SPIRV_TEXT, +#endif + VKD3D_SHADER_TARGET_D3D_ASM, VKD3D_SHADER_TARGET_D3D_BYTECODE, VKD3D_SHADER_TARGET_DXBC_TPF, VKD3D_SHADER_TARGET_FX, @@ -1958,13 +1910,21 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( 
VKD3D_SHADER_TARGET_D3D_ASM, }; +#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL + static const enum vkd3d_shader_target_type dxbc_dxil_types[] = + { + VKD3D_SHADER_TARGET_SPIRV_BINARY, +# ifdef HAVE_SPIRV_TOOLS + VKD3D_SHADER_TARGET_SPIRV_TEXT, +# endif + VKD3D_SHADER_TARGET_D3D_ASM, + }; +#endif + TRACE("source_type %#x, count %p.\n", source_type, count); switch (source_type) { -#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL - case VKD3D_SHADER_SOURCE_DXBC_DXIL: -#endif case VKD3D_SHADER_SOURCE_DXBC_TPF: *count = ARRAY_SIZE(dxbc_tpf_types); return dxbc_tpf_types; @@ -1977,6 +1937,12 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( *count = ARRAY_SIZE(d3dbc_types); return d3dbc_types; +#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL + case VKD3D_SHADER_SOURCE_DXBC_DXIL: + *count = ARRAY_SIZE(dxbc_dxil_types); + return dxbc_dxil_types; +#endif + default: *count = 0; return NULL; @@ -2050,7 +2016,7 @@ void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, { void *params; - if (count > allocator->count - allocator->index) + if (!allocator->current || count > allocator->count - allocator->index) { struct vkd3d_shader_param_node *next; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index 2d3b3254638..29b8d6ad022 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -100,6 +100,7 @@ enum vkd3d_shader_error VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE = 2300, VKD3D_SHADER_WARNING_SPV_INVALID_UAV_FLAGS = 2301, + VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG = 2302, VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY = 3000, VKD3D_SHADER_ERROR_RS_INVALID_VERSION = 3001, @@ -148,6 +149,8 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT = 5027, VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, + VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, + 
VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, @@ -199,6 +202,7 @@ enum vkd3d_shader_error VKD3D_SHADER_WARNING_DXIL_INVALID_MASK = 8307, VKD3D_SHADER_WARNING_DXIL_INVALID_OPERATION = 8308, VKD3D_SHADER_WARNING_DXIL_IGNORING_ATTACHMENT = 8309, + VKD3D_SHADER_WARNING_DXIL_UNDEFINED_OPERAND = 8310, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED = 9000, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER = 9001, @@ -218,6 +222,8 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX = 9015, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW = 9016, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE = 9017, + VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION = 9018, + VKD3D_SHADER_ERROR_VSIR_INVALID_GS = 9019, VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, }; @@ -445,6 +451,7 @@ enum vkd3d_shader_opcode VKD3DSIH_NOT, VKD3DSIH_NRM, VKD3DSIH_OR, + VKD3DSIH_ORD, VKD3DSIH_PHASE, VKD3DSIH_PHI, VKD3DSIH_POW, @@ -516,10 +523,31 @@ enum vkd3d_shader_opcode VKD3DSIH_UMAX, VKD3DSIH_UMIN, VKD3DSIH_UMUL, + VKD3DSIH_UNO, VKD3DSIH_USHR, VKD3DSIH_UTOD, VKD3DSIH_UTOF, VKD3DSIH_UTOU, + VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL, + VKD3DSIH_WAVE_ACTIVE_BALLOT, + VKD3DSIH_WAVE_ACTIVE_BIT_AND, + VKD3DSIH_WAVE_ACTIVE_BIT_OR, + VKD3DSIH_WAVE_ACTIVE_BIT_XOR, + VKD3DSIH_WAVE_ALL_BIT_COUNT, + VKD3DSIH_WAVE_ALL_TRUE, + VKD3DSIH_WAVE_ANY_TRUE, + VKD3DSIH_WAVE_IS_FIRST_LANE, + VKD3DSIH_WAVE_OP_ADD, + VKD3DSIH_WAVE_OP_IMAX, + VKD3DSIH_WAVE_OP_IMIN, + VKD3DSIH_WAVE_OP_MAX, + VKD3DSIH_WAVE_OP_MIN, + VKD3DSIH_WAVE_OP_MUL, + VKD3DSIH_WAVE_OP_UMAX, + VKD3DSIH_WAVE_OP_UMIN, + VKD3DSIH_WAVE_PREFIX_BIT_COUNT, + VKD3DSIH_WAVE_READ_LANE_AT, + VKD3DSIH_WAVE_READ_LANE_FIRST, VKD3DSIH_XOR, VKD3DSIH_INVALID, @@ -583,6 +611,8 @@ enum vkd3d_shader_register_type VKD3DSPR_OUTSTENCILREF, VKD3DSPR_UNDEF, VKD3DSPR_SSA, + VKD3DSPR_WAVELANECOUNT, + VKD3DSPR_WAVELANEINDEX, VKD3DSPR_COUNT, @@ -620,14 +650,16 @@ enum vkd3d_data_type 
VKD3D_DATA_UINT8, VKD3D_DATA_UINT64, VKD3D_DATA_BOOL, + VKD3D_DATA_UINT16, + VKD3D_DATA_HALF, VKD3D_DATA_COUNT, }; static inline bool data_type_is_integer(enum vkd3d_data_type data_type) { - return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT - || data_type == VKD3D_DATA_UINT64; + return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT16 + || data_type == VKD3D_DATA_UINT || data_type == VKD3D_DATA_UINT64; } static inline bool data_type_is_bool(enum vkd3d_data_type data_type) @@ -635,6 +667,11 @@ static inline bool data_type_is_bool(enum vkd3d_data_type data_type) return data_type == VKD3D_DATA_BOOL; } +static inline bool data_type_is_floating_point(enum vkd3d_data_type data_type) +{ + return data_type == VKD3D_DATA_HALF || data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; +} + static inline bool data_type_is_64_bit(enum vkd3d_data_type data_type) { return data_type == VKD3D_DATA_DOUBLE || data_type == VKD3D_DATA_UINT64; @@ -749,11 +786,21 @@ enum vkd3d_shader_uav_flags VKD3DSUF_ORDER_PRESERVING_COUNTER = 0x100, }; +enum vkd3d_shader_atomic_rmw_flags +{ + VKD3DARF_SEQ_CST = 0x1, + VKD3DARF_VOLATILE = 0x2, +}; + enum vkd3d_tessellator_domain { + VKD3D_TESSELLATOR_DOMAIN_INVALID = 0, + VKD3D_TESSELLATOR_DOMAIN_LINE = 1, VKD3D_TESSELLATOR_DOMAIN_TRIANGLE = 2, VKD3D_TESSELLATOR_DOMAIN_QUAD = 3, + + VKD3D_TESSELLATOR_DOMAIN_COUNT = 4, }; #define VKD3DSI_NONE 0x0 @@ -764,6 +811,7 @@ enum vkd3d_tessellator_domain #define VKD3DSI_SAMPLE_INFO_UINT 0x1 #define VKD3DSI_SAMPLER_COMPARISON_MODE 0x1 #define VKD3DSI_SHIFT_UNMASKED 0x1 +#define VKD3DSI_WAVE_PREFIX 0x1 #define VKD3DSI_PRECISE_X 0x100 #define VKD3DSI_PRECISE_Y 0x200 @@ -808,6 +856,8 @@ enum vkd3d_shader_type VKD3D_SHADER_TYPE_COUNT, }; +struct vkd3d_shader_message_context; + struct vkd3d_shader_version { enum vkd3d_shader_type type; @@ -1025,7 +1075,7 @@ struct signature_element 
*vsir_signature_find_element_for_reg(const struct shade unsigned int reg_idx, unsigned int write_mask); void shader_signature_cleanup(struct shader_signature *signature); -struct vkd3d_shader_desc +struct dxbc_shader_desc { const uint32_t *byte_code; size_t byte_code_size; @@ -1033,11 +1083,6 @@ struct vkd3d_shader_desc struct shader_signature input_signature; struct shader_signature output_signature; struct shader_signature patch_constant_signature; - - struct - { - uint32_t used, external; - } flat_constant_count[3]; }; struct vkd3d_shader_register_semantic @@ -1079,14 +1124,18 @@ struct vkd3d_shader_tgsm struct vkd3d_shader_tgsm_raw { struct vkd3d_shader_dst_param reg; + unsigned int alignment; unsigned int byte_count; + bool zero_init; }; struct vkd3d_shader_tgsm_structured { struct vkd3d_shader_dst_param reg; + unsigned int alignment; unsigned int byte_stride; unsigned int structure_count; + bool zero_init; }; struct vkd3d_shader_thread_group_size @@ -1121,6 +1170,8 @@ enum vkd3d_primitive_type VKD3D_PT_TRIANGLELIST_ADJ = 12, VKD3D_PT_TRIANGLESTRIP_ADJ = 13, VKD3D_PT_PATCH = 14, + + VKD3D_PT_COUNT = 15, }; struct vkd3d_shader_primitive_type @@ -1216,6 +1267,12 @@ static inline bool register_is_scalar_constant_zero(const struct vkd3d_shader_re && (data_type_is_64_bit(reg->data_type) ? 
!reg->u.immconst_u64[0] : !reg->u.immconst_u32[0]); } +static inline bool register_is_numeric_array(const struct vkd3d_shader_register *reg) +{ + return (reg->type == VKD3DSPR_IMMCONSTBUFFER || reg->type == VKD3DSPR_IDXTEMP + || reg->type == VKD3DSPR_GROUPSHAREDMEM); +} + static inline bool vsir_register_is_label(const struct vkd3d_shader_register *reg) { return reg->type == VKD3DSPR_LABEL; @@ -1268,6 +1325,8 @@ struct vkd3d_shader_instruction_array struct vkd3d_shader_immediate_constant_buffer **icbs; size_t icb_capacity; size_t icb_count; + + struct vkd3d_shader_src_param *outpointid_param; }; bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *instructions, unsigned int reserve); @@ -1278,6 +1337,8 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins struct vkd3d_shader_immediate_constant_buffer *icb); bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, unsigned int dst, unsigned int src); +struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( + struct vkd3d_shader_instruction_array *instructions); void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); enum vkd3d_shader_config_flags @@ -1290,7 +1351,12 @@ struct vsir_program struct vkd3d_shader_version shader_version; struct vkd3d_shader_instruction_array instructions; + struct shader_signature input_signature; + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; + unsigned int input_control_point_count, output_control_point_count; + unsigned int flat_constant_count[3]; unsigned int block_count; unsigned int temp_count; unsigned int ssa_count; @@ -1300,8 +1366,15 @@ struct vsir_program size_t block_name_count; }; -bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); void vsir_program_cleanup(struct vsir_program *program); +int 
vsir_program_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, + struct vkd3d_shader_message_context *message_context); +bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); +enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); +enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, + const char *source_name, struct vkd3d_shader_message_context *message_context); static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params( struct vsir_program *program, unsigned int count) @@ -1319,32 +1392,21 @@ struct vkd3d_shader_parser { struct vkd3d_shader_message_context *message_context; struct vkd3d_shader_location location; + struct vsir_program *program; bool failed; - - struct vkd3d_shader_desc shader_desc; - const struct vkd3d_shader_parser_ops *ops; - struct vsir_program program; - - uint64_t config_flags; -}; - -struct vkd3d_shader_parser_ops -{ - void (*parser_destroy)(struct vkd3d_shader_parser *parser); }; void vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, enum vkd3d_shader_error error, const char *format, ...) 
VKD3D_PRINTF_FUNC(3, 4); -bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, - struct vkd3d_shader_message_context *message_context, const char *source_name, - const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, - unsigned int instruction_reserve); +void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vsir_program *program, + struct vkd3d_shader_message_context *message_context, const char *source_name); void vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser, enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(3, 4); -static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parser) +static inline enum vkd3d_result vkd3d_shader_parser_validate(struct vkd3d_shader_parser *parser, uint64_t config_flags) { - parser->ops->parser_destroy(parser); + return vsir_program_validate(parser->program, config_flags, + parser->location.source_name, parser->message_context); } struct vkd3d_shader_descriptor_info1 @@ -1385,21 +1447,22 @@ struct vkd3d_string_buffer_cache size_t count, max_count, capacity; }; -enum vsir_asm_dialect +enum vsir_asm_flags { - VSIR_ASM_VSIR, - VSIR_ASM_D3D, + VSIR_ASM_FLAG_NONE = 0, + VSIR_ASM_FLAG_DUMP_TYPES = 0x1, }; -enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, +enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, enum vsir_asm_dialect dialect); + struct vkd3d_shader_code *out, enum vsir_asm_flags flags); void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer); struct vkd3d_string_buffer *vkd3d_string_buffer_get(struct vkd3d_string_buffer_cache *list); void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer); void vkd3d_string_buffer_cache_cleanup(struct vkd3d_string_buffer_cache *list); void vkd3d_string_buffer_cache_init(struct vkd3d_string_buffer_cache *list); void 
vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer); +void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size); int vkd3d_string_buffer_print_f32(struct vkd3d_string_buffer *buffer, float f); int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d); int vkd3d_string_buffer_printf(struct vkd3d_string_buffer *buffer, const char *format, ...) VKD3D_PRINTF_FUNC(2, 3); @@ -1408,6 +1471,7 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *list, struct vkd3d_string_buffer_trace_(buffer, __FUNCTION__) void vkd3d_string_buffer_trace_(const struct vkd3d_string_buffer *buffer, const char *function); int vkd3d_string_buffer_vprintf(struct vkd3d_string_buffer *buffer, const char *format, va_list args); +void vkd3d_shader_code_from_string_buffer(struct vkd3d_shader_code *code, struct vkd3d_string_buffer *buffer); struct vkd3d_bytecode_buffer { @@ -1472,35 +1536,32 @@ void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const s enum vkd3d_shader_error error, const char *format, va_list args); void vkd3d_shader_dump_shader(const struct vkd3d_shader_compile_info *compile_info); +uint64_t vkd3d_shader_init_config_flags(void); void vkd3d_shader_trace_text_(const char *text, size_t size, const char *function); #define vkd3d_shader_trace_text(text, size) \ vkd3d_shader_trace_text_(text, size, __FUNCTION__) -int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); -int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); -int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); +int d3dbc_parse(const struct 
vkd3d_shader_compile_info *compile_info, uint64_t config_flags, + struct vkd3d_shader_message_context *message_context, struct vsir_program *program); +int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, + struct vkd3d_shader_message_context *message_context, struct vsir_program *program); +int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, + struct vkd3d_shader_message_context *message_context, struct vsir_program *program); -void free_shader_desc(struct vkd3d_shader_desc *desc); +void free_dxbc_shader_desc(struct dxbc_shader_desc *desc); int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc); + struct vkd3d_shader_message_context *message_context, const char *source_name, struct dxbc_shader_desc *desc); int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); -struct vkd3d_glsl_generator; - -struct vkd3d_glsl_generator *vkd3d_glsl_generator_create(const struct vkd3d_shader_version *version, - struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location); -int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, - struct vsir_program *program, struct vkd3d_shader_code *out); -void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); +int glsl_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, + struct vkd3d_shader_message_context *message_context); #define SPIRV_MAX_SRC_COUNT 6 -int spirv_compile(struct vkd3d_shader_parser *parser, +int spirv_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, const struct 
vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); @@ -1513,17 +1574,17 @@ int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser); - static inline enum vkd3d_shader_component_type vkd3d_component_type_from_data_type( enum vkd3d_data_type data_type) { switch (data_type) { + case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_FLOAT: case VKD3D_DATA_UNORM: case VKD3D_DATA_SNORM: return VKD3D_SHADER_COMPONENT_FLOAT; + case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_UINT: return VKD3D_SHADER_COMPONENT_UINT; case VKD3D_DATA_INT: @@ -1585,6 +1646,11 @@ static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resourc } } +static inline bool component_type_is_64_bit(enum vkd3d_shader_component_type component_type) +{ + return component_type == VKD3D_SHADER_COMPONENT_DOUBLE || component_type == VKD3D_SHADER_COMPONENT_UINT64; +} + enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, unsigned int index); @@ -1724,6 +1790,31 @@ static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t writ return compacted_swizzle; } +static inline uint32_t vsir_swizzle_from_writemask(unsigned int writemask) +{ + static const unsigned int swizzles[16] = + { + 0, + VKD3D_SHADER_SWIZZLE(X, X, X, X), + VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), + VKD3D_SHADER_SWIZZLE(X, Y, X, X), + VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), + VKD3D_SHADER_SWIZZLE(X, Z, X, X), + VKD3D_SHADER_SWIZZLE(Y, Z, X, X), + VKD3D_SHADER_SWIZZLE(X, Y, Z, X), + VKD3D_SHADER_SWIZZLE(W, W, W, W), + VKD3D_SHADER_SWIZZLE(X, W, 
X, X), + VKD3D_SHADER_SWIZZLE(Y, W, X, X), + VKD3D_SHADER_SWIZZLE(X, Y, W, X), + VKD3D_SHADER_SWIZZLE(Z, W, X, X), + VKD3D_SHADER_SWIZZLE(X, Z, W, X), + VKD3D_SHADER_SWIZZLE(Y, Z, W, X), + VKD3D_SHADER_SWIZZLE(X, Y, Z, W), + }; + + return swizzles[writemask & 0xf]; +} + struct vkd3d_struct { enum vkd3d_shader_structure_type type; @@ -1760,7 +1851,4 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void void dxbc_writer_init(struct dxbc_writer *dxbc); int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); -enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info); - #endif /* __VKD3D_SHADER_PRIVATE_H */ diff --git a/libs/vkd3d/libs/vkd3d/cache.c b/libs/vkd3d/libs/vkd3d/cache.c new file mode 100644 index 00000000000..a0a29ed30cb --- /dev/null +++ b/libs/vkd3d/libs/vkd3d/cache.c @@ -0,0 +1,254 @@ +/* + * Copyright 2024 Stefan Dösinger for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_private.h" + +struct vkd3d_cache_entry_header +{ + uint64_t hash; /* Hash of the key; see vkd3d_shader_cache_hash_key(). */ + uint64_t key_size; /* Size of the key, in bytes. */ + uint64_t value_size; /* Size of the value, in bytes. */ +}; + +struct vkd3d_shader_cache +{ + unsigned int refcount; + struct vkd3d_mutex lock; /* Taken by put/get around every access to the tree. */ + + struct rb_tree tree; /* Entries, ordered by vkd3d_shader_cache_compare_key(). */ +}; + +struct shader_cache_entry +{ + struct vkd3d_cache_entry_header h; + struct rb_entry entry; + uint8_t *payload; /* h.key_size key bytes immediately followed by h.value_size value bytes. */ +}; + +struct shader_cache_key +{ + uint64_t hash; + const void *key; /* Key bytes being looked up. */ + uint64_t key_size; +}; + +static int vkd3d_shader_cache_compare_key(const void *key, const struct rb_entry *entry) +{ + const struct shader_cache_entry *e = RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry); + const struct shader_cache_key *k = key; /* NOTE(review): every rb_get()/rb_put() caller must pass a shader_cache_key. */ + int ret; + + if ((ret = vkd3d_u64_compare(k->hash, e->h.hash))) + return ret; + if ((ret = vkd3d_u64_compare(k->key_size, e->h.key_size))) + return ret; + + /* Until now we have not seen an actual hash collision. If the key didn't match it was always + * due to a bug in the serialization code or memory corruption. If you see this FIXME please + * investigate.
*/ + if ((ret = memcmp(k->key, e->payload, k->key_size))) + FIXME("Actual case of a hash collision found.\n"); + return ret; +} + +static void vkd3d_shader_cache_add_entry(struct vkd3d_shader_cache *cache, struct shader_cache_entry *e) +{ + const struct shader_cache_key k = {.hash = e->h.hash, .key = e->payload, .key_size = e->h.key_size}; + rb_put(&cache->tree, &k, &e->entry); /* The comparator reads its key as a struct shader_cache_key. */ +} + +int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) +{ + struct vkd3d_shader_cache *object; + + TRACE("%p.\n", cache); + + object = vkd3d_malloc(sizeof(*object)); + if (!object) + return VKD3D_ERROR_OUT_OF_MEMORY; + + object->refcount = 1; /* The reference handed back through *cache. */ + rb_init(&object->tree, vkd3d_shader_cache_compare_key); + vkd3d_mutex_init(&object->lock); + + *cache = object; + + return VKD3D_OK; +} + +unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache) +{ + unsigned int refcount = vkd3d_atomic_increment_u32(&cache->refcount); + TRACE("cache %p refcount %u.\n", cache, refcount); + return refcount; +} + +static void vkd3d_shader_cache_destroy_entry(struct rb_entry *entry, void *context) +{ + struct shader_cache_entry *e = RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry); + vkd3d_free(e->payload); + vkd3d_free(e); +} + +unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache) +{ + unsigned int refcount = vkd3d_atomic_decrement_u32(&cache->refcount); + TRACE("cache %p refcount %u.\n", cache, refcount); + + if (refcount) + return refcount; + + rb_destroy(&cache->tree, vkd3d_shader_cache_destroy_entry, NULL); /* Last reference gone: free every entry. */ + vkd3d_mutex_destroy(&cache->lock); + + vkd3d_free(cache); + return 0; +} + +static uint64_t vkd3d_shader_cache_hash_key(const void *key, size_t size) +{ + static const uint64_t fnv_prime = 0x00000100000001b3; /* 64-bit FNV prime. */ + uint64_t hash = 0xcbf29ce484222325; /* 64-bit FNV offset basis. */ + const uint8_t *k = key; + size_t i; + + for (i = 0; i < size; ++i) + hash = (hash ^ k[i]) * fnv_prime; /* FNV-1a step: xor the byte in, then multiply. */ + + return hash; +} + +static void vkd3d_shader_cache_lock(struct vkd3d_shader_cache *cache) +{ + vkd3d_mutex_lock(&cache->lock); +} + +static void vkd3d_shader_cache_unlock(struct vkd3d_shader_cache
*cache) +{ + vkd3d_mutex_unlock(&cache->lock); +} + +int vkd3d_shader_cache_put(struct vkd3d_shader_cache *cache, + const void *key, size_t key_size, const void *value, size_t value_size) +{ + struct shader_cache_entry *e; + struct shader_cache_key k; + struct rb_entry *entry; + enum vkd3d_result ret; + + TRACE("%p, %p, %#zx, %p, %#zx.\n", cache, key, key_size, value, value_size); + + k.hash = vkd3d_shader_cache_hash_key(key, key_size); + k.key = key; + k.key_size = key_size; + + vkd3d_shader_cache_lock(cache); /* Held until done: the lookup and the insertion share one critical section. */ + + entry = rb_get(&cache->tree, &k); + e = entry ? RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry) : NULL; + + if (e) + { + WARN("Key already exists, returning VKD3D_ERROR_KEY_ALREADY_EXISTS.\n"); + ret = VKD3D_ERROR_KEY_ALREADY_EXISTS; + goto done; + } + + e = vkd3d_malloc(sizeof(*e)); + if (!e) + { + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto done; + } + e->payload = vkd3d_malloc(key_size + value_size); /* One allocation holds the key followed by the value. */ + if (!e->payload) + { + vkd3d_free(e); + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto done; + } + + e->h.key_size = key_size; + e->h.value_size = value_size; + e->h.hash = k.hash; + memcpy(e->payload, key, key_size); + memcpy(e->payload + key_size, value, value_size); + + vkd3d_shader_cache_add_entry(cache, e); + TRACE("Cache entry %#"PRIx64" stored.\n", k.hash); + ret = VKD3D_OK; + +done: + vkd3d_shader_cache_unlock(cache); + return ret; +} + +int vkd3d_shader_cache_get(struct vkd3d_shader_cache *cache, + const void *key, size_t key_size, void *value, size_t *value_size) +{ + struct shader_cache_entry *e; + struct shader_cache_key k; + struct rb_entry *entry; + enum vkd3d_result ret; + size_t size_in; + + TRACE("%p, %p, %#zx, %p, %p.\n", cache, key, key_size, value, value_size); + + size_in = *value_size; /* Caller's buffer size; *value_size is overwritten once the entry is found. */ + + k.hash = vkd3d_shader_cache_hash_key(key, key_size); + k.key = key; + k.key_size = key_size; + + vkd3d_shader_cache_lock(cache); + + entry = rb_get(&cache->tree, &k); + if (!entry) + { + WARN("Entry not found.\n"); + ret = VKD3D_ERROR_NOT_FOUND; +
goto done; + } + + e = RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry); + + *value_size = e->h.value_size; /* Set whenever the entry is found, even for size queries and MORE_DATA. */ + if (!value) /* Size query only: nothing is copied. */ + { + TRACE("Found item %#"PRIx64", returning needed size %#"PRIx64".\n", + e->h.hash, e->h.value_size); + ret = VKD3D_OK; + goto done; + } + + if (size_in < e->h.value_size) + { + WARN("Output buffer is too small for item %#"PRIx64", got %#zx want %#"PRIx64".\n", + e->h.hash, size_in, e->h.value_size); + ret = VKD3D_ERROR_MORE_DATA; + goto done; + } + + memcpy(value, e->payload + e->h.key_size, e->h.value_size); /* The value follows the key in the payload. */ + ret = VKD3D_OK; + TRACE("Returning cached item %#"PRIx64".\n", e->h.hash); + +done: + vkd3d_shader_cache_unlock(cache); + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index 7115a74a6f2..95366d3441b 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -2052,20 +2052,15 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, * state when GPU finishes execution of a command list.
*/ if (is_swapchain_image) { - if (resource->present_state == D3D12_RESOURCE_STATE_PRESENT) - { - *access_mask = VK_ACCESS_MEMORY_READ_BIT; - *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - if (image_layout) - *image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - return true; - } - else if (resource->present_state != D3D12_RESOURCE_STATE_COMMON) - { - vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, + if (resource->present_state != D3D12_RESOURCE_STATE_PRESENT) + return vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout); - return true; - } + + *access_mask = VK_ACCESS_MEMORY_READ_BIT; + *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + if (image_layout) + *image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + return true; } *access_mask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT; @@ -5414,6 +5409,26 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 | ((colour->uint32[2] & 0x3ff) << 22); return vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); + case DXGI_FORMAT_B5G6R5_UNORM: + colour->uint32[0] = (colour->uint32[2] & 0x1f) + | ((colour->uint32[1] & 0x3f) << 5) + | ((colour->uint32[0] & 0x1f) << 11); + return vkd3d_get_format(device, DXGI_FORMAT_R16_UINT, false); + + case DXGI_FORMAT_B5G5R5A1_UNORM: + colour->uint32[0] = (colour->uint32[2] & 0x1f) + | ((colour->uint32[1] & 0x1f) << 5) + | ((colour->uint32[0] & 0x1f) << 10) + | ((colour->uint32[3] & 0x1) << 15); + return vkd3d_get_format(device, DXGI_FORMAT_R16_UINT, false); + + case DXGI_FORMAT_B4G4R4A4_UNORM: + colour->uint32[0] = (colour->uint32[2] & 0xf) + | ((colour->uint32[1] & 0xf) << 4) + | ((colour->uint32[0] & 0xf) << 8) + | ((colour->uint32[3] & 0xf) << 12); + return vkd3d_get_format(device, DXGI_FORMAT_R16_UINT, false); + default: return NULL; } diff --git a/libs/vkd3d/libs/vkd3d/device.c 
b/libs/vkd3d/libs/vkd3d/device.c index 17c7ccb3e31..cfc9c5f5ed3 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -89,11 +89,13 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge), VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore), /* EXT extensions */ + VK_EXTENSION(EXT_4444_FORMATS, EXT_4444_formats), VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), + VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), VK_EXTENSION(EXT_MUTABLE_DESCRIPTOR_TYPE, EXT_mutable_descriptor_type), VK_EXTENSION(EXT_ROBUSTNESS_2, EXT_robustness2), VK_EXTENSION(EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION, EXT_shader_demote_to_helper_invocation), @@ -270,13 +272,15 @@ static bool has_extension(const VkExtensionProperties *extensions, for (i = 0; i < count; ++i) { - if (is_extension_disabled(extension_name)) - { - WARN("Extension %s is disabled.\n", debugstr_a(extension_name)); - continue; - } if (!strcmp(extensions[i].extensionName, extension_name)) + { + if (is_extension_disabled(extension_name)) + { + WARN("Extension %s is disabled.\n", debugstr_a(extension_name)); + return false; + } return true; + } } return false; } @@ -420,8 +424,6 @@ static HRESULT vkd3d_init_instance_caps(struct vkd3d_instance *instance, ERR("Failed to enumerate instance extensions, vr %d.\n", vr); return hresult_from_vk_result(vr); } - if (!count) - return S_OK; if (!(vk_extensions = vkd3d_calloc(count, sizeof(*vk_extensions)))) return E_OUTOFMEMORY; @@ -557,12 +559,14 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, const struct 
vkd3d_optional_instance_extensions_info *optional_extensions; const struct vkd3d_application_info *vkd3d_application_info; const struct vkd3d_host_time_domain_info *time_domain_info; + PFN_vkEnumerateInstanceVersion vkEnumerateInstanceVersion; bool *user_extension_supported = NULL; VkApplicationInfo application_info; VkInstanceCreateInfo instance_info; char application_name[PATH_MAX]; uint32_t extension_count; const char **extensions; + uint32_t vk_api_version; VkInstance vk_instance; VkResult vr; HRESULT hr; @@ -615,6 +619,16 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, application_info.apiVersion = VK_API_VERSION_1_0; instance->api_version = VKD3D_API_VERSION_1_0; + /* vkEnumerateInstanceVersion was added in Vulkan 1.1, and its absence indicates only 1.0 is supported. */ + vkEnumerateInstanceVersion = (void *)vk_global_procs->vkGetInstanceProcAddr(NULL, "vkEnumerateInstanceVersion"); + if (vkEnumerateInstanceVersion && vkEnumerateInstanceVersion(&vk_api_version) >= 0 + && vk_api_version >= VK_API_VERSION_1_1) + { + TRACE("Vulkan API version 1.1 is available; requesting it.\n"); + application_info.apiVersion = VK_API_VERSION_1_1; + } + instance->vk_api_version = application_info.apiVersion; + if ((vkd3d_application_info = vkd3d_find_struct(create_info->next, APPLICATION_INFO))) { if (vkd3d_application_info->application_name) @@ -774,6 +788,11 @@ VkInstance vkd3d_instance_get_vk_instance(struct vkd3d_instance *instance) return instance->vk_instance; } +static bool d3d12_device_environment_is_vulkan_min_1_1(struct d3d12_device *device) +{ + return device->environment == VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1; +} + struct vkd3d_physical_device_info { /* properties */ @@ -782,6 +801,7 @@ struct vkd3d_physical_device_info VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties; VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT 
vertex_divisor_properties; + VkPhysicalDeviceSubgroupProperties subgroup_properties; VkPhysicalDeviceProperties2KHR properties2; @@ -789,6 +809,7 @@ struct vkd3d_physical_device_info VkPhysicalDeviceConditionalRenderingFeaturesEXT conditional_rendering_features; VkPhysicalDeviceDepthClipEnableFeaturesEXT depth_clip_features; VkPhysicalDeviceDescriptorIndexingFeaturesEXT descriptor_indexing_features; + VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_features; VkPhysicalDeviceRobustness2FeaturesEXT robustness2_features; VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote_features; VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features; @@ -796,6 +817,7 @@ struct vkd3d_physical_device_info VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor_features; VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore_features; VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT mutable_features; + VkPhysicalDevice4444FormatsFeaturesEXT formats4444_features; VkPhysicalDeviceFeatures2 features2; }; @@ -808,6 +830,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; + VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features; VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; @@ -818,13 +841,16 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i VkPhysicalDeviceMaintenance3Properties *maintenance3_properties; VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties; VkPhysicalDevice physical_device = 
device->vk_physical_device; + VkPhysicalDevice4444FormatsFeaturesEXT *formats4444_features; VkPhysicalDeviceTransformFeedbackFeaturesEXT *xfb_features; struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; + VkPhysicalDeviceSubgroupProperties *subgroup_properties; memset(info, 0, sizeof(*info)); conditional_rendering_features = &info->conditional_rendering_features; depth_clip_features = &info->depth_clip_features; descriptor_indexing_features = &info->descriptor_indexing_features; + fragment_shader_interlock_features = &info->fragment_shader_interlock_features; robustness2_features = &info->robustness2_features; descriptor_indexing_properties = &info->descriptor_indexing_properties; maintenance3_properties = &info->maintenance3_properties; @@ -835,31 +861,49 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i vertex_divisor_properties = &info->vertex_divisor_properties; timeline_semaphore_features = &info->timeline_semaphore_features; mutable_features = &info->mutable_features; + formats4444_features = &info->formats4444_features; xfb_features = &info->xfb_features; xfb_properties = &info->xfb_properties; + subgroup_properties = &info->subgroup_properties; info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; conditional_rendering_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT; - vk_prepend_struct(&info->features2, conditional_rendering_features); + if (vulkan_info->EXT_conditional_rendering) + vk_prepend_struct(&info->features2, conditional_rendering_features); depth_clip_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT; - vk_prepend_struct(&info->features2, depth_clip_features); + if (vulkan_info->EXT_depth_clip_enable) + vk_prepend_struct(&info->features2, depth_clip_features); descriptor_indexing_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; - vk_prepend_struct(&info->features2, 
descriptor_indexing_features); + if (vulkan_info->EXT_descriptor_indexing) + vk_prepend_struct(&info->features2, descriptor_indexing_features); + fragment_shader_interlock_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; + if (vulkan_info->EXT_fragment_shader_interlock) + vk_prepend_struct(&info->features2, fragment_shader_interlock_features); robustness2_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - vk_prepend_struct(&info->features2, robustness2_features); + if (vulkan_info->EXT_robustness2) + vk_prepend_struct(&info->features2, robustness2_features); demote_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; - vk_prepend_struct(&info->features2, demote_features); + if (vulkan_info->EXT_shader_demote_to_helper_invocation) + vk_prepend_struct(&info->features2, demote_features); buffer_alignment_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT; - vk_prepend_struct(&info->features2, buffer_alignment_features); + if (vulkan_info->EXT_texel_buffer_alignment) + vk_prepend_struct(&info->features2, buffer_alignment_features); xfb_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; - vk_prepend_struct(&info->features2, xfb_features); + if (vulkan_info->EXT_transform_feedback) + vk_prepend_struct(&info->features2, xfb_features); vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; - vk_prepend_struct(&info->features2, vertex_divisor_features); + if (vulkan_info->EXT_vertex_attribute_divisor) + vk_prepend_struct(&info->features2, vertex_divisor_features); timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR; - vk_prepend_struct(&info->features2, timeline_semaphore_features); + if (vulkan_info->KHR_timeline_semaphore) + vk_prepend_struct(&info->features2, 
timeline_semaphore_features); mutable_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; - vk_prepend_struct(&info->features2, mutable_features); + if (vulkan_info->EXT_mutable_descriptor_type) + vk_prepend_struct(&info->features2, mutable_features); + formats4444_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT; + if (vulkan_info->EXT_4444_formats) + vk_prepend_struct(&info->features2, formats4444_features); if (vulkan_info->KHR_get_physical_device_properties2) VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); @@ -869,15 +913,23 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i info->properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; maintenance3_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; - vk_prepend_struct(&info->properties2, maintenance3_properties); + if (vulkan_info->KHR_maintenance3) + vk_prepend_struct(&info->properties2, maintenance3_properties); descriptor_indexing_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT; - vk_prepend_struct(&info->properties2, descriptor_indexing_properties); + if (vulkan_info->EXT_descriptor_indexing) + vk_prepend_struct(&info->properties2, descriptor_indexing_properties); buffer_alignment_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT; - vk_prepend_struct(&info->properties2, buffer_alignment_properties); + if (vulkan_info->EXT_texel_buffer_alignment) + vk_prepend_struct(&info->properties2, buffer_alignment_properties); xfb_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; - vk_prepend_struct(&info->properties2, xfb_properties); + if (vulkan_info->EXT_transform_feedback) + vk_prepend_struct(&info->properties2, xfb_properties); vertex_divisor_properties->sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; - vk_prepend_struct(&info->properties2, vertex_divisor_properties); + if (vulkan_info->EXT_vertex_attribute_divisor) + vk_prepend_struct(&info->properties2, vertex_divisor_properties); + subgroup_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; + if (d3d12_device_environment_is_vulkan_min_1_1(device)) + vk_prepend_struct(&info->properties2, subgroup_properties); if (vulkan_info->KHR_get_physical_device_properties2) VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2)); @@ -1158,6 +1210,7 @@ static void vkd3d_trace_physical_device_limits(const struct vkd3d_physical_devic static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_device_info *info) { + const VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; const VkPhysicalDeviceConditionalRenderingFeaturesEXT *conditional_rendering_features; const VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; const VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; @@ -1279,6 +1332,15 @@ static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_dev TRACE(" VkPhysicalDeviceDepthClipEnableFeaturesEXT:\n"); TRACE(" depthClipEnable: %#x.\n", depth_clip_features->depthClipEnable); + fragment_shader_interlock_features = &info->fragment_shader_interlock_features; + TRACE(" VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT:\n"); + TRACE(" fragmentShaderSampleInterlock: %#x.\n", + fragment_shader_interlock_features->fragmentShaderSampleInterlock); + TRACE(" fragmentShaderPixelInterlock: %#x.\n", + fragment_shader_interlock_features->fragmentShaderPixelInterlock); + TRACE(" fragmentShaderShadingRateInterlock: %#x.\n", + fragment_shader_interlock_features->fragmentShaderShadingRateInterlock); + demote_features = &info->demote_features; TRACE(" 
VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT:\n"); TRACE(" shaderDemoteToHelperInvocation: %#x.\n", demote_features->shaderDemoteToHelperInvocation); @@ -1470,22 +1532,92 @@ static bool d3d12_device_supports_typed_uav_load_additional_formats(const struct return true; } +static HRESULT vkd3d_check_device_extensions(struct d3d12_device *device, + const struct vkd3d_device_create_info *create_info, VkExtensionProperties **vk_extensions, + uint32_t *vk_extension_count, uint32_t *device_extension_count, bool **user_extension_supported) +{ + const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; + const struct vkd3d_optional_device_extensions_info *optional_extensions; + VkPhysicalDevice physical_device = device->vk_physical_device; + struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; + VkResult vr; + + *device_extension_count = 0; + + if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, vk_extension_count, NULL))) < 0) + { + ERR("Failed to enumerate device extensions, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + if (!(*vk_extensions = vkd3d_calloc(*vk_extension_count, sizeof(**vk_extensions)))) + return E_OUTOFMEMORY; + + TRACE("Enumerating %u device extensions.\n", *vk_extension_count); + if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, vk_extension_count, *vk_extensions))) < 0) + { + ERR("Failed to enumerate device extensions, vr %d.\n", vr); + vkd3d_free(*vk_extensions); + return hresult_from_vk_result(vr); + } + + optional_extensions = vkd3d_find_struct(create_info->next, OPTIONAL_DEVICE_EXTENSIONS_INFO); + if (optional_extensions && optional_extensions->extension_count) + { + if (!(*user_extension_supported = vkd3d_calloc(optional_extensions->extension_count, sizeof(bool)))) + { + vkd3d_free(*vk_extensions); + return E_OUTOFMEMORY; + } + } + else + { + *user_extension_supported = NULL; + } + + *device_extension_count = 
vkd3d_check_extensions(*vk_extensions, *vk_extension_count, + required_device_extensions, ARRAY_SIZE(required_device_extensions), + optional_device_extensions, ARRAY_SIZE(optional_device_extensions), + create_info->device_extensions, create_info->device_extension_count, + optional_extensions ? optional_extensions->extensions : NULL, + optional_extensions ? optional_extensions->extension_count : 0, + *user_extension_supported, vulkan_info, "device", + device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG); + + return S_OK; +} + static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, const struct vkd3d_device_create_info *create_info, struct vkd3d_physical_device_info *physical_device_info, uint32_t *device_extension_count, bool **user_extension_supported) { + const VkPhysicalDeviceSubgroupProperties *subgroup_properties = &physical_device_info->subgroup_properties; const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; - const struct vkd3d_optional_device_extensions_info *optional_extensions; + VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock; VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing; VkPhysicalDevice physical_device = device->vk_physical_device; struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; VkExtensionProperties *vk_extensions; VkPhysicalDeviceFeatures *features; - uint32_t count; - VkResult vr; + uint32_t vk_extension_count; + HRESULT hr; - *device_extension_count = 0; + /* SHUFFLE is required to implement WaveReadLaneAt with dynamically uniform index before SPIR-V 1.5 / Vulkan 1.2. 
*/ + static const VkSubgroupFeatureFlags required_subgroup_features = VK_SUBGROUP_FEATURE_ARITHMETIC_BIT + | VK_SUBGROUP_FEATURE_BASIC_BIT + | VK_SUBGROUP_FEATURE_BALLOT_BIT + | VK_SUBGROUP_FEATURE_SHUFFLE_BIT + | VK_SUBGROUP_FEATURE_QUAD_BIT + | VK_SUBGROUP_FEATURE_VOTE_BIT; + + static const VkSubgroupFeatureFlags required_stages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + + if (FAILED(hr = vkd3d_check_device_extensions(device, create_info, &vk_extensions, &vk_extension_count, + device_extension_count, user_extension_supported))) + return hr; + + vkd3d_physical_device_info_init(physical_device_info, device); vkd3d_trace_physical_device(physical_device, physical_device_info, vk_procs); vkd3d_trace_physical_device_features(physical_device_info); @@ -1539,8 +1671,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, device->feature_options.TypedUAVLoadAdditionalFormats = features->shaderStorageImageReadWithoutFormat && d3d12_device_supports_typed_uav_load_additional_formats(device); - /* GL_INTEL_fragment_shader_ordering, no Vulkan equivalent. */ - device->feature_options.ROVsSupported = FALSE; /* GL_INTEL_conservative_rasterization, no Vulkan equivalent. */ device->feature_options.ConservativeRasterizationTier = D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED; device->feature_options.MaxGPUVirtualAddressBitsPerResource = 40; /* FIXME */ @@ -1550,10 +1680,12 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, device->feature_options.ResourceHeapTier = D3D12_RESOURCE_HEAP_TIER_2; /* Shader Model 6 support. 
*/ - device->feature_options1.WaveOps = FALSE; - device->feature_options1.WaveLaneCountMin = 0; - device->feature_options1.WaveLaneCountMax = 0; - device->feature_options1.TotalLaneCount = 0; + device->feature_options1.WaveOps = subgroup_properties->subgroupSize >= 4 + && (subgroup_properties->supportedOperations & required_subgroup_features) == required_subgroup_features + && (subgroup_properties->supportedStages & required_stages) == required_stages; + device->feature_options1.WaveLaneCountMin = subgroup_properties->subgroupSize; + device->feature_options1.WaveLaneCountMax = subgroup_properties->subgroupSize; + device->feature_options1.TotalLaneCount = 32 * subgroup_properties->subgroupSize; /* approx. */ device->feature_options1.ExpandedComputeResourceStates = TRUE; device->feature_options1.Int64ShaderOps = features->shaderInt64; @@ -1577,47 +1709,11 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, device->feature_options5.RenderPassesTier = D3D12_RENDER_PASS_TIER_0; device->feature_options5.RaytracingTier = D3D12_RAYTRACING_TIER_NOT_SUPPORTED; - if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, &count, NULL))) < 0) - { - ERR("Failed to enumerate device extensions, vr %d.\n", vr); - return hresult_from_vk_result(vr); - } - if (!count) - return S_OK; - - if (!(vk_extensions = vkd3d_calloc(count, sizeof(*vk_extensions)))) - return E_OUTOFMEMORY; - - TRACE("Enumerating %u device extensions.\n", count); - if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, &count, vk_extensions))) < 0) - { - ERR("Failed to enumerate device extensions, vr %d.\n", vr); - vkd3d_free(vk_extensions); - return hresult_from_vk_result(vr); - } - - optional_extensions = vkd3d_find_struct(create_info->next, OPTIONAL_DEVICE_EXTENSIONS_INFO); - if (optional_extensions && optional_extensions->extension_count) - { - if (!(*user_extension_supported = vkd3d_calloc(optional_extensions->extension_count, sizeof(bool)))) - { - 
vkd3d_free(vk_extensions); - return E_OUTOFMEMORY; - } - } - else - { - *user_extension_supported = NULL; - } - - *device_extension_count = vkd3d_check_extensions(vk_extensions, count, - required_device_extensions, ARRAY_SIZE(required_device_extensions), - optional_device_extensions, ARRAY_SIZE(optional_device_extensions), - create_info->device_extensions, create_info->device_extension_count, - optional_extensions ? optional_extensions->extensions : NULL, - optional_extensions ? optional_extensions->extension_count : 0, - *user_extension_supported, vulkan_info, "device", - device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG); + fragment_shader_interlock = &physical_device_info->fragment_shader_interlock_features; + if (!fragment_shader_interlock->fragmentShaderSampleInterlock + || !fragment_shader_interlock->fragmentShaderPixelInterlock) + vulkan_info->EXT_fragment_shader_interlock = false; + device->feature_options.ROVsSupported = vulkan_info->EXT_fragment_shader_interlock; if (!physical_device_info->conditional_rendering_features.conditionalRendering) vulkan_info->EXT_conditional_rendering = false; @@ -1634,9 +1730,11 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, if (!physical_device_info->timeline_semaphore_features.timelineSemaphore) vulkan_info->KHR_timeline_semaphore = false; + physical_device_info->formats4444_features.formatA4B4G4R4 = VK_FALSE; + vulkan_info->texel_buffer_alignment_properties = physical_device_info->texel_buffer_alignment_properties; - if (get_spec_version(vk_extensions, count, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) >= 3) + if (get_spec_version(vk_extensions, vk_extension_count, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) >= 3) { const VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *divisor_features; divisor_features = &physical_device_info->vertex_divisor_features; @@ -1675,6 +1773,10 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, 
vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] = VKD3D_SHADER_SPIRV_EXTENSION_EXT_DESCRIPTOR_INDEXING; + if (vulkan_info->EXT_fragment_shader_interlock) + vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] + = VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK; + if (vulkan_info->EXT_shader_stencil_export) vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] = VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT; @@ -2029,8 +2131,6 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, VK_CALL(vkGetPhysicalDeviceMemoryProperties(physical_device, &device->memory_properties)); - vkd3d_physical_device_info_init(&physical_device_info, device); - if (FAILED(hr = vkd3d_init_device_caps(device, create_info, &physical_device_info, &extension_count, &user_extension_supported))) return hr; @@ -2498,18 +2598,310 @@ static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cach } } -/* ID3D12Device */ -static inline struct d3d12_device *impl_from_ID3D12Device7(ID3D12Device7 *iface) +/* ID3D12ShaderCacheSession */ +struct d3d12_cache_session { - return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device7_iface); + ID3D12ShaderCacheSession ID3D12ShaderCacheSession_iface; + unsigned int refcount; + + struct list cache_list_entry; + + struct d3d12_device *device; + struct vkd3d_private_store private_store; + D3D12_SHADER_CACHE_SESSION_DESC desc; + struct vkd3d_shader_cache *cache; +}; + +static struct vkd3d_mutex cache_list_mutex = VKD3D_MUTEX_INITIALIZER; +static struct list cache_list = LIST_INIT(cache_list); + +static inline struct d3d12_cache_session *impl_from_ID3D12ShaderCacheSession(ID3D12ShaderCacheSession *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_cache_session, ID3D12ShaderCacheSession_iface); } -static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE 
d3d12_cache_session_QueryInterface(ID3D12ShaderCacheSession *iface, + REFIID iid, void **object) +{ + TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); + + if (!object) + { + WARN("Output pointer is NULL, returning E_POINTER.\n"); + return E_POINTER; + } + + if (IsEqualGUID(iid, &IID_ID3D12ShaderCacheSession) + || IsEqualGUID(iid, &IID_ID3D12DeviceChild) + || IsEqualGUID(iid, &IID_ID3D12Object) + || IsEqualGUID(iid, &IID_IUnknown)) + { + ID3D12ShaderCacheSession_AddRef(iface); + *object = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid)); + + *object = NULL; + return E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE d3d12_cache_session_AddRef(ID3D12ShaderCacheSession *iface) +{ + struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); + unsigned int refcount = vkd3d_atomic_increment_u32(&session->refcount); + + TRACE("%p increasing refcount to %u.\n", session, refcount); + + return refcount; +} + +static void d3d12_cache_session_destroy(struct d3d12_cache_session *session) +{ + struct d3d12_device *device = session->device; + + TRACE("Destroying cache session %p.\n", session); + + vkd3d_mutex_lock(&cache_list_mutex); + list_remove(&session->cache_list_entry); + vkd3d_mutex_unlock(&cache_list_mutex); + + vkd3d_shader_cache_decref(session->cache); + vkd3d_private_store_destroy(&session->private_store); + vkd3d_free(session); + + d3d12_device_release(device); +} + +static ULONG STDMETHODCALLTYPE d3d12_cache_session_Release(ID3D12ShaderCacheSession *iface) +{ + struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); + unsigned int refcount = vkd3d_atomic_decrement_u32(&session->refcount); + + TRACE("%p decreasing refcount to %u.\n", session, refcount); + + if (!refcount) + d3d12_cache_session_destroy(session); + + return refcount; +} + +static HRESULT STDMETHODCALLTYPE d3d12_cache_session_GetPrivateData(ID3D12ShaderCacheSession *iface, + 
REFGUID guid, UINT *data_size, void *data) +{ + struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_get_private_data(&session->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_cache_session_SetPrivateData(ID3D12ShaderCacheSession *iface, + REFGUID guid, UINT data_size, const void *data) +{ + struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_set_private_data(&session->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_cache_session_SetPrivateDataInterface( + ID3D12ShaderCacheSession *iface, REFGUID guid, const IUnknown *data) +{ + struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&session->private_store, guid, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_cache_session_SetName(ID3D12ShaderCacheSession *iface, + const WCHAR *name) +{ + struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, session->device->wchar_size)); + + return name ? 
S_OK : E_INVALIDARG; +} + +static HRESULT STDMETHODCALLTYPE d3d12_cache_session_GetDevice(ID3D12ShaderCacheSession *iface, + REFIID iid, void **device) +{ + struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(session->device, iid, device); +} + +static HRESULT STDMETHODCALLTYPE d3d12_cache_session_FindValue(ID3D12ShaderCacheSession *iface, + const void *key, UINT key_size, void *value, UINT *value_size) +{ + struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); + enum vkd3d_result ret; + size_t size; + + TRACE("iface %p, key %p, key_size %#x, value %p, value_size %p.\n", + iface, key, key_size, value, value_size); + + if (!value_size) + { + WARN("value_size is NULL, returning E_INVALIDARG.\n"); + return E_INVALIDARG; + } + + size = *value_size; + ret = vkd3d_shader_cache_get(session->cache, key, key_size, value, &size); + *value_size = size; + + return hresult_from_vkd3d_result(ret); +} + +static HRESULT STDMETHODCALLTYPE d3d12_cache_session_StoreValue(ID3D12ShaderCacheSession *iface, + const void *key, UINT key_size, const void *value, UINT value_size) +{ + struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); + enum vkd3d_result ret; + + TRACE("iface %p, key %p, key_size %#x, value %p, value_size %u.\n", + iface, key, key_size, value, value_size); + + if (!key || !key_size || !value || !value_size) + { + WARN("Invalid input parameters, returning E_INVALIDARG.\n"); + return E_INVALIDARG; + } + + ret = vkd3d_shader_cache_put(session->cache, key, key_size, value, value_size); + return hresult_from_vkd3d_result(ret); +} + +static void STDMETHODCALLTYPE d3d12_cache_session_SetDeleteOnDestroy(ID3D12ShaderCacheSession *iface) +{ + FIXME("iface %p stub!\n", iface); +} + +static D3D12_SHADER_CACHE_SESSION_DESC * STDMETHODCALLTYPE d3d12_cache_session_GetDesc( + 
ID3D12ShaderCacheSession *iface, D3D12_SHADER_CACHE_SESSION_DESC *desc) +{ + struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); + + TRACE("iface %p.\n", iface); + *desc = session->desc; + return desc; +} + +static const struct ID3D12ShaderCacheSessionVtbl d3d12_cache_session_vtbl = +{ + /* IUnknown methods */ + d3d12_cache_session_QueryInterface, + d3d12_cache_session_AddRef, + d3d12_cache_session_Release, + /* ID3D12Object methods */ + d3d12_cache_session_GetPrivateData, + d3d12_cache_session_SetPrivateData, + d3d12_cache_session_SetPrivateDataInterface, + d3d12_cache_session_SetName, + /* ID3D12DeviceChild methods */ + d3d12_cache_session_GetDevice, + /* ID3D12ShaderCacheSession methods */ + d3d12_cache_session_FindValue, + d3d12_cache_session_StoreValue, + d3d12_cache_session_SetDeleteOnDestroy, + d3d12_cache_session_GetDesc, +}; + +static HRESULT d3d12_cache_session_init(struct d3d12_cache_session *session, + struct d3d12_device *device, const D3D12_SHADER_CACHE_SESSION_DESC *desc) +{ + struct d3d12_cache_session *i; + enum vkd3d_result ret; + HRESULT hr; + + session->ID3D12ShaderCacheSession_iface.lpVtbl = &d3d12_cache_session_vtbl; + session->refcount = 1; + session->desc = *desc; + session->cache = NULL; + + if (!session->desc.MaximumValueFileSizeBytes) + session->desc.MaximumValueFileSizeBytes = 128 * 1024 * 1024; + if (!session->desc.MaximumInMemoryCacheSizeBytes) + session->desc.MaximumInMemoryCacheSizeBytes = 1024 * 1024; + if (!session->desc.MaximumInMemoryCacheEntries) + session->desc.MaximumInMemoryCacheEntries = 128; + + if (FAILED(hr = vkd3d_private_store_init(&session->private_store))) + return hr; + + vkd3d_mutex_lock(&cache_list_mutex); + + /* We expect the number of open caches to be small. 
*/ + LIST_FOR_EACH_ENTRY(i, &cache_list, struct d3d12_cache_session, cache_list_entry) + { + if (!memcmp(&i->desc.Identifier, &desc->Identifier, sizeof(desc->Identifier))) + { + TRACE("Found an existing cache %p from session %p.\n", i->cache, i); + if (desc->Version == i->desc.Version) + { + session->desc = i->desc; + vkd3d_shader_cache_incref(session->cache = i->cache); + break; + } + else + { + WARN("version mismatch: Existing %"PRIu64" new %"PRIu64".\n", + i->desc.Version, desc->Version); + hr = DXGI_ERROR_ALREADY_EXISTS; + goto error; + } + } + } + + if (!session->cache) + { + if (session->desc.Mode == D3D12_SHADER_CACHE_MODE_DISK) + FIXME("Disk caches are not yet implemented.\n"); + + ret = vkd3d_shader_open_cache(&session->cache); + if (ret) + { + WARN("Failed to open shader cache.\n"); + hr = hresult_from_vkd3d_result(ret); + goto error; + } + } + + /* Add it to the list even if we reused an existing cache. The other session might be destroyed, + * but the cache stays alive and can be opened a third time. 
*/ + list_add_tail(&cache_list, &session->cache_list_entry); + d3d12_device_add_ref(session->device = device); + + vkd3d_mutex_unlock(&cache_list_mutex); + return S_OK; + +error: + vkd3d_private_store_destroy(&session->private_store); + vkd3d_mutex_unlock(&cache_list_mutex); + return hr; +} + +/* ID3D12Device */ +static inline struct d3d12_device *impl_from_ID3D12Device9(ID3D12Device9 *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device9_iface); +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device9 *iface, REFIID riid, void **object) { TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); - if (IsEqualGUID(riid, &IID_ID3D12Device7) + if (IsEqualGUID(riid, &IID_ID3D12Device9) + || IsEqualGUID(riid, &IID_ID3D12Device8) + || IsEqualGUID(riid, &IID_ID3D12Device7) || IsEqualGUID(riid, &IID_ID3D12Device6) || IsEqualGUID(riid, &IID_ID3D12Device5) || IsEqualGUID(riid, &IID_ID3D12Device4) @@ -2520,7 +2912,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device7 *ifac || IsEqualGUID(riid, &IID_ID3D12Object) || IsEqualGUID(riid, &IID_IUnknown)) { - ID3D12Device_AddRef(iface); + ID3D12Device9_AddRef(iface); *object = iface; return S_OK; } @@ -2531,9 +2923,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device7 *ifac return E_NOINTERFACE; } -static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device7 *iface) +static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device9 *iface) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); unsigned int refcount = vkd3d_atomic_increment_u32(&device->refcount); TRACE("%p increasing refcount to %u.\n", device, refcount); @@ -2563,9 +2955,9 @@ static HRESULT device_worker_stop(struct d3d12_device *device) return S_OK; } -static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device7 *iface) +static ULONG STDMETHODCALLTYPE 
d3d12_device_Release(ID3D12Device9 *iface) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); unsigned int refcount = vkd3d_atomic_decrement_u32(&device->refcount); TRACE("%p decreasing refcount to %u.\n", device, refcount); @@ -2602,10 +2994,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device7 *iface) return refcount; } -static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device9 *iface, REFGUID guid, UINT *data_size, void *data) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); @@ -2613,10 +3005,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device7 *ifac return vkd3d_get_private_data(&device->private_store, guid, data_size, data); } -static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device9 *iface, REFGUID guid, UINT data_size, const void *data) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); @@ -2624,19 +3016,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device7 *ifac return vkd3d_set_private_data(&device->private_store, guid, data_size, data); } -static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device9 *iface, REFGUID guid, const IUnknown *data) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = 
impl_from_ID3D12Device9(iface); TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); return vkd3d_set_private_data_interface(&device->private_store, guid, data); } -static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device7 *iface, const WCHAR *name) +static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device9 *iface, const WCHAR *name) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); TRACE("iface %p, name %s.\n", iface, debugstr_w(name, device->wchar_size)); @@ -2644,17 +3036,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device7 *iface, cons VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, name); } -static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device7 *iface) +static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device9 *iface) { TRACE("iface %p.\n", iface); return 1; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device9 *iface, const D3D12_COMMAND_QUEUE_DESC *desc, REFIID riid, void **command_queue) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_command_queue *object; HRESULT hr; @@ -2668,10 +3060,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device7 * riid, command_queue); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device9 *iface, D3D12_COMMAND_LIST_TYPE type, REFIID riid, void **command_allocator) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_command_allocator *object; HRESULT hr; @@ -2685,10 +3077,10 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateCommandAllocator(ID3D12Devic riid, command_allocator); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device9 *iface, const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_pipeline_state *object; HRESULT hr; @@ -2702,10 +3094,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 &IID_ID3D12PipelineState, riid, pipeline_state); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device9 *iface, const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_pipeline_state *object; HRESULT hr; @@ -2719,11 +3111,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D &IID_ID3D12PipelineState, riid, pipeline_state); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device9 *iface, UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *command_allocator, ID3D12PipelineState *initial_pipeline_state, REFIID riid, void **command_list) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_command_list *object; HRESULT hr; @@ -2846,10 +3238,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) return true; } -static HRESULT STDMETHODCALLTYPE 
d3d12_device_CheckFeatureSupport(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 *iface, D3D12_FEATURE feature, void *feature_data, UINT feature_data_size) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", iface, feature, feature_data, feature_data_size); @@ -3095,9 +3487,21 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device7 return E_INVALIDARG; } + if (data->HighestShaderModel != D3D_SHADER_MODEL_5_1 + && (data->HighestShaderModel < D3D_SHADER_MODEL_6_0 + || data->HighestShaderModel > D3D_HIGHEST_SHADER_MODEL)) + { + WARN("Unknown shader model %#x.\n", data->HighestShaderModel); + return E_INVALIDARG; + } + TRACE("Request shader model %#x.\n", data->HighestShaderModel); - data->HighestShaderModel = D3D_SHADER_MODEL_5_1; +#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL + data->HighestShaderModel = min(data->HighestShaderModel, D3D_SHADER_MODEL_6_0); +#else + data->HighestShaderModel = min(data->HighestShaderModel, D3D_SHADER_MODEL_5_1); +#endif TRACE("Shader model %#x.\n", data->HighestShaderModel); return S_OK; @@ -3515,16 +3919,101 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device7 return S_OK; } + case D3D12_FEATURE_D3D12_OPTIONS14: + { + D3D12_FEATURE_DATA_D3D12_OPTIONS14 *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + } + + data->AdvancedTextureOpsSupported = FALSE; + data->WriteableMSAATexturesSupported = FALSE; + data->IndependentFrontAndBackStencilRefMaskSupported = FALSE; + + TRACE("Advanced texture ops %#x.\n", data->AdvancedTextureOpsSupported); + TRACE("Writeable MSAA textures %#x.\n", data->WriteableMSAATexturesSupported); + TRACE("Independent front and back stencil ref mask %#x.\n", 
data->IndependentFrontAndBackStencilRefMaskSupported); + return S_OK; + } + + case D3D12_FEATURE_D3D12_OPTIONS15: + { + D3D12_FEATURE_DATA_D3D12_OPTIONS15 *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + } + + data->TriangleFanSupported = FALSE; + data->DynamicIndexBufferStripCutSupported = FALSE; + + TRACE("Triangle fan %#x.\n", data->TriangleFanSupported); + TRACE("Dynamic index buffer strip cut %#x.\n", data->DynamicIndexBufferStripCutSupported); + return S_OK; + } + + case D3D12_FEATURE_D3D12_OPTIONS16: + { + D3D12_FEATURE_DATA_D3D12_OPTIONS16 *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + } + + data->DynamicDepthBiasSupported = FALSE; + data->GPUUploadHeapSupported = FALSE; + + TRACE("Dynamic depth bias %#x.\n", data->DynamicDepthBiasSupported); + TRACE("GPU upload heap %#x.\n", data->GPUUploadHeapSupported); + return S_OK; + } + + case D3D12_FEATURE_D3D12_OPTIONS17: + { + D3D12_FEATURE_DATA_D3D12_OPTIONS17 *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + } + + data->NonNormalizedCoordinateSamplersSupported = FALSE; + data->ManualWriteTrackingResourceSupported = FALSE; + + TRACE("Non-normalized coordinate samplers %#x.\n", data->NonNormalizedCoordinateSamplersSupported); + TRACE("Manual write tracking resource %#x.\n", data->ManualWriteTrackingResourceSupported); + return S_OK; + } + + case D3D12_FEATURE_D3D12_OPTIONS18: + { + D3D12_FEATURE_DATA_D3D12_OPTIONS18 *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + } + + data->RenderPassesValid = FALSE; + + TRACE("Render passes valid %#x.\n", data->RenderPassesValid); + return S_OK; + } + default: FIXME("Unhandled feature %#x.\n", feature); return E_NOTIMPL; } } -static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateDescriptorHeap(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device9 *iface, const D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_descriptor_heap *object; HRESULT hr; @@ -3538,7 +4027,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device7 &IID_ID3D12DescriptorHeap, riid, descriptor_heap); } -static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device7 *iface, +static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device9 *iface, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) { TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type); @@ -3561,11 +4050,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D } } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device9 *iface, UINT node_mask, const void *bytecode, SIZE_T bytecode_length, REFIID riid, void **root_signature) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_root_signature *object; HRESULT hr; @@ -3581,10 +4070,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device7 &IID_ID3D12RootSignature, riid, root_signature); } -static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device9 *iface, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = 
impl_from_ID3D12Device9(iface); struct d3d12_desc tmp = {0}; TRACE("iface %p, desc %p, descriptor %s.\n", iface, desc, debug_cpu_handle(descriptor)); @@ -3593,11 +4082,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); } -static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device9 *iface, ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_desc tmp = {0}; TRACE("iface %p, resource %p, desc %p, descriptor %s.\n", @@ -3607,11 +4096,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); } -static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device9 *iface, ID3D12Resource *resource, ID3D12Resource *counter_resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_desc tmp = {0}; TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %s.\n", @@ -3622,7 +4111,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); } -static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device9 *iface, ID3D12Resource *resource, const 
D3D12_RENDER_TARGET_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { @@ -3630,10 +4119,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device7 iface, resource, desc, debug_cpu_handle(descriptor)); d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), - impl_from_ID3D12Device7(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + impl_from_ID3D12Device9(iface), unsafe_impl_from_ID3D12Resource(resource), desc); } -static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device9 *iface, ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { @@ -3641,13 +4130,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device7 iface, resource, desc, debug_cpu_handle(descriptor)); d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), - impl_from_ID3D12Device7(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + impl_from_ID3D12Device9(iface), unsafe_impl_from_ID3D12Resource(resource), desc); } -static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device9 *iface, const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_desc tmp = {0}; TRACE("iface %p, desc %p, descriptor %s.\n", iface, desc, debug_cpu_handle(descriptor)); @@ -3656,14 +4145,14 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device7 *iface, d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); } -static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device9 *iface, UINT 
dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, const UINT *dst_descriptor_range_sizes, UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, const UINT *src_descriptor_range_sizes, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; unsigned int dst_range_size, src_range_size; struct d3d12_descriptor_heap *dst_heap; @@ -3719,7 +4208,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device7 *iface, } } -static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device9 *iface, UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) @@ -3850,10 +4339,10 @@ static void d3d12_device_get_resource_allocation_info(struct d3d12_device *devic } static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( - ID3D12Device7 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, + ID3D12Device9 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, UINT count, const D3D12_RESOURCE_DESC *resource_descs) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p.\n", iface, info, visible_mask, count, resource_descs); @@ -3865,10 +4354,10 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour return info; } -static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device7 *iface, +static 
D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device9 *iface, D3D12_HEAP_PROPERTIES *heap_properties, UINT node_mask, D3D12_HEAP_TYPE heap_type) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); bool coherent; TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", @@ -3908,12 +4397,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope return heap_properties; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device9 *iface, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); D3D12_RESOURCE_DESC1 resource_desc; struct d3d12_resource *object; HRESULT hr; @@ -3935,10 +4424,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device9 *iface, const D3D12_HEAP_DESC *desc, REFIID iid, void **heap) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_heap *object; HRESULT hr; @@ -3954,12 +4443,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device7 *iface, return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); } -static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreatePlacedResource(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device9 *iface, ID3D12Heap *heap, UINT64 heap_offset, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); D3D12_RESOURCE_DESC1 resource_desc; struct d3d12_heap *heap_object; struct d3d12_resource *object; @@ -3980,11 +4469,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device7 return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device9 *iface, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); D3D12_RESOURCE_DESC1 resource_desc; struct d3d12_resource *object; HRESULT hr; @@ -4001,11 +4490,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device9 *iface, ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access, const WCHAR *name, HANDLE *handle) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p 
stub!\n", iface, object, attributes, (uint32_t)access, debugstr_w(name, device->wchar_size), handle); @@ -4013,7 +4502,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device7 * return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device9 *iface, HANDLE handle, REFIID riid, void **object) { FIXME("iface %p, handle %p, riid %s, object %p stub!\n", @@ -4022,10 +4511,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device7 *if return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device9 *iface, const WCHAR *name, DWORD access, HANDLE *handle) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); FIXME("iface %p, name %s, access %#x, handle %p stub!\n", iface, debugstr_w(name, device->wchar_size), (uint32_t)access, handle); @@ -4033,7 +4522,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device9 *iface, UINT object_count, ID3D12Pageable * const *objects) { ID3D12Fence *fence; @@ -4041,17 +4530,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device7 *iface, TRACE("iface %p, object_count %u, objects %p.\n", iface, object_count, objects); - if (FAILED(hr = ID3D12Device7_CreateFence(iface, 0, 0, &IID_ID3D12Fence, (void **)&fence))) + if (FAILED(hr = ID3D12Device9_CreateFence(iface, 0, 0, &IID_ID3D12Fence, (void **)&fence))) return hr; - hr = ID3D12Device7_EnqueueMakeResident(iface, 0, object_count, objects, fence, 1); + hr = ID3D12Device9_EnqueueMakeResident(iface, 0, 
object_count, objects, fence, 1); if (SUCCEEDED(hr)) ID3D12Fence_SetEventOnCompletion(fence, 1, NULL); ID3D12Fence_Release(fence); return hr; } -static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device9 *iface, UINT object_count, ID3D12Pageable * const *objects) { FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", @@ -4060,10 +4549,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device7 *iface, return S_OK; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device9 *iface, UINT64 initial_value, D3D12_FENCE_FLAGS flags, REFIID riid, void **fence) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_fence *object; HRESULT hr; @@ -4076,9 +4565,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device7 *iface, return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); } -static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device7 *iface) +static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device9 *iface) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); TRACE("iface %p.\n", iface); @@ -4163,12 +4652,12 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, *total_bytes = total; } -static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device9 *iface, const D3D12_RESOURCE_DESC *desc, UINT first_sub_resource, UINT sub_resource_count, UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) { - struct d3d12_device *device 
= impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); D3D12_RESOURCE_DESC1 resource_desc; TRACE("iface %p, desc %p, first_sub_resource %u, sub_resource_count %u, base_offset %#"PRIx64", " @@ -4182,10 +4671,10 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device7 * base_offset, layouts, row_counts, row_sizes, total_bytes); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device9 *iface, const D3D12_QUERY_HEAP_DESC *desc, REFIID iid, void **heap) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_query_heap *object; HRESULT hr; @@ -4198,18 +4687,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device7 *ifa return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); } -static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device7 *iface, BOOL enable) +static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device9 *iface, BOOL enable) { FIXME("iface %p, enable %#x stub!\n", iface, enable); return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device9 *iface, const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature, REFIID iid, void **command_signature) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_command_signature *object; HRESULT hr; @@ -4223,14 +4712,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic &IID_ID3D12CommandSignature, iid, command_signature); } -static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device7 
*iface, +static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device9 *iface, ID3D12Resource *resource, UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, D3D12_SUBRESOURCE_TILING *sub_resource_tilings) { const struct d3d12_resource *resource_impl = impl_from_ID3D12Resource(resource); - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " "standard_title_shape %p, sub_resource_tiling_count %p, " @@ -4243,9 +4732,9 @@ static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device7 *ifac sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); } -static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device7 *iface, LUID *luid) +static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device9 *iface, LUID *luid) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); TRACE("iface %p, luid %p.\n", iface, luid); @@ -4254,7 +4743,7 @@ static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device7 *iface return luid; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device9 *iface, const void *blob, SIZE_T blob_size, REFIID iid, void **lib) { FIXME("iface %p, blob %p, blob_size %"PRIuPTR", iid %s, lib %p stub!\n", @@ -4263,7 +4752,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device return DXGI_ERROR_UNSUPPORTED; } -static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device9 *iface, 
ID3D12Fence *const *fences, const UINT64 *values, UINT fence_count, D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags, HANDLE event) { @@ -4273,7 +4762,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion( return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device9 *iface, UINT object_count, ID3D12Pageable *const *objects, const D3D12_RESIDENCY_PRIORITY *priorities) { FIXME_ONCE("iface %p, object_count %u, objects %p, priorities %p stub!\n", iface, object_count, objects, priorities); @@ -4281,10 +4770,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device7 return S_OK; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device9 *iface, const D3D12_PIPELINE_STATE_STREAM_DESC *desc, REFIID iid, void **pipeline_state) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_pipeline_state *object; HRESULT hr; @@ -4296,7 +4785,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device7 return return_interface(&object->ID3D12PipelineState_iface, &IID_ID3D12PipelineState, iid, pipeline_state); } -static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12Device9 *iface, const void *address, REFIID iid, void **heap) { FIXME("iface %p, address %p, iid %s, heap %p stub!\n", iface, address, debugstr_guid(iid), heap); @@ -4304,7 +4793,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12 return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID3D12Device7 *iface, +static HRESULT 
STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID3D12Device9 *iface, HANDLE file_mapping, REFIID iid, void **heap) { FIXME("iface %p, file_mapping %p, iid %s, heap %p stub!\n", iface, file_mapping, debugstr_guid(iid), heap); @@ -4312,7 +4801,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device9 *iface, D3D12_RESIDENCY_FLAGS flags, UINT num_objects, ID3D12Pageable *const *objects, ID3D12Fence *fence, UINT64 fence_value) { @@ -4323,7 +4812,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device7 return S_OK; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device9 *iface, UINT node_mask, D3D12_COMMAND_LIST_TYPE type, D3D12_COMMAND_LIST_FLAGS flags, REFIID iid, void **command_list) { @@ -4333,7 +4822,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device7 * return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3D12Device9 *iface, const D3D12_PROTECTED_RESOURCE_SESSION_DESC *desc, REFIID iid, void **session) { FIXME("iface %p, desc %p, iid %s, session %p stub!\n", iface, desc, debugstr_guid(iid), session); @@ -4341,13 +4830,13 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3 return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Device9 *iface, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC *desc, 
D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); D3D12_RESOURCE_DESC1 resource_desc; struct d3d12_resource *object; HRESULT hr; @@ -4369,11 +4858,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Dev return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device9 *iface, const D3D12_HEAP_DESC *desc, ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **heap) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); struct d3d12_heap *object; HRESULT hr; @@ -4389,7 +4878,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device7 *iface, return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Device9 *iface, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) @@ -4403,11 +4892,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Devi } static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo1( - ID3D12Device7 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, + ID3D12Device9 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, UINT count, const D3D12_RESOURCE_DESC 
*resource_descs, D3D12_RESOURCE_ALLOCATION_INFO1 *info1) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device9(iface); TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p, info1 %p.\n", iface, info, visible_mask, count, resource_descs, info1); @@ -4419,7 +4908,7 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour return info; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device9 *iface, ID3D12LifetimeOwner *owner, REFIID iid, void **tracker) { FIXME("iface %p, owner %p, iid %s, tracker %p stub!\n", iface, owner, debugstr_guid(iid), tracker); @@ -4427,12 +4916,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device return E_NOTIMPL; } -static void STDMETHODCALLTYPE d3d12_device_RemoveDevice(ID3D12Device7 *iface) +static void STDMETHODCALLTYPE d3d12_device_RemoveDevice(ID3D12Device9 *iface) { FIXME("iface %p stub!\n", iface); } -static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device9 *iface, UINT *num_meta_commands, D3D12_META_COMMAND_DESC *command_desc) { FIXME("iface %p, num_meta_commands %p, command_desc %p stub!\n", iface, @@ -4441,7 +4930,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3D12Device9 *iface, REFGUID command_id, D3D12_META_COMMAND_PARAMETER_STAGE stage, UINT *size_in_bytes, UINT *parameter_count, D3D12_META_COMMAND_PARAMETER_DESC *parameter_desc) @@ -4453,7 +4942,7 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_device_EnumerateMetaCommandParameters(ID3 return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device9 *iface, REFGUID command_id, UINT node_mask, const void *parameters_data, SIZE_T data_size_in_bytes, REFIID iid, void **meta_command) { @@ -4465,7 +4954,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device7 *i return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device9 *iface, const D3D12_STATE_OBJECT_DESC *desc, REFIID iid, void **state_object) { FIXME("iface %p, desc %p, iid %s, state_object %p stub!\n", iface, desc, debugstr_guid(iid), state_object); @@ -4473,14 +4962,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device7 *i return E_NOTIMPL; } -static void STDMETHODCALLTYPE d3d12_device_GetRaytracingAccelerationStructurePrebuildInfo(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_GetRaytracingAccelerationStructurePrebuildInfo(ID3D12Device9 *iface, const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS *desc, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO *info) { FIXME("iface %p, desc %p, info %p stub!\n", iface, desc, info); } -static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_CheckDriverMatchingIdentifier(ID3D12Device7 *iface, +static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_CheckDriverMatchingIdentifier(ID3D12Device9 *iface, D3D12_SERIALIZED_DATA_TYPE data_type, const D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER *identifier) { FIXME("iface %p, data_type %u, identifier %p stub!\n", iface, data_type, identifier); @@ -4488,7 +4977,7 @@ static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_Ch return 
D3D12_DRIVER_MATCHING_IDENTIFIER_UNRECOGNIZED; } -static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12Device9 *iface, D3D12_BACKGROUND_PROCESSING_MODE mode, D3D12_MEASUREMENTS_ACTION action, HANDLE event, BOOL *further_measurements_desired) { @@ -4498,7 +4987,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12 return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device7 *iface, + +static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device9 *iface, const D3D12_STATE_OBJECT_DESC *addition, ID3D12StateObject *state_object_to_grow_from, REFIID riid, void **new_state_object) { @@ -4508,7 +4998,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device7 *if return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID3D12Device9 *iface, const D3D12_PROTECTED_RESOURCE_SESSION_DESC1 *desc, REFIID riid, void **session) { FIXME("iface %p, desc %p, riid %s, session %p stub!\n", iface, desc, debugstr_guid(riid), session); @@ -4516,7 +5006,167 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID return E_NOTIMPL; } -static const struct ID3D12Device7Vtbl d3d12_device_vtbl = +static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo2(ID3D12Device9 *iface, + D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, UINT count, + const D3D12_RESOURCE_DESC1 *resource_descs, D3D12_RESOURCE_ALLOCATION_INFO1 *info1) +{ + struct d3d12_device *device = impl_from_ID3D12Device9(iface); + + TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p, info1 %p.\n", + iface, info, visible_mask, count, resource_descs, info1); + + 
debug_ignored_node_mask(visible_mask); + + d3d12_device_get_resource1_allocation_info(device, info1, count, resource_descs, info); + + return info; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource2(ID3D12Device9 *iface, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC1 *desc, + D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, + ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) +{ + struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_resource *object; + HRESULT hr; + + TRACE("iface %p, heap_properties %p, heap_flags %#x, desc %p, initial_state %#x, " + "optimized_clear_value %p, protected_session %p, iid %s, resource %p.\n", + iface, heap_properties, heap_flags, desc, initial_state, + optimized_clear_value, protected_session, debugstr_guid(iid), resource); + + if (FAILED(hr = d3d12_committed_resource_create(device, heap_properties, heap_flags, + desc, initial_state, optimized_clear_value, protected_session, &object))) + { + *resource = NULL; + return hr; + } + + return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource1(ID3D12Device9 *iface, + ID3D12Heap *heap, UINT64 heap_offset, const D3D12_RESOURCE_DESC1 *resource_desc, + D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, + REFIID iid, void **resource) +{ + struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_heap *heap_object; + struct d3d12_resource *object; + HRESULT hr; + + TRACE("iface %p, heap %p, heap_offset %#"PRIx64", desc %p, initial_state %#x, " + "optimized_clear_value %p, iid %s, resource %p.\n", + iface, heap, heap_offset, resource_desc, initial_state, + optimized_clear_value, debugstr_guid(iid), resource); + + heap_object = unsafe_impl_from_ID3D12Heap(heap); + + if 
(FAILED(hr = d3d12_placed_resource_create(device, heap_object, heap_offset, + resource_desc, initial_state, optimized_clear_value, &object))) + return hr; + + return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); +} + +static void STDMETHODCALLTYPE d3d12_device_CreateSamplerFeedbackUnorderedAccessView(ID3D12Device9 *iface, + ID3D12Resource *target_resource, ID3D12Resource *feedback_resource, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) +{ + FIXME("iface %p, target_resource %p, feedback_resource %p, descriptor %s stub!\n", + iface, target_resource, feedback_resource, debug_cpu_handle(descriptor)); +} + +static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints1(ID3D12Device9 *iface, + const D3D12_RESOURCE_DESC1 *desc, UINT first_sub_resource, UINT sub_resource_count, + UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts, + UINT64 *row_sizes, UINT64 *total_bytes) +{ + struct d3d12_device *device = impl_from_ID3D12Device9(iface); + + TRACE("iface %p, desc %p, first_sub_resource %u, sub_resource_count %u, base_offset %#"PRIx64", " + "layouts %p, row_counts %p, row_sizes %p, total_bytes %p.\n", + iface, desc, first_sub_resource, sub_resource_count, base_offset, + layouts, row_counts, row_sizes, total_bytes); + + d3d12_device_get_copyable_footprints(device, desc, first_sub_resource, sub_resource_count, + base_offset, layouts, row_counts, row_sizes, total_bytes); +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateShaderCacheSession(ID3D12Device9 *iface, + const D3D12_SHADER_CACHE_SESSION_DESC *desc, REFIID iid, void **session) +{ + struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_cache_session *object; + static const GUID guid_null = {0}; + HRESULT hr; + + static const UINT valid_flags = D3D12_SHADER_CACHE_FLAG_DRIVER_VERSIONED + | D3D12_SHADER_CACHE_FLAG_USE_WORKING_DIR; + + TRACE("iface %p, desc %p, iid %s, session %p.\n", iface, desc, debugstr_guid(iid), session); 
+ + if (!desc || !memcmp(&desc->Identifier, &guid_null, sizeof(desc->Identifier))) + { + WARN("No description or identifier, returning E_INVALIDARG.\n"); + return E_INVALIDARG; + } + if (desc->MaximumValueFileSizeBytes > 1024 * 1024 * 1024) + { + WARN("Requested size is larger than 1GiB, returning E_INVALIDARG.\n"); + return E_INVALIDARG; + } + if (desc->Flags & ~valid_flags) + { + WARN("Invalid flags %#x, returning E_INVALIDARG.\n", desc->Flags); + return E_INVALIDARG; + } + if (desc->Mode != D3D12_SHADER_CACHE_MODE_MEMORY && desc->Mode != D3D12_SHADER_CACHE_MODE_DISK) + { + WARN("Invalid mode %#x, returning E_INVALIDARG.\n", desc->Mode); + return E_INVALIDARG; + } + if (!session) + { + WARN("No output pointer, returning S_FALSE.\n"); + return S_FALSE; + } + *session = NULL; + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + if (FAILED(hr = d3d12_cache_session_init(object, device, desc))) + { + vkd3d_free(object); + return hr; + } + + hr = ID3D12ShaderCacheSession_QueryInterface(&object->ID3D12ShaderCacheSession_iface, iid, + session); + ID3D12ShaderCacheSession_Release(&object->ID3D12ShaderCacheSession_iface); + return hr; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_ShaderCacheControl(ID3D12Device9 *iface, + D3D12_SHADER_CACHE_KIND_FLAGS kinds, D3D12_SHADER_CACHE_CONTROL_FLAGS control) +{ + FIXME("iface %p, kinds %#x control %#x stub!\n", iface, kinds, control); + + return E_NOTIMPL; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue1(ID3D12Device9 *iface, + const D3D12_COMMAND_QUEUE_DESC *desc, REFIID creator_id, REFIID iid, + void **command_queue) +{ + FIXME("iface %p, desc %p, creator %s, iid %s, queue %p stub!\n", iface, desc, + debugstr_guid(creator_id), debugstr_guid(iid), command_queue); + + return E_NOTIMPL; +} + +static const struct ID3D12Device9Vtbl d3d12_device_vtbl = { /* IUnknown methods */ d3d12_device_QueryInterface, @@ -4596,14 +5246,24 @@ static const struct ID3D12Device7Vtbl 
d3d12_device_vtbl = /* ID3D12Device7 methods */ d3d12_device_AddToStateObject, d3d12_device_CreateProtectedResourceSession1, + /* ID3D12Device8 methods */ + d3d12_device_GetResourceAllocationInfo2, + d3d12_device_CreateCommittedResource2, + d3d12_device_CreatePlacedResource1, + d3d12_device_CreateSamplerFeedbackUnorderedAccessView, + d3d12_device_GetCopyableFootprints1, + /* ID3D12Device9 methods */ + d3d12_device_CreateShaderCacheSession, + d3d12_device_ShaderCacheControl, + d3d12_device_CreateCommandQueue1, }; -struct d3d12_device *unsafe_impl_from_ID3D12Device7(ID3D12Device7 *iface) +struct d3d12_device *unsafe_impl_from_ID3D12Device9(ID3D12Device9 *iface) { if (!iface) return NULL; assert(iface->lpVtbl == &d3d12_device_vtbl); - return impl_from_ID3D12Device7(iface); + return impl_from_ID3D12Device9(iface); } static void *device_worker_main(void *arg) @@ -4646,13 +5306,15 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, const struct vkd3d_vk_device_procs *vk_procs; HRESULT hr; - device->ID3D12Device7_iface.lpVtbl = &d3d12_device_vtbl; + device->ID3D12Device9_iface.lpVtbl = &d3d12_device_vtbl; device->refcount = 1; vkd3d_instance_incref(device->vkd3d_instance = instance); device->vk_info = instance->vk_info; device->signal_event = instance->signal_event; device->wchar_size = instance->wchar_size; + device->environment = (instance->vk_api_version >= VK_API_VERSION_1_1) + ? 
VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1 : VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; device->adapter_luid = create_info->adapter_luid; device->removed_reason = S_OK; @@ -4894,28 +5556,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha IUnknown *vkd3d_get_device_parent(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); return d3d12_device->parent; } VkDevice vkd3d_get_vk_device(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); return d3d12_device->vk_device; } VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); return d3d12_device->vk_physical_device; } struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); return d3d12_device->vkd3d_instance; } diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c index 89764d0901d..c897d9f2c5a 100644 --- a/libs/vkd3d/libs/vkd3d/resource.c +++ b/libs/vkd3d/libs/vkd3d/resource.c @@ -1271,7 +1271,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource2 * || IsEqualGUID(riid, &IID_ID3D12Object) || IsEqualGUID(riid, &IID_IUnknown)) { - ID3D12Resource_AddRef(iface); + ID3D12Resource2_AddRef(iface); *object = iface; return S_OK; } @@ -1857,6 +1857,7 @@ static bool d3d12_resource_validate_texture_alignment(const D3D12_RESOURCE_DESC1 HRESULT 
d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device) { + const D3D12_MIP_REGION *mip_region = &desc->SamplerFeedbackMipRegion; const struct vkd3d_format *format; switch (desc->Dimension) @@ -1892,6 +1893,13 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 WARN("Invalid sample count 0.\n"); return E_INVALIDARG; } + if (desc->SampleDesc.Count > 1 + && !(desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))) + { + WARN("Sample count %u invalid without ALLOW_RENDER_TARGET or ALLOW_DEPTH_STENCIL.\n", + desc->SampleDesc.Count); + return E_INVALIDARG; + } if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) { @@ -1926,6 +1934,12 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 d3d12_validate_resource_flags(desc->Flags); + if (mip_region->Width && mip_region->Height && mip_region->Depth) + { + FIXME("Unhandled sampler feedback mip region size (%u, %u, %u).\n", mip_region->Width, mip_region->Height, + mip_region->Depth); + } + return S_OK; } @@ -1989,6 +2003,11 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 WARN("Invalid initial resource state %#x.\n", initial_state); return E_INVALIDARG; } + if (initial_state == D3D12_RESOURCE_STATE_RENDER_TARGET && !(desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) + { + WARN("Invalid initial resource state %#x for non-render-target.\n", initial_state); + return E_INVALIDARG; + } if (optimized_clear_value && d3d12_resource_is_buffer(resource)) { @@ -2253,7 +2272,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, HRESULT vkd3d_create_image_resource(ID3D12Device *device, const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource) { - struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device7((ID3D12Device7 *)device); + struct d3d12_device *d3d12_device = 
unsafe_impl_from_ID3D12Device9((ID3D12Device9 *)device); struct d3d12_resource *object; HRESULT hr; @@ -2331,16 +2350,16 @@ static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) i = vkd3d_atomic_increment_u32(&cache->next_index) & HEAD_INDEX_MASK; for (;;) { - if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) + if (vkd3d_atomic_compare_exchange_u32(&cache->heads[i].spinlock, 0, 1)) { if ((u.object = cache->heads[i].head)) { vkd3d_atomic_decrement_u32(&cache->free_count); cache->heads[i].head = u.header->next; - vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); + vkd3d_atomic_exchange_u32(&cache->heads[i].spinlock, 0); return u.object; } - vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); + vkd3d_atomic_exchange_u32(&cache->heads[i].spinlock, 0); } /* Keeping a free count avoids uncertainty over when this loop should terminate, * which could result in excess allocations gradually increasing without limit. */ @@ -2362,7 +2381,7 @@ static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, i = vkd3d_atomic_increment_u32(&cache->next_index) & HEAD_INDEX_MASK; for (;;) { - if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) + if (vkd3d_atomic_compare_exchange_u32(&cache->heads[i].spinlock, 0, 1)) break; i = (i + 1) & HEAD_INDEX_MASK; } @@ -2370,7 +2389,7 @@ static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, head = cache->heads[i].head; u.header->next = head; cache->heads[i].head = u.object; - vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); + vkd3d_atomic_exchange_u32(&cache->heads[i].spinlock, 0); vkd3d_atomic_increment_u32(&cache->free_count); } @@ -2454,7 +2473,7 @@ void vkd3d_view_decref(void *view, struct d3d12_device *device) static inline void d3d12_desc_replace(struct d3d12_desc *dst, void *view, struct d3d12_device *device) { - if ((view = vkd3d_atomic_exchange_pointer(&dst->s.u.object, view))) + if ((view = 
vkd3d_atomic_exchange_ptr(&dst->s.u.object, view))) vkd3d_view_decref(view, device); } @@ -2633,7 +2652,7 @@ void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descr union d3d12_desc_object u; unsigned int i, next; - if ((i = vkd3d_atomic_exchange(&descriptor_heap->dirty_list_head, UINT_MAX)) == UINT_MAX) + if ((i = vkd3d_atomic_exchange_u32(&descriptor_heap->dirty_list_head, UINT_MAX)) == UINT_MAX) return; writes.null_vk_cbv_info.buffer = VK_NULL_HANDLE; @@ -2648,7 +2667,7 @@ void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descr for (; i != UINT_MAX; i = next) { src = &descriptors[i]; - next = vkd3d_atomic_exchange(&src->next, 0); + next = vkd3d_atomic_exchange_u32(&src->next, 0); next = (int)next >> 1; /* A race exists here between updating src->next and getting the current object. The best @@ -2676,13 +2695,13 @@ static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst, struct d3d12_des head = descriptor_heap->dirty_list_head; /* Only one thread can swap the value away from zero. */ - if (!vkd3d_atomic_compare_exchange(&dst->next, 0, (head << 1) | 1)) + if (!vkd3d_atomic_compare_exchange_u32(&dst->next, 0, (head << 1) | 1)) return; /* Now it is safe to modify 'next' to another nonzero value if necessary. */ - while (!vkd3d_atomic_compare_exchange(&descriptor_heap->dirty_list_head, head, i)) + while (!vkd3d_atomic_compare_exchange_u32(&descriptor_heap->dirty_list_head, head, i)) { head = descriptor_heap->dirty_list_head; - vkd3d_atomic_exchange(&dst->next, (head << 1) | 1); + vkd3d_atomic_exchange_u32(&dst->next, (head << 1) | 1); } } @@ -4265,12 +4284,14 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size; VkDescriptorSetAllocateInfo set_desc; VkResult vr; + HRESULT hr; if (!device->vk_descriptor_heap_layouts[set].vk_set_layout) { /* Set 0 uses mutable descriptors, and this set is unused. 
*/ - if (!descriptor_heap->vk_descriptor_sets[0].vk_set) - d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, 0); + if (!descriptor_heap->vk_descriptor_sets[0].vk_set + && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, 0))) + return hr; descriptor_set->vk_set = descriptor_heap->vk_descriptor_sets[0].vk_set; descriptor_set->vk_type = device->vk_descriptor_heap_layouts[set].type; return S_OK; diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index 08cc110e8f7..7197193523d 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -2045,6 +2045,9 @@ static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_Release(ID3D12PipelineState d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); + if (state->implicit_root_signature) + d3d12_root_signature_Release(state->implicit_root_signature); + vkd3d_free(state); d3d12_device_release(device); @@ -2156,6 +2159,8 @@ static unsigned int feature_flags_compile_option(const struct d3d12_device *devi flags |= VKD3D_SHADER_COMPILE_OPTION_FEATURE_INT64; if (device->feature_options.DoublePrecisionFloatShaderOps) flags |= VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLOAT64; + if (device->feature_options1.WaveOps) + flags |= VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS; return flags; } @@ -2173,7 +2178,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device, const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_11}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_12}, {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, {VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE, 0}, {VKD3D_SHADER_COMPILE_OPTION_FEATURE, feature_flags_compile_option(device)}, @@ -2228,7 +2233,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER const struct vkd3d_shader_compile_option options[] 
= { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_11}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_12}, {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, }; @@ -2413,8 +2418,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct vkd3d_shader_interface_info shader_interface; struct vkd3d_shader_descriptor_offset_info offset_info; - const struct d3d12_root_signature *root_signature; struct vkd3d_shader_spirv_target_info target_info; + struct d3d12_root_signature *root_signature; VkPipelineLayout vk_pipeline_layout; HRESULT hr; @@ -2425,17 +2430,31 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st if (!(root_signature = unsafe_impl_from_ID3D12RootSignature(desc->root_signature))) { - WARN("Root signature is NULL.\n"); - return E_INVALIDARG; + TRACE("Root signature is NULL, looking for an embedded signature.\n"); + if (FAILED(hr = d3d12_root_signature_create(device, + desc->cs.pShaderBytecode, desc->cs.BytecodeLength, &root_signature))) + { + WARN("Failed to find an embedded root signature, hr %s.\n", debugstr_hresult(hr)); + return hr; + } + state->implicit_root_signature = &root_signature->ID3D12RootSignature_iface; + } + else + { + state->implicit_root_signature = NULL; } if (FAILED(hr = d3d12_pipeline_state_find_and_init_uav_counters(state, device, root_signature, &desc->cs, VK_SHADER_STAGE_COMPUTE_BIT))) + { + if (state->implicit_root_signature) + d3d12_root_signature_Release(state->implicit_root_signature); return hr; + } memset(&target_info, 0, sizeof(target_info)); target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; - target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; + target_info.environment = device->environment; target_info.extensions = device->vk_info.shader_extensions; target_info.extension_count = 
device->vk_info.shader_extension_count; @@ -2476,6 +2495,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st { WARN("Failed to create Vulkan compute pipeline, hr %s.\n", debugstr_hresult(hr)); d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); + if (state->implicit_root_signature) + d3d12_root_signature_Release(state->implicit_root_signature); return hr; } @@ -2483,6 +2504,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st { VK_CALL(vkDestroyPipeline(device->vk_device, state->u.compute.vk_pipeline, NULL)); d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); + if (state->implicit_root_signature) + d3d12_root_signature_Release(state->implicit_root_signature); return hr; } @@ -3156,7 +3179,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s ps_target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; ps_target_info.next = NULL; ps_target_info.entry_point = "main"; - ps_target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; + ps_target_info.environment = device->environment; ps_target_info.extensions = vk_info->shader_extensions; ps_target_info.extension_count = vk_info->shader_extension_count; ps_target_info.parameters = ps_shader_parameters; @@ -3186,7 +3209,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s memset(&target_info, 0, sizeof(target_info)); target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; - target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; + target_info.environment = device->environment; target_info.extensions = vk_info->shader_extensions; target_info.extension_count = vk_info->shader_extension_count; @@ -3484,6 +3507,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s goto fail; state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; + state->implicit_root_signature = NULL; 
d3d12_device_add_ref(state->device = device); return S_OK; @@ -3887,7 +3911,7 @@ static int compile_hlsl_cs(const struct vkd3d_shader_code *hlsl, struct vkd3d_sh static const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_11}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_12}, }; info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c index ac79ae5ddff..11029c9f5f9 100644 --- a/libs/vkd3d/libs/vkd3d/utils.c +++ b/libs/vkd3d/libs/vkd3d/utils.c @@ -87,6 +87,8 @@ static const struct vkd3d_format vkd3d_formats[] = {DXGI_FORMAT_R8_SNORM, VK_FORMAT_R8_SNORM, 1, 1, 1, 1, COLOR, 1}, {DXGI_FORMAT_R8_SINT, VK_FORMAT_R8_SINT, 1, 1, 1, 1, COLOR, 1, SINT}, {DXGI_FORMAT_A8_UNORM, VK_FORMAT_R8_UNORM, 1, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_B5G6R5_UNORM, VK_FORMAT_R5G6B5_UNORM_PACK16, 2, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_B5G5R5A1_UNORM, VK_FORMAT_A1R5G5B5_UNORM_PACK16, 2, 1, 1, 1, COLOR, 1}, {DXGI_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1}, {DXGI_FORMAT_B8G8R8X8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1}, {DXGI_FORMAT_B8G8R8A8_TYPELESS, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1, TYPELESS}, @@ -116,6 +118,9 @@ static const struct vkd3d_format vkd3d_formats[] = {DXGI_FORMAT_BC7_UNORM_SRGB, VK_FORMAT_BC7_SRGB_BLOCK, 1, 4, 4, 16, COLOR, 1}, }; +static const struct vkd3d_format format_b4g4r4a4 = + {DXGI_FORMAT_B4G4R4A4_UNORM, VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, 2, 1, 1, 1, COLOR, 1}; + /* Each depth/stencil format is only compatible with itself in Vulkan. 
*/ static const struct vkd3d_format vkd3d_depth_stencil_formats[] = { @@ -449,6 +454,11 @@ const struct vkd3d_format *vkd3d_get_format(const struct d3d12_device *device, return &vkd3d_formats[i]; } + /* Do not check VkPhysicalDevice4444FormatsFeaturesEXT because apps + * should query format support, which returns more detailed info. */ + if (dxgi_format == format_b4g4r4a4.dxgi_format && device->vk_info.EXT_4444_formats) + return &format_b4g4r4a4; + return NULL; } @@ -891,6 +901,30 @@ bool vkd3d_get_program_name(char program_name[PATH_MAX]) return true; } +#elif defined(WIN32) + +bool vkd3d_get_program_name(char program_name[PATH_MAX]) +{ + char buffer[MAX_PATH]; + char *p, *name; + size_t len; + + *program_name = '\0'; + len = GetModuleFileNameA(NULL, buffer, ARRAY_SIZE(buffer)); + if (!(len && len < MAX_PATH)) + return false; + + name = buffer; + if ((p = strrchr(name, '/'))) + name = p + 1; + if ((p = strrchr(name, '\\'))) + name = p + 1; + + len = strlen(name) + 1; + memcpy(program_name, name, len); + return true; +} + #else bool vkd3d_get_program_name(char program_name[PATH_MAX]) diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c index 7919b7d8760..c7431bd821b 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c +++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c @@ -71,11 +71,11 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, if (!device) { - ID3D12Device_Release(&object->ID3D12Device7_iface); + ID3D12Device9_Release(&object->ID3D12Device9_iface); return S_FALSE; } - return return_interface(&object->ID3D12Device7_iface, &IID_ID3D12Device, iid, device); + return return_interface(&object->ID3D12Device9_iface, &IID_ID3D12Device, iid, device); } /* ID3D12RootSignatureDeserializer */ @@ -453,11 +453,10 @@ HRESULT vkd3d_serialize_root_signature(const D3D12_ROOT_SIGNATURE_DESC *desc, if ((ret = vkd3d_shader_serialize_root_signature(&vkd3d_desc, &dxbc, &messages)) < 0) { WARN("Failed to serialize root signature, 
vkd3d result %d.\n", ret); - if (error_blob && messages) - { - if (FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) - ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); - } + if (!error_blob) + vkd3d_shader_free_messages(messages); + else if (messages && FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) + ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); return hresult_from_vkd3d_result(ret); } vkd3d_shader_free_messages(messages); @@ -494,11 +493,10 @@ HRESULT vkd3d_serialize_versioned_root_signature(const D3D12_VERSIONED_ROOT_SIGN if ((ret = vkd3d_shader_serialize_root_signature(vkd3d_desc, &dxbc, &messages)) < 0) { WARN("Failed to serialize root signature, vkd3d result %d.\n", ret); - if (error_blob && messages) - { - if (FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) - ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); - } + if (!error_blob) + vkd3d_shader_free_messages(messages); + else if (messages && FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) + ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); return hresult_from_vkd3d_result(ret); } vkd3d_shader_free_messages(messages); diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index b092bb26ded..d1fa866d9e3 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -19,15 +19,14 @@ #ifndef __VKD3D_PRIVATE_H #define __VKD3D_PRIVATE_H +#ifndef __MINGW32__ +#define WIDL_C_INLINE_WRAPPERS +#endif #define COBJMACROS #define NONAMELESSUNION #define VK_NO_PROTOTYPES #define CONST_VTABLE -#ifdef _WIN32 -# define _WIN32_WINNT 0x0600 /* for condition variables */ -#endif - #include "vkd3d_common.h" #include "vkd3d_blob.h" #include "vkd3d_memory.h" @@ -55,7 +54,7 @@ #define VKD3D_MAX_COMPATIBLE_FORMAT_COUNT 6u #define VKD3D_MAX_QUEUE_FAMILY_COUNT 3u -#define 
VKD3D_MAX_SHADER_EXTENSIONS 4u +#define VKD3D_MAX_SHADER_EXTENSIONS 5u #define VKD3D_MAX_SHADER_STAGES 5u #define VKD3D_MAX_VK_SYNC_OBJECTS 4u #define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u @@ -128,11 +127,13 @@ struct vkd3d_vulkan_info bool KHR_sampler_mirror_clamp_to_edge; bool KHR_timeline_semaphore; /* EXT device extensions */ + bool EXT_4444_formats; bool EXT_calibrated_timestamps; bool EXT_conditional_rendering; bool EXT_debug_marker; bool EXT_depth_clip_enable; bool EXT_descriptor_indexing; + bool EXT_fragment_shader_interlock; bool EXT_mutable_descriptor_type; bool EXT_robustness2; bool EXT_shader_demote_to_helper_invocation; @@ -184,6 +185,7 @@ struct vkd3d_instance struct vkd3d_vulkan_info vk_info; struct vkd3d_vk_global_procs vk_global_procs; void *libvulkan; + uint32_t vk_api_version; uint64_t config_flags; enum vkd3d_api_version api_version; @@ -195,240 +197,14 @@ struct vkd3d_instance unsigned int refcount; }; -#ifdef _WIN32 - -union vkd3d_thread_handle -{ - void *handle; -}; - -struct vkd3d_mutex -{ - CRITICAL_SECTION lock; -}; - -struct vkd3d_cond -{ - CONDITION_VARIABLE cond; -}; - -static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) -{ - InitializeCriticalSection(&lock->lock); -} - -static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) -{ - EnterCriticalSection(&lock->lock); -} - -static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) -{ - LeaveCriticalSection(&lock->lock); -} - -static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) -{ - DeleteCriticalSection(&lock->lock); -} - -static inline void vkd3d_cond_init(struct vkd3d_cond *cond) -{ - InitializeConditionVariable(&cond->cond); -} - -static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) -{ - WakeConditionVariable(&cond->cond); -} - -static inline void vkd3d_cond_broadcast(struct vkd3d_cond *cond) -{ - WakeAllConditionVariable(&cond->cond); -} - -static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) -{ - if 
(!SleepConditionVariableCS(&cond->cond, &lock->lock, INFINITE)) - ERR("Could not sleep on the condition variable, error %lu.\n", GetLastError()); -} - -static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) -{ -} - -static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) -{ - return InterlockedCompareExchange((LONG volatile *)x, xchg, cmp) == cmp; -} - -static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) -{ - return InterlockedExchange((LONG volatile *)x, val); -} - -static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) -{ - return InterlockedCompareExchangePointer(x, xchg, cmp) == cmp; -} - -static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) -{ - return InterlockedExchangePointer(x, val); -} - -#else /* _WIN32 */ - -#include <pthread.h> - union vkd3d_thread_handle { +#ifndef _WIN32 pthread_t pthread; +#endif void *handle; }; -struct vkd3d_mutex -{ - pthread_mutex_t lock; -}; - -struct vkd3d_cond -{ - pthread_cond_t cond; -}; - - -static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) -{ - int ret; - - ret = pthread_mutex_init(&lock->lock, NULL); - if (ret) - ERR("Could not initialize the mutex, error %d.\n", ret); -} - -static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) -{ - int ret; - - ret = pthread_mutex_lock(&lock->lock); - if (ret) - ERR("Could not lock the mutex, error %d.\n", ret); -} - -static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) -{ - int ret; - - ret = pthread_mutex_unlock(&lock->lock); - if (ret) - ERR("Could not unlock the mutex, error %d.\n", ret); -} - -static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) -{ - int ret; - - ret = pthread_mutex_destroy(&lock->lock); - if (ret) - ERR("Could not destroy the mutex, error %d.\n", ret); -} - -static inline void vkd3d_cond_init(struct vkd3d_cond *cond) -{ - int ret; - - ret = 
pthread_cond_init(&cond->cond, NULL); - if (ret) - ERR("Could not initialize the condition variable, error %d.\n", ret); -} - -static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) -{ - int ret; - - ret = pthread_cond_signal(&cond->cond); - if (ret) - ERR("Could not signal the condition variable, error %d.\n", ret); -} - -static inline void vkd3d_cond_broadcast(struct vkd3d_cond *cond) -{ - int ret; - - ret = pthread_cond_broadcast(&cond->cond); - if (ret) - ERR("Could not broadcast the condition variable, error %d.\n", ret); -} - -static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) -{ - int ret; - - ret = pthread_cond_wait(&cond->cond, &lock->lock); - if (ret) - ERR("Could not wait on the condition variable, error %d.\n", ret); -} - -static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) -{ - int ret; - - ret = pthread_cond_destroy(&cond->cond); - if (ret) - ERR("Could not destroy the condition variable, error %d.\n", ret); -} - -# if HAVE_SYNC_BOOL_COMPARE_AND_SWAP -static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) -{ - return __sync_bool_compare_and_swap(x, cmp, xchg); -} - -static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) -{ - return __sync_bool_compare_and_swap(x, cmp, xchg); -} -# else -# error "vkd3d_atomic_compare_exchange() not implemented for this platform" -# endif - -# if HAVE_ATOMIC_EXCHANGE_N -static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) -{ - return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); -} - -static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) -{ - return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); -} -# elif HAVE_SYNC_BOOL_COMPARE_AND_SWAP -static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) -{ - unsigned int i; - do - { - i = *x; - } while 
(!__sync_bool_compare_and_swap(x, i, val)); - return i; -} - -static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) -{ - void *p; - do - { - p = *x; - } while (!__sync_bool_compare_and_swap(x, p, val)); - return p; -} -# else -# error "vkd3d_atomic_exchange() not implemented for this platform" -# endif - -#endif /* _WIN32 */ - HRESULT vkd3d_create_thread(struct vkd3d_instance *instance, PFN_vkd3d_thread thread_main, void *data, union vkd3d_thread_handle *thread); HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_handle *thread); @@ -890,7 +666,7 @@ static inline bool vkd3d_view_incref(void *desc) if (refcount <= 0) return false; } - while (!vkd3d_atomic_compare_exchange(&h->refcount, refcount, refcount + 1)); + while (!vkd3d_atomic_compare_exchange_u32(&h->refcount, refcount, refcount + 1)); return true; } @@ -1279,6 +1055,7 @@ struct d3d12_pipeline_state struct d3d12_pipeline_uav_counter_state uav_counters; + ID3D12RootSignature *implicit_root_signature; struct d3d12_device *device; struct vkd3d_private_store private_store; @@ -1735,7 +1512,7 @@ struct vkd3d_desc_object_cache /* ID3D12Device */ struct d3d12_device { - ID3D12Device7 ID3D12Device7_iface; + ID3D12Device9 ID3D12Device9_iface; unsigned int refcount; VkDevice vk_device; @@ -1743,6 +1520,7 @@ struct d3d12_device struct vkd3d_vk_device_procs vk_procs; PFN_vkd3d_signal_event signal_event; size_t wchar_size; + enum vkd3d_shader_spirv_environment environment; struct vkd3d_gpu_va_allocator gpu_va_allocator; @@ -1810,29 +1588,29 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, const char *message, ...) 
VKD3D_PRINTF_FUNC(3, 4); -struct d3d12_device *unsafe_impl_from_ID3D12Device7(ID3D12Device7 *iface); +struct d3d12_device *unsafe_impl_from_ID3D12Device9(ID3D12Device9 *iface); HRESULT d3d12_device_add_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap); void d3d12_device_remove_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap); static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) { - return ID3D12Device7_QueryInterface(&device->ID3D12Device7_iface, iid, object); + return ID3D12Device9_QueryInterface(&device->ID3D12Device9_iface, iid, object); } static inline ULONG d3d12_device_add_ref(struct d3d12_device *device) { - return ID3D12Device7_AddRef(&device->ID3D12Device7_iface); + return ID3D12Device9_AddRef(&device->ID3D12Device9_iface); } static inline ULONG d3d12_device_release(struct d3d12_device *device) { - return ID3D12Device7_Release(&device->ID3D12Device7_iface); + return ID3D12Device9_Release(&device->ID3D12Device9_iface); } static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(struct d3d12_device *device, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_type) { - return ID3D12Device7_GetDescriptorHandleIncrementSize(&device->ID3D12Device7_iface, descriptor_type); + return ID3D12Device9_GetDescriptorHandleIncrementSize(&device->ID3D12Device9_iface, descriptor_type); } /* utils */ @@ -1993,4 +1771,14 @@ static inline void vkd3d_prepend_struct(void *header, void *structure) vkd3d_header->next = vkd3d_structure; } +struct vkd3d_shader_cache; + +int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache); +unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache); +unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache); +int vkd3d_shader_cache_put(struct vkd3d_shader_cache *cache, + const void *key, size_t key_size, const void *value, size_t value_size); +int vkd3d_shader_cache_get(struct 
vkd3d_shader_cache *cache, + const void *key, size_t key_size, void *value, size_t *value_size); + #endif /* __VKD3D_PRIVATE_H */