vkd3d: Import upstream release 1.4.

Signed-off-by: Alexandre Julliard <julliard@winehq.org>
Alexandre Julliard 2022-06-22 18:44:03 +02:00
parent a40973f204
commit 072eb3a540
32 changed files with 4092 additions and 1122 deletions

@ -5,8 +5,10 @@ Atharva Nimbalkar
Biswapriyo Nath
Chip Davis
Conor McCarthy
David Gow
Derek Lesho
Francisco Casas
Francois Gouget
Giovanni Mascellani
Hans-Kristian Arntzen
Henri Verbeet

@ -1,5 +1,5 @@
#define PACKAGE_NAME "vkd3d"
#define PACKAGE_STRING "vkd3d 1.3"
#define PACKAGE_VERSION "1.3"
#define PACKAGE_STRING "vkd3d 1.4"
#define PACKAGE_VERSION "1.4"
#define PATH_MAX 1024
#define SONAME_LIBVULKAN "vulkan-1.dll"

@ -27,6 +27,7 @@
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#ifdef _MSC_VER
#include <intrin.h>
@ -210,6 +211,10 @@ static inline LONG InterlockedIncrement(LONG volatile *x)
{
return __sync_add_and_fetch(x, 1);
}
static inline LONG64 InterlockedIncrement64(LONG64 volatile *x)
{
return __sync_add_and_fetch(x, 1);
}
static inline LONG InterlockedAdd(LONG volatile *x, LONG val)
{
return __sync_add_and_fetch(x, val);
@ -242,4 +247,74 @@ static inline void vkd3d_parse_version(const char *version, int *major, int *min
HRESULT hresult_from_vkd3d_result(int vkd3d_result);
#ifdef _WIN32
static inline void *vkd3d_dlopen(const char *name)
{
return LoadLibraryA(name);
}
static inline void *vkd3d_dlsym(void *handle, const char *symbol)
{
return GetProcAddress(handle, symbol);
}
static inline int vkd3d_dlclose(void *handle)
{
return FreeLibrary(handle);
}
static inline const char *vkd3d_dlerror(void)
{
unsigned int error = GetLastError();
static char message[256];
if (FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, error, 0, message, sizeof(message), NULL))
return message;
sprintf(message, "Unknown error %u.\n", error);
return message;
}
#elif defined(HAVE_DLFCN_H)
#include <dlfcn.h>
static inline void *vkd3d_dlopen(const char *name)
{
return dlopen(name, RTLD_NOW);
}
static inline void *vkd3d_dlsym(void *handle, const char *symbol)
{
return dlsym(handle, symbol);
}
static inline int vkd3d_dlclose(void *handle)
{
return dlclose(handle);
}
static inline const char *vkd3d_dlerror(void)
{
return dlerror();
}
#else
static inline void *vkd3d_dlopen(const char *name)
{
return NULL;
}
static inline void *vkd3d_dlsym(void *handle, const char *symbol)
{
return NULL;
}
static inline int vkd3d_dlclose(void *handle)
{
return 0;
}
static inline const char *vkd3d_dlerror(void)
{
return "Not implemented for this platform.\n";
}
#endif
#endif /* __VKD3D_COMMON_H */

@ -47,6 +47,7 @@ enum vkd3d_dbg_level
enum vkd3d_dbg_level vkd3d_dbg_get_level(void);
void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(3, 4);
void vkd3d_dbg_set_log_callback(PFN_vkd3d_log callback);
const char *vkd3d_dbg_sprintf(const char *fmt, ...) VKD3D_PRINTF_FUNC(1, 2);
const char *vkd3d_dbg_vsprintf(const char *fmt, va_list args);

@ -1 +1 @@
#define VKD3D_VCS_ID " (git d773dc05c687)"
#define VKD3D_VCS_ID " (git 9d4df5e70468)"

@ -60,6 +60,7 @@ enum vkd3d_api_version
VKD3D_API_VERSION_1_1,
VKD3D_API_VERSION_1_2,
VKD3D_API_VERSION_1_3,
VKD3D_API_VERSION_1_4,
};
typedef HRESULT (*PFN_vkd3d_signal_event)(HANDLE event);
@ -212,6 +213,20 @@ VKD3D_API HRESULT vkd3d_serialize_versioned_root_signature(const D3D12_VERSIONED
VKD3D_API HRESULT vkd3d_create_versioned_root_signature_deserializer(const void *data, SIZE_T data_size,
REFIID iid, void **deserializer);
/**
* Set a callback to be called when vkd3d outputs debug logging.
*
* If NULL, or if this function has not been called, libvkd3d will print all
* enabled log output to stderr.
*
* Calling this function will also set the log callback for libvkd3d-shader.
*
* \param callback Callback function to set.
*
* \since 1.4
*/
VKD3D_API void vkd3d_set_log_callback(PFN_vkd3d_log callback);
#endif /* VKD3D_NO_PROTOTYPES */
/*
@ -255,6 +270,9 @@ typedef HRESULT (*PFN_vkd3d_serialize_versioned_root_signature)(const D3D12_VERS
typedef HRESULT (*PFN_vkd3d_create_versioned_root_signature_deserializer)(const void *data, SIZE_T data_size,
REFIID iid, void **deserializer);
/** Type of vkd3d_set_log_callback(). \since 1.4 */
typedef void (*PFN_vkd3d_set_log_callback)(PFN_vkd3d_log callback);
#ifdef __cplusplus
}
#endif /* __cplusplus */
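
A minimal usage sketch of the new entry point, not part of this commit; it only assumes the PFN_vkd3d_log signature declared in vkd3d_types.h and that the usual headers vkd3d.h depends on are available:

    #include <stdarg.h>
    #include <stdio.h>
    #include <vkd3d.h>

    /* Matches PFN_vkd3d_log: a printf-style format string plus its va_list. */
    static void log_with_prefix(const char *format, va_list args)
    {
        fprintf(stderr, "[vkd3d] ");
        vfprintf(stderr, format, args);
    }

    static void install_log_callback(void)
    {
        /* Per the documentation above, this also sets the libvkd3d-shader callback. */
        vkd3d_set_log_callback(log_with_prefix);
    }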

@ -45,6 +45,7 @@ enum vkd3d_shader_api_version
VKD3D_SHADER_API_VERSION_1_1,
VKD3D_SHADER_API_VERSION_1_2,
VKD3D_SHADER_API_VERSION_1_3,
VKD3D_SHADER_API_VERSION_1_4,
};
/** The type of a chained structure. */
@ -1806,6 +1807,18 @@ VKD3D_SHADER_API void vkd3d_shader_free_shader_signature(struct vkd3d_shader_sig
VKD3D_SHADER_API int vkd3d_shader_preprocess(const struct vkd3d_shader_compile_info *compile_info,
struct vkd3d_shader_code *out, char **messages);
/**
* Set a callback to be called when vkd3d-shader outputs debug logging.
*
* If NULL, or if this function has not been called, libvkd3d-shader will print
* all enabled log output to stderr.
*
* \param callback Callback function to set.
*
* \since 1.4
*/
VKD3D_SHADER_API void vkd3d_shader_set_log_callback(PFN_vkd3d_log callback);
#endif /* VKD3D_SHADER_NO_PROTOTYPES */
/** Type of vkd3d_shader_get_version(). */
@ -1859,6 +1872,9 @@ typedef void (*PFN_vkd3d_shader_free_shader_signature)(struct vkd3d_shader_signa
typedef void (*PFN_vkd3d_shader_preprocess)(struct vkd3d_shader_compile_info *compile_info,
struct vkd3d_shader_code *out, char **messages);
/** Type of vkd3d_shader_set_log_callback(). \since 1.4 */
typedef void (*PFN_vkd3d_shader_set_log_callback)(PFN_vkd3d_log callback);
#ifdef __cplusplus
}
#endif /* __cplusplus */
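
Likewise, a standalone libvkd3d-shader user (for instance an offline HLSL compiler) can install the callback directly; a sketch, again not part of this commit:

    #include <stdarg.h>
    #include <vkd3d_shader.h>

    /* Discard all log output instead of letting it reach stderr. */
    static void silence_log(const char *format, va_list args)
    {
        (void)format;
        (void)args;
    }

    static void disable_shader_logging(void)
    {
        vkd3d_shader_set_log_callback(silence_log);
    }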

@ -19,6 +19,8 @@
#ifndef __VKD3D_TYPES_H
#define __VKD3D_TYPES_H
#include <stdarg.h>
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
@ -53,6 +55,8 @@ enum vkd3d_result
VKD3D_FORCE_32_BIT_ENUM(VKD3D_RESULT),
};
typedef void (*PFN_vkd3d_log)(const char *format, va_list args);
#ifdef _WIN32
# define VKD3D_IMPORT
# define VKD3D_EXPORT

@ -68,6 +68,25 @@ enum vkd3d_dbg_level vkd3d_dbg_get_level(void)
return level;
}
static PFN_vkd3d_log log_callback;
static void vkd3d_dbg_voutput(const char *fmt, va_list args)
{
if (log_callback)
log_callback(fmt, args);
else
vfprintf(stderr, fmt, args);
}
static void vkd3d_dbg_output(const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
vkd3d_dbg_voutput(fmt, args);
va_end(args);
}
void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...)
{
va_list args;
@ -77,12 +96,17 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch
assert(level < ARRAY_SIZE(debug_level_names));
fprintf(stderr, "%s:%s: ", debug_level_names[level], function);
vkd3d_dbg_output("%s:%s: ", debug_level_names[level], function);
va_start(args, fmt);
vfprintf(stderr, fmt, args);
vkd3d_dbg_voutput(fmt, args);
va_end(args);
}
void vkd3d_dbg_set_log_callback(PFN_vkd3d_log callback)
{
log_callback = callback;
}
static char *get_buffer(void)
{
static char buffers[VKD3D_DEBUG_BUFFER_COUNT][VKD3D_DEBUG_BUFFER_SIZE];

@ -837,6 +837,7 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] =
{VKD3D_SM5_OP_DDIV, VKD3DSIH_DDIV, "d", "dd"},
{VKD3D_SM5_OP_DFMA, VKD3DSIH_DFMA, "d", "ddd"},
{VKD3D_SM5_OP_DRCP, VKD3DSIH_DRCP, "d", "d"},
{VKD3D_SM5_OP_MSAD, VKD3DSIH_MSAD, "u", "uuu"},
{VKD3D_SM5_OP_DTOI, VKD3DSIH_DTOI, "i", "d"},
{VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"},
{VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"},

@ -202,6 +202,13 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type
}
}
/* Returns the size of a type, considered as part of an array of that type.
* As such it includes padding after the type. */
unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type)
{
return align(type->reg_size, 4);
}
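
For illustration (not from the patch): if a vector type had reg_size 3, each array element would be strided at align(3, 4) = 4 register components, so a two-element array would span 8 components with one unused component of padding after each element.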
static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, enum hlsl_type_class type_class,
enum hlsl_base_type base_type, unsigned dimx, unsigned dimy)
{
@ -225,6 +232,85 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e
return type;
}
/* Returns the register offset of a given component within a type, given its index.
* *comp_type will be set to the type of the component. */
unsigned int hlsl_compute_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type,
unsigned int idx, struct hlsl_type **comp_type)
{
switch (type->type)
{
case HLSL_CLASS_SCALAR:
case HLSL_CLASS_VECTOR:
{
assert(idx < type->dimx * type->dimy);
*comp_type = hlsl_get_scalar_type(ctx, type->base_type);
return idx;
}
case HLSL_CLASS_MATRIX:
{
unsigned int minor, major, x = idx % type->dimx, y = idx / type->dimx;
assert(idx < type->dimx * type->dimy);
if (hlsl_type_is_row_major(type))
{
minor = x;
major = y;
}
else
{
minor = y;
major = x;
}
*comp_type = hlsl_get_scalar_type(ctx, type->base_type);
return 4 * major + minor;
}
case HLSL_CLASS_ARRAY:
{
unsigned int elem_comp_count = hlsl_type_component_count(type->e.array.type);
unsigned int array_idx = idx / elem_comp_count;
unsigned int idx_in_elem = idx % elem_comp_count;
assert(array_idx < type->e.array.elements_count);
return array_idx * hlsl_type_get_array_element_reg_size(type->e.array.type) +
hlsl_compute_component_offset(ctx, type->e.array.type, idx_in_elem, comp_type);
}
case HLSL_CLASS_STRUCT:
{
struct hlsl_struct_field *field;
LIST_FOR_EACH_ENTRY(field, type->e.elements, struct hlsl_struct_field, entry)
{
unsigned int elem_comp_count = hlsl_type_component_count(field->type);
if (idx < elem_comp_count)
{
return field->reg_offset +
hlsl_compute_component_offset(ctx, field->type, idx, comp_type);
}
idx -= elem_comp_count;
}
assert(0);
return 0;
}
case HLSL_CLASS_OBJECT:
{
assert(idx == 0);
*comp_type = type;
return 0;
}
}
assert(0);
return 0;
}
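
A standalone illustration of the matrix case above, not part of the patch: the offset of component idx is 4 * major + minor, where the major axis is rows for row_major types and columns otherwise.

    #include <stdbool.h>
    #include <stdio.h>

    /* Mirrors only the HLSL_CLASS_MATRIX arithmetic of hlsl_compute_component_offset(). */
    static unsigned int matrix_component_offset(unsigned int dimx, bool row_major, unsigned int idx)
    {
        unsigned int x = idx % dimx, y = idx / dimx;
        unsigned int minor = row_major ? x : y;
        unsigned int major = row_major ? y : x;

        return 4 * major + minor;
    }

    int main(void)
    {
        /* For a matrix type with dimx = 3, component 2 lands at offset 2 when
         * row-major and at offset 8 when column-major. */
        printf("%u %u\n", matrix_component_offset(3, true, 2), matrix_component_offset(3, false, 2));
        return 0;
    }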
struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, unsigned int array_size)
{
struct hlsl_type *type;
@ -556,27 +642,44 @@ struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir
return hlsl_new_store(ctx, lhs, NULL, rhs, 0, rhs->loc);
}
struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n,
const struct vkd3d_shader_location loc)
struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type,
const struct vkd3d_shader_location *loc)
{
struct hlsl_ir_constant *c;
assert(type->type <= HLSL_CLASS_VECTOR);
if (!(c = hlsl_alloc(ctx, sizeof(*c))))
return NULL;
init_node(&c->node, HLSL_IR_CONSTANT, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc);
init_node(&c->node, HLSL_IR_CONSTANT, type, *loc);
return c;
}
struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n,
const struct vkd3d_shader_location *loc)
{
struct hlsl_ir_constant *c;
c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc);
if (c)
c->value[0].i = n;
return c;
}
struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n,
const struct vkd3d_shader_location loc)
const struct vkd3d_shader_location *loc)
{
struct hlsl_ir_constant *c;
if (!(c = hlsl_alloc(ctx, sizeof(*c))))
return NULL;
init_node(&c->node, HLSL_IR_CONSTANT, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc);
c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc);
if (c)
c->value[0].u = n;
return c;
}
@ -1144,7 +1247,7 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl
switch (type->base_type)
{
case HLSL_TYPE_BOOL:
vkd3d_string_buffer_printf(buffer, "%s ", value->b ? "true" : "false");
vkd3d_string_buffer_printf(buffer, "%s ", value->u ? "true" : "false");
break;
case HLSL_TYPE_DOUBLE:

@ -152,6 +152,7 @@ struct hlsl_struct_field
struct hlsl_type *type;
const char *name;
struct hlsl_semantic semantic;
unsigned int modifiers;
unsigned int reg_offset;
size_t name_bytecode_offset;
@ -415,7 +416,6 @@ struct hlsl_ir_constant
int32_t i;
float f;
double d;
bool b;
} value[4];
struct hlsl_reg reg;
};
@ -733,12 +733,14 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type
const struct hlsl_reg_reservation *reservation, struct vkd3d_shader_location loc);
struct hlsl_ir_expr *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type,
const struct vkd3d_shader_location *loc);
struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type,
const struct vkd3d_shader_location *loc);
struct hlsl_ir_expr *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node);
struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type,
struct list *parameters, const struct hlsl_semantic *semantic, struct vkd3d_shader_location loc);
struct hlsl_ir_if *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct vkd3d_shader_location loc);
struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n,
const struct vkd3d_shader_location loc);
const struct vkd3d_shader_location *loc);
struct hlsl_ir_jump *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc);
struct hlsl_ir_load *hlsl_new_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct hlsl_ir_node *offset,
struct hlsl_type *type, struct vkd3d_shader_location loc);
@ -757,7 +759,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *nam
const struct vkd3d_shader_location loc);
struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format);
struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n,
const struct vkd3d_shader_location loc);
const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg,
struct vkd3d_shader_location loc);
struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type,
@ -783,6 +785,9 @@ bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type);
struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old,
unsigned int default_majority, unsigned int modifiers);
unsigned int hlsl_type_component_count(struct hlsl_type *type);
unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type);
unsigned int hlsl_compute_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type,
unsigned int idx, struct hlsl_type **comp_type);
unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset);
bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2);
@ -793,8 +798,7 @@ unsigned int hlsl_swizzle_from_writemask(unsigned int writemask);
bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset);
unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref);
struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
const struct hlsl_type *type);
struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref);
bool hlsl_fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context);

@ -197,15 +197,15 @@ row_major {return KW_ROW_MAJOR; }
return C_FLOAT;
}
0x[0-9a-fA-F]+ {
sscanf(yytext, "0x%x", &yylval->intval);
yylval->intval = vkd3d_parse_integer(yytext);
return C_INTEGER;
}
0[0-7]+ {
sscanf(yytext, "0%o", &yylval->intval);
yylval->intval = vkd3d_parse_integer(yytext);
return C_INTEGER;
}
[0-9]+ {
yylval->intval = (atoi(yytext));
yylval->intval = vkd3d_parse_integer(yytext);
return C_INTEGER;
}
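
The body of vkd3d_parse_integer() is not shown in this hunk; a minimal standalone sketch of a helper with equivalent behaviour, with an illustrative name and signature, could rely on strtoul() with base 0, which already understands the 0x..., 0... and plain decimal prefixes matched above:

    #include <stdlib.h>

    /* Hypothetical stand-in for the real helper. */
    static unsigned int parse_integer_literal(const char *text)
    {
        return (unsigned int)strtoul(text, NULL, 0);
    }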

File diff suppressed because it is too large.

@ -21,6 +21,22 @@
#include "hlsl.h"
#include <stdio.h>
static unsigned int minor_size(const struct hlsl_type *type)
{
if (type->modifiers & HLSL_MODIFIER_ROW_MAJOR)
return type->dimx;
else
return type->dimy;
}
static unsigned int major_size(const struct hlsl_type *type)
{
if (type->modifiers & HLSL_MODIFIER_ROW_MAJOR)
return type->dimy;
else
return type->dimx;
}
/* Split uniforms into two variables representing the constant and temp
* registers, and copy the former to the latter, so that writes to uniforms
* work. */
@ -34,7 +50,8 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru
/* Use the synthetic name for the temp, rather than the uniform, so that we
* can write the uniform name into the shader reflection data. */
if (!(uniform = hlsl_new_var(ctx, temp->name, temp->data_type, temp->loc, NULL, 0, &temp->reg_reservation)))
if (!(uniform = hlsl_new_var(ctx, temp->name, temp->data_type,
temp->loc, NULL, temp->modifiers, &temp->reg_reservation)))
return;
list_add_before(&temp->scope_entry, &uniform->scope_entry);
list_add_tail(&ctx->extern_vars, &uniform->extern_entry);
@ -58,7 +75,7 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru
}
static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var,
struct hlsl_type *type, unsigned int field_offset, const struct hlsl_semantic *semantic)
struct hlsl_type *type, unsigned int field_offset, unsigned int modifiers, const struct hlsl_semantic *semantic)
{
struct vkd3d_string_buffer *name;
struct hlsl_semantic new_semantic;
@ -67,6 +84,21 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct
struct hlsl_ir_load *load;
struct hlsl_ir_var *input;
if (type->type == HLSL_CLASS_MATRIX)
{
struct hlsl_type *vector_type = hlsl_get_vector_type(ctx, type->base_type, minor_size(type));
struct hlsl_semantic vector_semantic = *semantic;
unsigned int i;
for (i = 0; i < major_size(type); ++i)
{
prepend_input_copy(ctx, instrs, var, vector_type, 4 * i, modifiers, &vector_semantic);
++vector_semantic.index;
}
return;
}
if (!(name = hlsl_get_string_buffer(ctx)))
return;
vkd3d_string_buffer_printf(name, "<input-%s%u>", semantic->name, semantic->index);
@ -76,7 +108,8 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct
return;
}
new_semantic.index = semantic->index;
if (!(input = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), type, var->loc, &new_semantic, 0, NULL)))
if (!(input = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer),
type, var->loc, &new_semantic, modifiers, NULL)))
{
hlsl_release_string_buffer(ctx, name);
vkd3d_free((void *)new_semantic.name);
@ -92,7 +125,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct
return;
list_add_head(instrs, &load->node.entry);
if (!(offset = hlsl_new_uint_constant(ctx, field_offset, var->loc)))
if (!(offset = hlsl_new_uint_constant(ctx, field_offset, &var->loc)))
return;
list_add_after(&load->node.entry, &offset->node.entry);
@ -111,7 +144,8 @@ static void prepend_input_struct_copy(struct hlsl_ctx *ctx, struct list *instrs,
if (field->type->type == HLSL_CLASS_STRUCT)
prepend_input_struct_copy(ctx, instrs, var, field->type, field_offset + field->reg_offset);
else if (field->semantic.name)
prepend_input_copy(ctx, instrs, var, field->type, field_offset + field->reg_offset, &field->semantic);
prepend_input_copy(ctx, instrs, var, field->type,
field_offset + field->reg_offset, field->modifiers, &field->semantic);
else
hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC,
"Field '%s' is missing a semantic.", field->name);
@ -125,11 +159,11 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, st
if (var->data_type->type == HLSL_CLASS_STRUCT)
prepend_input_struct_copy(ctx, instrs, var, var->data_type, 0);
else if (var->semantic.name)
prepend_input_copy(ctx, instrs, var, var->data_type, 0, &var->semantic);
prepend_input_copy(ctx, instrs, var, var->data_type, 0, var->modifiers, &var->semantic);
}
static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var,
struct hlsl_type *type, unsigned int field_offset, const struct hlsl_semantic *semantic)
struct hlsl_type *type, unsigned int field_offset, unsigned int modifiers, const struct hlsl_semantic *semantic)
{
struct vkd3d_string_buffer *name;
struct hlsl_semantic new_semantic;
@ -138,6 +172,21 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct
struct hlsl_ir_var *output;
struct hlsl_ir_load *load;
if (type->type == HLSL_CLASS_MATRIX)
{
struct hlsl_type *vector_type = hlsl_get_vector_type(ctx, type->base_type, minor_size(type));
struct hlsl_semantic vector_semantic = *semantic;
unsigned int i;
for (i = 0; i < major_size(type); ++i)
{
append_output_copy(ctx, instrs, var, vector_type, 4 * i, modifiers, &vector_semantic);
++vector_semantic.index;
}
return;
}
if (!(name = hlsl_get_string_buffer(ctx)))
return;
vkd3d_string_buffer_printf(name, "<output-%s%u>", semantic->name, semantic->index);
@ -147,7 +196,8 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct
return;
}
new_semantic.index = semantic->index;
if (!(output = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), type, var->loc, &new_semantic, 0, NULL)))
if (!(output = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer),
type, var->loc, &new_semantic, modifiers, NULL)))
{
vkd3d_free((void *)new_semantic.name);
hlsl_release_string_buffer(ctx, name);
@ -159,7 +209,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct
list_add_before(&var->scope_entry, &output->scope_entry);
list_add_tail(&ctx->extern_vars, &output->extern_entry);
if (!(offset = hlsl_new_uint_constant(ctx, field_offset, var->loc)))
if (!(offset = hlsl_new_uint_constant(ctx, field_offset, &var->loc)))
return;
list_add_tail(instrs, &offset->node.entry);
@ -182,7 +232,8 @@ static void append_output_struct_copy(struct hlsl_ctx *ctx, struct list *instrs,
if (field->type->type == HLSL_CLASS_STRUCT)
append_output_struct_copy(ctx, instrs, var, field->type, field_offset + field->reg_offset);
else if (field->semantic.name)
append_output_copy(ctx, instrs, var, field->type, field_offset + field->reg_offset, &field->semantic);
append_output_copy(ctx, instrs, var, field->type,
field_offset + field->reg_offset, field->modifiers, &field->semantic);
else
hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC,
"Field '%s' is missing a semantic.", field->name);
@ -197,7 +248,7 @@ static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, st
if (var->data_type->type == HLSL_CLASS_STRUCT)
append_output_struct_copy(ctx, instrs, var, var->data_type, 0);
else if (var->semantic.name)
append_output_copy(ctx, instrs, var, var->data_type, 0, &var->semantic);
append_output_copy(ctx, instrs, var, var->data_type, 0, var->modifiers, &var->semantic);
}
static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *),
@ -241,6 +292,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v
&& src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR
&& src_type->dimx == 1)
{
struct hlsl_ir_node *replacement;
struct hlsl_ir_swizzle *swizzle;
struct hlsl_ir_expr *new_cast;
@ -250,19 +302,33 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v
if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_scalar_type, &cast->node.loc)))
return false;
list_add_after(&cast->node.entry, &new_cast->node.entry);
if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), dst_type->dimx, &new_cast->node, &cast->node.loc)))
replacement = &new_cast->node;
if (dst_type->dimx != 1)
{
if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), dst_type->dimx, replacement, &cast->node.loc)))
return false;
list_add_after(&new_cast->node.entry, &swizzle->node.entry);
replacement = &swizzle->node;
}
hlsl_replace_node(&cast->node, &swizzle->node);
hlsl_replace_node(&cast->node, replacement);
return true;
}
return false;
}
enum copy_propagation_value_state
{
VALUE_STATE_NOT_WRITTEN = 0,
VALUE_STATE_STATICALLY_WRITTEN,
VALUE_STATE_DYNAMICALLY_WRITTEN,
};
struct copy_propagation_value
{
enum copy_propagation_value_state state;
struct hlsl_ir_node *node;
unsigned int component;
};
@ -277,6 +343,7 @@ struct copy_propagation_var_def
struct copy_propagation_state
{
struct rb_tree var_defs;
struct copy_propagation_state *parent;
};
static int copy_propagation_var_def_compare(const void *key, const struct rb_entry *entry)
@ -294,14 +361,31 @@ static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *conte
vkd3d_free(var_def);
}
static struct copy_propagation_var_def *copy_propagation_get_var_def(const struct copy_propagation_state *state,
const struct hlsl_ir_var *var)
static struct copy_propagation_value *copy_propagation_get_value(const struct copy_propagation_state *state,
const struct hlsl_ir_var *var, unsigned component)
{
for (; state; state = state->parent)
{
struct rb_entry *entry = rb_get(&state->var_defs, var);
if (entry)
return RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry);
else
{
struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry);
enum copy_propagation_value_state state = var_def->values[component].state;
assert(component < var_def->var->data_type->reg_size);
switch (state)
{
case VALUE_STATE_STATICALLY_WRITTEN:
return &var_def->values[component];
case VALUE_STATE_DYNAMICALLY_WRITTEN:
return NULL;
case VALUE_STATE_NOT_WRITTEN:
break;
}
}
}
return NULL;
}
@ -326,10 +410,28 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h
return var_def;
}
static void copy_propagation_invalidate_variable(struct copy_propagation_var_def *var_def,
unsigned offset, unsigned char writemask)
{
unsigned i;
TRACE("Invalidate variable %s[%u]%s.\n", var_def->var->name, offset, debug_hlsl_writemask(writemask));
for (i = 0; i < 4; ++i)
{
if (writemask & (1u << i))
var_def->values[offset + i].state = VALUE_STATE_DYNAMICALLY_WRITTEN;
}
}
static void copy_propagation_invalidate_whole_variable(struct copy_propagation_var_def *var_def)
{
unsigned i;
TRACE("Invalidate variable %s.\n", var_def->var->name);
memset(var_def->values, 0, sizeof(*var_def->values) * var_def->var->data_type->reg_size);
for (i = 0; i < var_def->var->data_type->reg_size; ++i)
var_def->values[i].state = VALUE_STATE_DYNAMICALLY_WRITTEN;
}
static void copy_propagation_set_value(struct copy_propagation_var_def *var_def, unsigned int offset,
@ -343,6 +445,7 @@ static void copy_propagation_set_value(struct copy_propagation_var_def *var_def,
{
TRACE("Variable %s[%u] is written by instruction %p%s.\n",
var_def->var->name, offset + i, node, debug_hlsl_writemask(1u << i));
var_def->values[offset + i].state = VALUE_STATE_STATICALLY_WRITTEN;
var_def->values[offset + i].node = node;
var_def->values[offset + i].component = j++;
}
@ -354,32 +457,34 @@ static struct hlsl_ir_node *copy_propagation_compute_replacement(struct hlsl_ctx
unsigned int count, unsigned int *swizzle)
{
const struct hlsl_ir_var *var = deref->var;
struct copy_propagation_var_def *var_def;
struct hlsl_ir_node *node = NULL;
unsigned int offset, i;
if (!hlsl_offset_from_deref(ctx, deref, &offset))
return NULL;
if (!(var_def = copy_propagation_get_var_def(state, var)))
return NULL;
assert(offset + count <= var_def->var->data_type->reg_size);
if (var->data_type->type != HLSL_CLASS_OBJECT)
assert(offset + count <= var->data_type->reg_size);
*swizzle = 0;
for (i = 0; i < count; ++i)
{
struct copy_propagation_value *value = copy_propagation_get_value(state, var, offset + i);
if (!value)
return NULL;
if (!node)
{
node = var_def->values[offset + i].node;
node = value->node;
}
else if (node != var_def->values[offset + i].node)
else if (node != value->node)
{
TRACE("No single source for propagating load from %s[%u-%u].\n", var->name, offset, offset + count);
return NULL;
}
*swizzle |= var_def->values[offset + i].component << i * 2;
*swizzle |= value->component << i * 2;
}
TRACE("Load from %s[%u-%u] propagated as instruction %p%s.\n",
@ -483,6 +588,113 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s
}
}
static void copy_propagation_state_init(struct hlsl_ctx *ctx, struct copy_propagation_state *state,
struct copy_propagation_state *parent)
{
rb_init(&state->var_defs, copy_propagation_var_def_compare);
state->parent = parent;
}
static void copy_propagation_state_destroy(struct copy_propagation_state *state)
{
rb_destroy(&state->var_defs, copy_propagation_var_def_destroy, NULL);
}
static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state,
struct hlsl_block *block)
{
struct hlsl_ir_node *instr;
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
{
switch (instr->type)
{
case HLSL_IR_STORE:
{
struct hlsl_ir_store *store = hlsl_ir_store(instr);
struct copy_propagation_var_def *var_def;
struct hlsl_deref *lhs = &store->lhs;
struct hlsl_ir_var *var = lhs->var;
unsigned int offset;
if (!(var_def = copy_propagation_create_var_def(ctx, state, var)))
continue;
if (hlsl_offset_from_deref(ctx, lhs, &offset))
copy_propagation_invalidate_variable(var_def, offset, store->writemask);
else
copy_propagation_invalidate_whole_variable(var_def);
break;
}
case HLSL_IR_IF:
{
struct hlsl_ir_if *iff = hlsl_ir_if(instr);
copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs);
copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs);
break;
}
case HLSL_IR_LOOP:
{
struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
copy_propagation_invalidate_from_block(ctx, state, &loop->body);
break;
}
default:
break;
}
}
}
static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block,
struct copy_propagation_state *state);
static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff,
struct copy_propagation_state *state)
{
struct copy_propagation_state inner_state;
bool progress = false;
copy_propagation_state_init(ctx, &inner_state, state);
progress |= copy_propagation_transform_block(ctx, &iff->then_instrs, &inner_state);
copy_propagation_state_destroy(&inner_state);
copy_propagation_state_init(ctx, &inner_state, state);
progress |= copy_propagation_transform_block(ctx, &iff->else_instrs, &inner_state);
copy_propagation_state_destroy(&inner_state);
/* Ideally we'd invalidate the outer state looking at what was
* touched in the two inner states, but this doesn't work for
* loops (because we need to know what is invalidated in advance),
* so we need copy_propagation_invalidate_from_block() anyway. */
copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs);
copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs);
return progress;
}
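
A C analogue, for illustration only, of the pattern the comment above guards against:

    #include <stdio.h>

    int main(void)
    {
        int cond = 1;
        int x = 1;   /* statically known in the outer state */
        int y;

        if (cond)
            x = 2;   /* written inside a branch, so the outer state must mark x as dynamically written */

        y = x;       /* this load can no longer be replaced by the constant 1 */
        printf("%d\n", y);
        return 0;
    }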
static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop,
struct copy_propagation_state *state)
{
struct copy_propagation_state inner_state;
bool progress = false;
copy_propagation_invalidate_from_block(ctx, state, &loop->body);
copy_propagation_state_init(ctx, &inner_state, state);
progress |= copy_propagation_transform_block(ctx, &loop->body, &inner_state);
copy_propagation_state_destroy(&inner_state);
return progress;
}
static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block,
struct copy_propagation_state *state)
{
@ -506,12 +718,12 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b
break;
case HLSL_IR_IF:
FIXME("Copy propagation doesn't support conditionals yet, leaving.\n");
return progress;
progress |= copy_propagation_process_if(ctx, hlsl_ir_if(instr), state);
break;
case HLSL_IR_LOOP:
FIXME("Copy propagation doesn't support loops yet, leaving.\n");
return progress;
progress |= copy_propagation_process_loop(ctx, hlsl_ir_loop(instr), state);
break;
default:
break;
@ -526,11 +738,11 @@ static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bl
struct copy_propagation_state state;
bool progress;
rb_init(&state.var_defs, copy_propagation_var_def_compare);
copy_propagation_state_init(ctx, &state, NULL);
progress = copy_propagation_transform_block(ctx, block, &state);
rb_destroy(&state.var_defs, copy_propagation_var_def_destroy, NULL);
copy_propagation_state_destroy(&state);
return progress;
}
@ -562,8 +774,10 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst
return false;
}
/* Helper for split_array_copies() and split_struct_copies(). Inserts new
* instructions right before "store". */
/* Copy an element of a complex variable. Helper for
* split_array_copies(), split_struct_copies() and
* split_matrix_copies(). Inserts new instructions right before
* "store". */
static bool split_copy(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
const struct hlsl_ir_load *load, const unsigned int offset, struct hlsl_type *type)
{
@ -572,7 +786,7 @@ static bool split_copy(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
struct hlsl_ir_load *split_load;
struct hlsl_ir_constant *c;
if (!(c = hlsl_new_uint_constant(ctx, offset, store->node.loc)))
if (!(c = hlsl_new_uint_constant(ctx, offset, &store->node.loc)))
return false;
list_add_before(&store->node.entry, &c->node.entry);
@ -621,7 +835,13 @@ static bool split_array_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
if (type->type != HLSL_CLASS_ARRAY)
return false;
element_type = type->e.array.type;
element_size = element_type->reg_size;
element_size = hlsl_type_get_array_element_reg_size(element_type);
if (rhs->type != HLSL_IR_LOAD)
{
hlsl_fixme(ctx, &instr->loc, "Array store rhs is not HLSL_IR_LOAD. Broadcast may be missing.");
return false;
}
for (i = 0; i < type->e.array.elements_count; ++i)
{
@ -653,6 +873,12 @@ static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
if (type->type != HLSL_CLASS_STRUCT)
return false;
if (rhs->type != HLSL_IR_LOAD)
{
hlsl_fixme(ctx, &instr->loc, "Struct store rhs is not HLSL_IR_LOAD. Broadcast may be missing.");
return false;
}
LIST_FOR_EACH_ENTRY(field, type->e.elements, struct hlsl_struct_field, entry)
{
if (!split_copy(ctx, store, hlsl_ir_load(rhs), field->reg_offset, field->type))
@ -667,6 +893,41 @@ static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
return true;
}
static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
const struct hlsl_ir_node *rhs;
struct hlsl_type *element_type;
const struct hlsl_type *type;
unsigned int i;
struct hlsl_ir_store *store;
if (instr->type != HLSL_IR_STORE)
return false;
store = hlsl_ir_store(instr);
rhs = store->rhs.node;
type = rhs->data_type;
if (type->type != HLSL_CLASS_MATRIX)
return false;
element_type = hlsl_get_vector_type(ctx, type->base_type, minor_size(type));
if (rhs->type != HLSL_IR_LOAD)
{
hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type.\n");
return false;
}
for (i = 0; i < major_size(type); ++i)
{
if (!split_copy(ctx, store, hlsl_ir_load(rhs), 4 * i, element_type))
return false;
}
list_remove(&store->node.entry);
hlsl_free_instr(&store->node);
return true;
}
static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
const struct hlsl_type *src_type, *dst_type;
@ -745,6 +1006,37 @@ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, voi
return true;
}
static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
struct hlsl_type *type = instr->data_type, *arg_type;
struct hlsl_ir_constant *zero;
struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR)
return false;
expr = hlsl_ir_expr(instr);
if (expr->op != HLSL_OP1_CAST)
return false;
arg_type = expr->operands[0].node->data_type;
if (type->type > HLSL_CLASS_VECTOR || arg_type->type > HLSL_CLASS_VECTOR)
return false;
if (type->base_type != HLSL_TYPE_BOOL)
return false;
/* Narrowing casts should have already been lowered. */
assert(type->dimx == arg_type->dimx);
zero = hlsl_new_constant(ctx, arg_type, &instr->loc);
if (!zero)
return false;
list_add_before(&instr->entry, &zero->node.entry);
expr->op = HLSL_OP2_NEQUAL;
hlsl_src_from_node(&expr->operands[1], &zero->node);
return true;
}
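
In effect (illustration only), a cast such as (bool)x is rewritten into the comparison x != 0 against a zero constant of the argument's type, so later passes never see a direct conversion to bool.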
static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
switch (instr->type)
@ -1086,7 +1378,7 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir
var->last_read, var->data_type->reg_size);
else
var->reg = allocate_register(ctx, liveness, var->first_write,
var->last_read, var->data_type->dimx);
var->last_read, hlsl_type_component_count(var->data_type));
TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name,
debug_register('r', var->reg, var->data_type), var->first_write, var->last_read);
}
@ -1200,7 +1492,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b
switch (type->base_type)
{
case HLSL_TYPE_BOOL:
f = value->b;
f = !!value->u;
break;
case HLSL_TYPE_FLOAT:
@ -1542,11 +1834,6 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_base_type type)
}
}
static bool type_is_single_reg(const struct hlsl_type *type)
{
return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_VECTOR;
}
bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset)
{
struct hlsl_ir_node *offset_node = deref->offset.node;
@ -1569,7 +1856,7 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref
if (*offset >= deref->var->data_type->reg_size)
{
hlsl_error(ctx, &deref->offset.node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS,
"Dereference is out of bounds.");
"Dereference is out of bounds. %u/%u", *offset, deref->var->data_type->reg_size);
return false;
}
@ -1589,8 +1876,7 @@ unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl
return 0;
}
struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
const struct hlsl_type *type)
struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref)
{
const struct hlsl_ir_var *var = deref->var;
struct hlsl_reg ret = var->reg;
@ -1598,16 +1884,10 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere
ret.id += offset / 4;
if (type_is_single_reg(var->data_type))
{
assert(!offset);
ret.writemask = var->reg.writemask;
}
else
{
assert(type_is_single_reg(type));
ret.writemask = ((1 << type->dimx) - 1) << (offset % 4);
}
ret.writemask = 0xf & (0xf << (offset % 4));
if (var->reg.writemask)
ret.writemask = hlsl_combine_writemasks(var->reg.writemask, ret.writemask);
return ret;
}
@ -1661,7 +1941,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
progress |= transform_ir(ctx, split_struct_copies, body, NULL);
}
while (progress);
transform_ir(ctx, split_matrix_copies, body, NULL);
transform_ir(ctx, lower_narrowing_casts, body, NULL);
transform_ir(ctx, lower_casts_to_bool, body, NULL);
do
{
progress = transform_ir(ctx, hlsl_fold_constants, body, NULL);

@ -18,6 +18,8 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include <math.h>
#include "hlsl.h"
static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src)
@ -27,7 +29,6 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct
int32_t i;
double d;
float f;
bool b;
if (dst->node.data_type->dimx != src->node.data_type->dimx
|| dst->node.data_type->dimy != src->node.data_type->dimy)
@ -47,7 +48,6 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct
i = src->value[k].f;
f = src->value[k].f;
d = src->value[k].f;
b = src->value[k].f;
break;
case HLSL_TYPE_DOUBLE:
@ -55,7 +55,6 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct
i = src->value[k].d;
f = src->value[k].d;
d = src->value[k].d;
b = src->value[k].d;
break;
case HLSL_TYPE_INT:
@ -63,7 +62,6 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct
i = src->value[k].i;
f = src->value[k].i;
d = src->value[k].i;
b = src->value[k].i;
break;
case HLSL_TYPE_UINT:
@ -71,20 +69,17 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct
i = src->value[k].u;
f = src->value[k].u;
d = src->value[k].u;
b = src->value[k].u;
break;
case HLSL_TYPE_BOOL:
u = src->value[k].b;
i = src->value[k].b;
f = src->value[k].b;
d = src->value[k].b;
b = src->value[k].b;
u = !!src->value[k].u;
i = !!src->value[k].u;
f = !!src->value[k].u;
d = !!src->value[k].u;
break;
default:
FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type),
debug_hlsl_type(ctx, dst->node.data_type));
assert(0);
return false;
}
@ -108,12 +103,12 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct
break;
case HLSL_TYPE_BOOL:
dst->value[k].b = b;
/* Casts to bool should have already been lowered. */
assert(0);
break;
default:
FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type),
debug_hlsl_type(ctx, dst->node.data_type));
assert(0);
return false;
}
}
@ -194,11 +189,12 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2)
{
enum hlsl_base_type type = dst->node.data_type->base_type;
unsigned int k;
assert(type == src1->node.data_type->base_type);
assert(type == src2->node.data_type->base_type);
for (int k = 0; k < 4; ++k)
for (k = 0; k < 4; ++k)
{
switch (type)
{
@ -224,6 +220,155 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
return true;
}
static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2)
{
unsigned int k;
assert(dst->node.data_type->base_type == HLSL_TYPE_BOOL);
assert(src1->node.data_type->base_type == src2->node.data_type->base_type);
for (k = 0; k < 4; ++k)
{
switch (src1->node.data_type->base_type)
{
case HLSL_TYPE_FLOAT:
case HLSL_TYPE_HALF:
dst->value[k].u = src1->value[k].f != src2->value[k].f;
break;
case HLSL_TYPE_DOUBLE:
dst->value[k].u = src1->value[k].d != src2->value[k].d;
break;
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
case HLSL_TYPE_BOOL:
dst->value[k].u = src1->value[k].u != src2->value[k].u;
break;
default:
assert(0);
return false;
}
dst->value[k].u *= ~0u;
}
return true;
}
static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2)
{
enum hlsl_base_type type = dst->node.data_type->base_type;
unsigned int k;
assert(type == src1->node.data_type->base_type);
assert(type == src2->node.data_type->base_type);
for (k = 0; k < dst->node.data_type->dimx; ++k)
{
switch (type)
{
case HLSL_TYPE_FLOAT:
case HLSL_TYPE_HALF:
if (ctx->profile->major_version >= 4 && src2->value[k].f == 0)
{
hlsl_warning(ctx, &dst->node.loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO,
"Floating point division by zero.");
}
dst->value[k].f = src1->value[k].f / src2->value[k].f;
if (ctx->profile->major_version < 4 && !isfinite(dst->value[k].f))
{
hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO,
"Infinities and NaNs are not allowed by the shader model.");
}
break;
case HLSL_TYPE_DOUBLE:
if (src2->value[k].d == 0)
{
hlsl_warning(ctx, &dst->node.loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO,
"Floating point division by zero.");
}
dst->value[k].d = src1->value[k].d / src2->value[k].d;
break;
case HLSL_TYPE_INT:
if (src2->value[k].i == 0)
{
hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO,
"Division by zero.");
return false;
}
if (src1->value[k].i == INT_MIN && src2->value[k].i == -1)
dst->value[k].i = INT_MIN;
else
dst->value[k].i = src1->value[k].i / src2->value[k].i;
break;
case HLSL_TYPE_UINT:
if (src2->value[k].u == 0)
{
hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO,
"Division by zero.");
return false;
}
dst->value[k].u = src1->value[k].u / src2->value[k].u;
break;
default:
FIXME("Fold division for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type));
return false;
}
}
return true;
}
static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2)
{
enum hlsl_base_type type = dst->node.data_type->base_type;
unsigned int k;
assert(type == src1->node.data_type->base_type);
assert(type == src2->node.data_type->base_type);
for (k = 0; k < dst->node.data_type->dimx; ++k)
{
switch (type)
{
case HLSL_TYPE_INT:
if (src2->value[k].i == 0)
{
hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO,
"Division by zero.");
return false;
}
if (src1->value[k].i == INT_MIN && src2->value[k].i == -1)
dst->value[k].i = 0;
else
dst->value[k].i = src1->value[k].i % src2->value[k].i;
break;
case HLSL_TYPE_UINT:
if (src2->value[k].u == 0)
{
hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO,
"Division by zero.");
return false;
}
dst->value[k].u = src1->value[k].u % src2->value[k].u;
break;
default:
FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type));
return false;
}
}
return true;
}
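
The INT_MIN / -1 special cases above avoid signed overflow during folding; a standalone sketch of the same guard, not part of the patch:

    #include <limits.h>
    #include <stdio.h>

    static int fold_int_div(int a, int b)
    {
        /* b == 0 is rejected earlier with a division-by-zero error. */
        if (a == INT_MIN && b == -1)
            return INT_MIN;   /* a / b would overflow; mirror the folder's result */
        return a / b;
    }

    static int fold_int_mod(int a, int b)
    {
        if (a == INT_MIN && b == -1)
            return 0;         /* a % b would overflow; the folder yields 0 */
        return a % b;
    }

    int main(void)
    {
        printf("%d %d\n", fold_int_div(INT_MIN, -1), fold_int_mod(INT_MIN, -1));
        return 0;
    }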
bool hlsl_fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
struct hlsl_ir_constant *arg1, *arg2 = NULL, *res;
@ -235,10 +380,17 @@ bool hlsl_fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void
return false;
expr = hlsl_ir_expr(instr);
if (instr->data_type->type > HLSL_CLASS_VECTOR)
return false;
for (i = 0; i < ARRAY_SIZE(expr->operands); ++i)
{
if (expr->operands[i].node && expr->operands[i].node->type != HLSL_IR_CONSTANT)
if (expr->operands[i].node)
{
if (expr->operands[i].node->type != HLSL_IR_CONSTANT)
return false;
assert(expr->operands[i].node->data_type->type <= HLSL_CLASS_VECTOR);
}
}
arg1 = hlsl_ir_constant(expr->operands[0].node);
if (expr->operands[1].node)
@ -266,6 +418,18 @@ bool hlsl_fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void
success = fold_mul(ctx, res, arg1, arg2);
break;
case HLSL_OP2_NEQUAL:
success = fold_nequal(ctx, res, arg1, arg2);
break;
case HLSL_OP2_DIV:
success = fold_div(ctx, res, arg1, arg2);
break;
case HLSL_OP2_MOD:
success = fold_mod(ctx, res, arg1, arg2);
break;
default:
FIXME("Fold \"%s\" expression.\n", debug_hlsl_expr_op(expr->op));
success = false;

@ -663,7 +663,7 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b
static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr)
{
const struct hlsl_ir_load *load = hlsl_ir_load(instr);
const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src, instr->data_type);
const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src);
struct sm1_instruction sm1_instr =
{
.opcode = D3DSIO_MOV,
@ -707,7 +707,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *
{
const struct hlsl_ir_store *store = hlsl_ir_store(instr);
const struct hlsl_ir_node *rhs = store->rhs.node;
const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs, rhs->data_type);
const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs);
struct sm1_instruction sm1_instr =
{
.opcode = D3DSIO_MOV,
@ -790,7 +790,7 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b
}
else if (instr->data_type->type == HLSL_CLASS_OBJECT)
{
hlsl_fixme(ctx, &instr->loc, "Object copy.\n");
hlsl_fixme(ctx, &instr->loc, "Object copy.");
break;
}

@ -25,6 +25,20 @@
static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block);
static bool type_is_integer(const struct hlsl_type *type)
{
switch (type->base_type)
{
case HLSL_TYPE_BOOL:
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
return true;
default:
return false;
}
}
bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx)
{
@ -100,6 +114,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant
{"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH},
{"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED},
{"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID},
{"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION},
{"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION},
@ -275,6 +290,8 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type)
{
case HLSL_TYPE_BOOL:
return D3D_SVT_BOOL;
case HLSL_TYPE_DOUBLE:
return D3D_SVT_DOUBLE;
case HLSL_TYPE_FLOAT:
case HLSL_TYPE_HALF:
return D3D_SVT_FLOAT;
@ -870,18 +887,20 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, &reg->type, swizzle_type, &has_idx))
{
unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
if (has_idx)
{
reg->idx[0] = var->semantic.index;
reg->idx[0] = var->semantic.index + offset / 4;
reg->idx_count = 1;
}
reg->dim = VKD3D_SM4_DIMENSION_VEC4;
*writemask = (1u << data_type->dimx) - 1;
*writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
}
else
{
struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);
struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
assert(hlsl_reg.allocated);
reg->type = VKD3D_SM4_RT_INPUT;
@ -899,9 +918,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, &reg->type, swizzle_type, &has_idx))
{
unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
if (has_idx)
{
reg->idx[0] = var->semantic.index;
reg->idx[0] = var->semantic.index + offset / 4;
reg->idx_count = 1;
}
@ -909,11 +930,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
reg->dim = VKD3D_SM4_DIMENSION_SCALAR;
else
reg->dim = VKD3D_SM4_DIMENSION_VEC4;
*writemask = (1u << data_type->dimx) - 1;
*writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
}
else
{
struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);
struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
assert(hlsl_reg.allocated);
reg->type = VKD3D_SM4_RT_OUTPUT;
@ -925,7 +946,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
}
else
{
struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);
struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
assert(hlsl_reg.allocated);
reg->type = VKD3D_SM4_RT_TEMP;
@ -1198,7 +1219,14 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b
}
if (profile->type == VKD3D_SHADER_TYPE_PIXEL)
instr.opcode |= VKD3DSIM_LINEAR << VKD3D_SM4_INTERPOLATION_MODE_SHIFT;
{
enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR;
if ((var->modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type))
mode = VKD3DSIM_CONSTANT;
instr.opcode |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT;
}
}
else
{
@ -1391,7 +1419,7 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer
if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
{
hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
"Offset must resolve to integer literal in the range -8 to 7.\n");
"Offset must resolve to integer literal in the range -8 to 7.");
return;
}
}
@ -1407,31 +1435,24 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer
write_sm4_instruction(buffer, &instr);
}
static void write_sm4_expr(struct hlsl_ctx *ctx,
static bool type_is_float(const struct hlsl_type *type)
{
return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF;
}
static void write_sm4_cast(struct hlsl_ctx *ctx,
struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr)
{
const struct hlsl_ir_node *arg1 = expr->operands[0].node;
const struct hlsl_ir_node *arg2 = expr->operands[1].node;
assert(expr->node.reg.allocated);
switch (expr->node.data_type->base_type)
{
case HLSL_TYPE_FLOAT:
{
switch (expr->op)
{
case HLSL_OP1_ABS:
write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS);
break;
case HLSL_OP1_CAST:
{
const struct hlsl_type *dst_type = expr->node.data_type;
const struct hlsl_type *src_type = arg1->data_type;
/* Narrowing casts were already lowered. */
assert(src_type->dimx == expr->node.data_type->dimx);
assert(src_type->dimx == dst_type->dimx);
switch (dst_type->base_type)
{
case HLSL_TYPE_FLOAT:
switch (src_type->base_type)
{
case HLSL_TYPE_HALF:
@ -1448,83 +1469,19 @@ static void write_sm4_expr(struct hlsl_ctx *ctx,
break;
case HLSL_TYPE_BOOL:
hlsl_fixme(ctx, &expr->node.loc, "Casts from bool to float are not implemented.\n");
hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from bool to float.");
break;
case HLSL_TYPE_DOUBLE:
hlsl_fixme(ctx, &expr->node.loc, "Casts from double to float are not implemented.\n");
hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float.");
break;
default:
break;
assert(0);
}
break;
}
case HLSL_OP1_EXP2:
write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0);
break;
case HLSL_OP1_FLOOR:
write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0);
break;
case HLSL_OP1_LOG2:
write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0);
break;
case HLSL_OP1_NEG:
write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE);
break;
case HLSL_OP1_ROUND:
write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0);
break;
case HLSL_OP1_SAT:
write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV
| (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT),
&expr->node, arg1, 0);
break;
case HLSL_OP2_ADD:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2);
break;
case HLSL_OP2_DIV:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2);
break;
case HLSL_OP2_MAX:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2);
break;
case HLSL_OP2_MIN:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2);
break;
case HLSL_OP2_MUL:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 float \"%s\" expression.", debug_hlsl_expr_op(expr->op));
break;
}
break;
}
case HLSL_TYPE_INT:
{
switch (expr->op)
{
case HLSL_OP1_CAST:
{
const struct hlsl_type *src_type = arg1->data_type;
/* Narrowing casts were already lowered. */
assert(src_type->dimx == expr->node.data_type->dimx);
switch (src_type->base_type)
{
case HLSL_TYPE_HALF:
@ -1546,41 +1503,11 @@ static void write_sm4_expr(struct hlsl_ctx *ctx,
break;
default:
break;
assert(0);
}
break;
}
case HLSL_OP1_NEG:
write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0);
break;
case HLSL_OP2_MAX:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2);
break;
case HLSL_OP2_MIN:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 int \"%s\" expression.", debug_hlsl_expr_op(expr->op));
break;
}
break;
}
case HLSL_TYPE_UINT:
{
switch (expr->op)
{
case HLSL_OP1_CAST:
{
const struct hlsl_type *src_type = arg1->data_type;
/* Narrowing casts were already lowered. */
assert(src_type->dimx == expr->node.data_type->dimx);
switch (src_type->base_type)
{
case HLSL_TYPE_HALF:
@ -1594,48 +1521,170 @@ static void write_sm4_expr(struct hlsl_ctx *ctx,
break;
case HLSL_TYPE_BOOL:
hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from bool to uint.\n");
hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from bool to uint.");
break;
case HLSL_TYPE_DOUBLE:
hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint.\n");
hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint.");
break;
default:
break;
assert(0);
}
break;
}
case HLSL_OP2_MAX:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2);
case HLSL_TYPE_HALF:
hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to half.");
break;
case HLSL_OP2_MIN:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2);
case HLSL_TYPE_DOUBLE:
hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double.");
break;
case HLSL_OP2_MUL:
/* Using IMUL instead of UMUL because we're taking the low
* bits, and the native compiler generates IMUL. */
write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 uint \"%s\" expression.\n", debug_hlsl_expr_op(expr->op));
break;
}
break;
}
case HLSL_TYPE_BOOL:
{
/* Casts to bool should have already been lowered. */
assert(0);
break;
default:
assert(0);
}
}
static void write_sm4_expr(struct hlsl_ctx *ctx,
struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr)
{
const struct hlsl_ir_node *arg1 = expr->operands[0].node;
const struct hlsl_ir_node *arg2 = expr->operands[1].node;
const struct hlsl_type *dst_type = expr->node.data_type;
struct vkd3d_string_buffer *dst_type_string;
assert(expr->node.reg.allocated);
if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type)))
return;
switch (expr->op)
{
case HLSL_OP1_ABS:
switch (dst_type->base_type)
{
case HLSL_TYPE_FLOAT:
write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer);
}
break;
case HLSL_OP1_BIT_NOT:
assert(type_is_integer(dst_type));
write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0);
break;
case HLSL_OP1_CAST:
write_sm4_cast(ctx, buffer, expr);
break;
case HLSL_OP1_EXP2:
assert(type_is_float(dst_type));
write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0);
break;
case HLSL_OP1_FLOOR:
assert(type_is_float(dst_type));
write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0);
break;
case HLSL_OP1_LOG2:
assert(type_is_float(dst_type));
write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0);
break;
case HLSL_OP1_NEG:
switch (dst_type->base_type)
{
case HLSL_TYPE_FLOAT:
write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE);
break;
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer);
}
break;
case HLSL_OP1_ROUND:
assert(type_is_float(dst_type));
write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0);
break;
case HLSL_OP1_SAT:
assert(type_is_float(dst_type));
write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV
| (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT),
&expr->node, arg1, 0);
break;
case HLSL_OP2_ADD:
switch (dst_type->base_type)
{
case HLSL_TYPE_FLOAT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2);
break;
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer);
}
break;
case HLSL_OP2_BIT_AND:
assert(type_is_integer(dst_type));
write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2);
break;
case HLSL_OP2_BIT_OR:
assert(type_is_integer(dst_type));
write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2);
break;
case HLSL_OP2_BIT_XOR:
assert(type_is_integer(dst_type));
write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2);
break;
case HLSL_OP2_DIV:
switch (dst_type->base_type)
{
case HLSL_TYPE_FLOAT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2);
break;
case HLSL_TYPE_UINT:
write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer);
}
break;
case HLSL_OP2_EQUAL:
{
const struct hlsl_type *src_type = arg1->data_type;
assert(dst_type->base_type == HLSL_TYPE_BOOL);
switch (src_type->base_type)
{
case HLSL_TYPE_FLOAT:
@ -1656,61 +1705,12 @@ static void write_sm4_expr(struct hlsl_ctx *ctx,
break;
}
case HLSL_OP2_NEQUAL:
{
const struct hlsl_type *src_type = arg1->data_type;
switch (src_type->base_type)
{
case HLSL_TYPE_FLOAT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2);
break;
case HLSL_TYPE_BOOL:
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.",
debug_hlsl_type(ctx, src_type));
break;
}
break;
}
case HLSL_OP2_LESS:
{
const struct hlsl_type *src_type = arg1->data_type;
switch (src_type->base_type)
{
case HLSL_TYPE_FLOAT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2);
break;
case HLSL_TYPE_INT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2);
break;
case HLSL_TYPE_BOOL:
case HLSL_TYPE_UINT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.",
debug_hlsl_type(ctx, src_type));
break;
}
break;
}
case HLSL_OP2_GEQUAL:
{
const struct hlsl_type *src_type = arg1->data_type;
assert(dst_type->base_type == HLSL_TYPE_BOOL);
switch (src_type->base_type)
{
case HLSL_TYPE_FLOAT:
@ -1734,23 +1734,150 @@ static void write_sm4_expr(struct hlsl_ctx *ctx,
break;
}
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 bool \"%s\" expression.", debug_hlsl_expr_op(expr->op));
break;
}
break;
}
default:
case HLSL_OP2_LESS:
{
struct vkd3d_string_buffer *string;
const struct hlsl_type *src_type = arg1->data_type;
if ((string = hlsl_type_to_string(ctx, expr->node.data_type)))
hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", string->buffer);
hlsl_release_string_buffer(ctx, string);
assert(dst_type->base_type == HLSL_TYPE_BOOL);
switch (src_type->base_type)
{
case HLSL_TYPE_FLOAT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2);
break;
case HLSL_TYPE_INT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2);
break;
case HLSL_TYPE_BOOL:
case HLSL_TYPE_UINT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.",
debug_hlsl_type(ctx, src_type));
break;
}
break;
}
case HLSL_OP2_LSHIFT:
assert(type_is_integer(dst_type));
assert(dst_type->base_type != HLSL_TYPE_BOOL);
write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2);
break;
case HLSL_OP2_MAX:
switch (dst_type->base_type)
{
case HLSL_TYPE_FLOAT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2);
break;
case HLSL_TYPE_INT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2);
break;
case HLSL_TYPE_UINT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer);
}
break;
case HLSL_OP2_MIN:
switch (dst_type->base_type)
{
case HLSL_TYPE_FLOAT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2);
break;
case HLSL_TYPE_INT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2);
break;
case HLSL_TYPE_UINT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer);
}
break;
case HLSL_OP2_MOD:
switch (dst_type->base_type)
{
case HLSL_TYPE_UINT:
write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer);
}
break;
case HLSL_OP2_MUL:
switch (dst_type->base_type)
{
case HLSL_TYPE_FLOAT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2);
break;
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
/* Using IMUL instead of UMUL because we're taking the low
* bits, and the native compiler generates IMUL. */
write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer);
}
break;
case HLSL_OP2_NEQUAL:
{
const struct hlsl_type *src_type = arg1->data_type;
assert(dst_type->base_type == HLSL_TYPE_BOOL);
switch (src_type->base_type)
{
case HLSL_TYPE_FLOAT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2);
break;
case HLSL_TYPE_BOOL:
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.",
debug_hlsl_type(ctx, src_type));
break;
}
break;
}
case HLSL_OP2_RSHIFT:
assert(type_is_integer(dst_type));
assert(dst_type->base_type != HLSL_TYPE_BOOL);
write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR,
&expr->node, arg1, arg2);
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op));
}
hlsl_release_string_buffer(ctx, dst_type_string);
}
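/* For illustration, a few of the mappings the switch above implements, written
 * as hypothetical HLSL:
 *
 *     float f = a + b;   // ADD for float operands, IADD for int/uint operands
 *     uint  q = x / y;   // UDIV, keeping destination 0 (the quotient)
 *     uint  r = x % y;   // UDIV, keeping destination 1 (the remainder)
 *     uint  p = x * y;   // IMUL, keeping the low-order destination
 *
 * Unhandled type/operator combinations fall through to hlsl_fixme(). */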
static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff)
@ -1778,6 +1905,7 @@ static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf
}
instr.opcode = VKD3D_SM4_OP_ENDIF;
instr.src_count = 0;
write_sm4_instruction(buffer, &instr);
}
@ -1919,12 +2047,6 @@ static void write_sm4_store(struct hlsl_ctx *ctx,
struct sm4_instruction instr;
unsigned int writemask;
if (store->lhs.var->data_type->type == HLSL_CLASS_MATRIX)
{
hlsl_fixme(ctx, &store->node.loc, "Store to a matrix variable.\n");
return;
}
memset(&instr, 0, sizeof(instr));
instr.opcode = VKD3D_SM4_OP_MOV;
@ -1969,12 +2091,12 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *
{
if (instr->data_type->type == HLSL_CLASS_MATRIX)
{
FIXME("Matrix operations need to be lowered.\n");
hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered.");
break;
}
else if (instr->data_type->type == HLSL_CLASS_OBJECT)
{
hlsl_fixme(ctx, &instr->loc, "Object copy.\n");
hlsl_fixme(ctx, &instr->loc, "Object copy.");
break;
}

View file

@ -579,6 +579,9 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner)
if (ctx->current_directive)
return return_token(token, lval, text);
if (isspace(text[0]))
vkd3d_string_buffer_printf(&ctx->buffer, "%s", text);
else
vkd3d_string_buffer_printf(&ctx->buffer, "%s ", text);
break;
}

View file

@ -165,38 +165,6 @@ static bool preproc_push_if(struct preproc_ctx *ctx, bool condition)
return true;
}
static int char_to_int(char c)
{
if ('0' <= c && c <= '9')
return c - '0';
if ('A' <= c && c <= 'F')
return c - 'A' + 10;
if ('a' <= c && c <= 'f')
return c - 'a' + 10;
return -1;
}
static uint32_t preproc_parse_integer(const char *s)
{
uint32_t base = 10, ret = 0;
int digit;
if (*s == '0')
{
base = 8;
++s;
if (*s == 'x' || *s == 'X')
{
base = 16;
++s;
}
}
while ((digit = char_to_int(*s++)) >= 0)
ret = ret * base + (uint32_t)digit;
return ret;
}
static int default_open_include(const char *filename, bool local,
const char *parent_data, void *context, struct vkd3d_shader_code *out)
{
@ -691,7 +659,7 @@ directive
primary_expr
: T_INTEGER
{
$$ = preproc_parse_integer($1);
$$ = vkd3d_parse_integer($1);
vkd3d_free($1);
}
| T_IDENTIFIER

View file

@ -350,6 +350,7 @@ enum vkd3d_sm4_opcode
VKD3D_SM5_OP_DDIV = 0xd2,
VKD3D_SM5_OP_DFMA = 0xd3,
VKD3D_SM5_OP_DRCP = 0xd4,
VKD3D_SM5_OP_MSAD = 0xd5,
VKD3D_SM5_OP_DTOI = 0xd6,
VKD3D_SM5_OP_DTOU = 0xd7,
VKD3D_SM5_OP_ITOD = 0xd8,

View file

@ -1,6 +1,6 @@
/*
* Copyright 2017 Józef Kucia for CodeWeavers
* Copyright 2021 Conor McCarthy for Codeweavers
* Copyright 2021 Conor McCarthy for CodeWeavers
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -206,7 +206,7 @@ static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(enum vkd3d_
#define VKD3D_SPIRV_VERSION 0x00010000
#define VKD3D_SPIRV_GENERATOR_ID 18
#define VKD3D_SPIRV_GENERATOR_VERSION 3
#define VKD3D_SPIRV_GENERATOR_VERSION 4
#define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID)
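/* Illustrative note: assuming vkd3d_make_u32() packs its first argument into the
 * low 16 bits, the generator word above is 0x00120004, i.e. the registered
 * generator ID (18) in the high half and the generator version (4) in the low
 * half, which is the layout the SPIR-V module header expects. */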
struct vkd3d_spirv_stream
@ -2376,6 +2376,8 @@ struct vkd3d_dxbc_compiler *vkd3d_dxbc_compiler_create(const struct vkd3d_shader
default:
WARN("Ignoring unrecognised option %#x with value %#x.\n", option->name, option->value);
case VKD3D_SHADER_COMPILE_OPTION_API_VERSION:
break;
}
}
@ -7425,7 +7427,7 @@ static void vkd3d_dxbc_compiler_emit_f16tof32(struct vkd3d_dxbc_compiler *compil
type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 2);
scalar_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 1);
/* FIXME: Consider a single UnpackHalf2x16 intruction per 2 components. */
/* FIXME: Consider a single UnpackHalf2x16 instruction per 2 components. */
assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL);
for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i)
{
@ -7459,7 +7461,7 @@ static void vkd3d_dxbc_compiler_emit_f32tof16(struct vkd3d_dxbc_compiler *compil
scalar_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1);
zero_id = vkd3d_dxbc_compiler_get_constant_float(compiler, 0.0f);
/* FIXME: Consider a single PackHalf2x16 intruction per 2 components. */
/* FIXME: Consider a single PackHalf2x16 instruction per 2 components. */
assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL);
for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i)
{
@ -7802,7 +7804,7 @@ static int vkd3d_dxbc_compiler_emit_control_flow_instruction(struct vkd3d_dxbc_c
/* The OpSwitch instruction is inserted when the endswitch
* instruction is processed because we do not know the number
* of case statments in advance.*/
* of case statements in advance.*/
vkd3d_spirv_begin_function_stream_insertion(builder, cf_info->u.switch_.stream_location);
vkd3d_spirv_build_op_switch(builder, cf_info->u.switch_.selector_id,
cf_info->u.switch_.default_block_id, cf_info->u.switch_.case_blocks,
@ -8336,10 +8338,10 @@ static void vkd3d_dxbc_compiler_emit_sample(struct vkd3d_dxbc_compiler *compiler
static void vkd3d_dxbc_compiler_emit_sample_c(struct vkd3d_dxbc_compiler *compiler,
const struct vkd3d_shader_instruction *instruction)
{
uint32_t sampled_type_id, coordinate_id, dref_id, val_id, type_id;
struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
const struct vkd3d_shader_dst_param *dst = instruction->dst;
const struct vkd3d_shader_src_param *src = instruction->src;
uint32_t sampled_type_id, coordinate_id, dref_id, val_id;
SpvImageOperandsMask operands_mask = 0;
unsigned int image_operand_count = 0;
struct vkd3d_shader_image image;
@ -8371,10 +8373,6 @@ static void vkd3d_dxbc_compiler_emit_sample_c(struct vkd3d_dxbc_compiler *compil
sampled_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, 1);
coordinate_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_ALL);
dref_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[3], VKD3DSP_WRITEMASK_0);
/* XXX: Nvidia is broken and expects that the D_ref is packed together with coordinates. */
type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE);
coordinate_id = vkd3d_spirv_build_op_composite_insert1(builder,
type_id, dref_id, coordinate_id, image.resource_type_info->coordinate_component_count);
val_id = vkd3d_spirv_build_op_image_sample_dref(builder, op, sampled_type_id,
image.sampled_image_id, coordinate_id, dref_id, operands_mask,
image_operands, image_operand_count);

View file

@ -225,6 +225,7 @@ static const char * const shader_opcode_names[] =
[VKD3DSIH_MOV ] = "mov",
[VKD3DSIH_MOVA ] = "mova",
[VKD3DSIH_MOVC ] = "movc",
[VKD3DSIH_MSAD ] = "msad",
[VKD3DSIH_MUL ] = "mul",
[VKD3DSIH_NE ] = "ne",
[VKD3DSIH_NOP ] = "nop",

View file

@ -22,6 +22,38 @@
#include <stdio.h>
#include <math.h>
static inline int char_to_int(char c)
{
if ('0' <= c && c <= '9')
return c - '0';
if ('A' <= c && c <= 'F')
return c - 'A' + 10;
if ('a' <= c && c <= 'f')
return c - 'a' + 10;
return -1;
}
uint32_t vkd3d_parse_integer(const char *s)
{
uint32_t base = 10, ret = 0;
int digit;
if (*s == '0')
{
base = 8;
++s;
if (*s == 'x' || *s == 'X')
{
base = 16;
++s;
}
}
while ((digit = char_to_int(*s++)) >= 0)
ret = ret * base + (uint32_t)digit;
return ret;
}
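/* A few worked examples of the prefix handling above, for illustration:
 *
 *     vkd3d_parse_integer("42")   == 42   (no leading '0': decimal)
 *     vkd3d_parse_integer("010")  == 8    (leading '0': octal)
 *     vkd3d_parse_integer("0x1F") == 31   ("0x"/"0X": hexadecimal)
 *
 * Parsing stops at the first character char_to_int() rejects, so a stray
 * suffix such as "10u" simply yields 10. */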
void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer)
{
buffer->buffer_size = 16;
@ -1516,3 +1548,8 @@ int vkd3d_shader_preprocess(const struct vkd3d_shader_compile_info *compile_info
vkd3d_shader_message_context_cleanup(&message_context);
return ret;
}
void vkd3d_shader_set_log_callback(PFN_vkd3d_log callback)
{
vkd3d_dbg_set_log_callback(callback);
}

View file

@ -117,8 +117,10 @@ enum vkd3d_shader_error
VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET = 5018,
VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS = 5019,
VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE = 5020,
VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO = 5021,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300,
VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301,
VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000,
@ -328,6 +330,7 @@ enum vkd3d_shader_opcode
VKD3DSIH_MOV,
VKD3DSIH_MOVA,
VKD3DSIH_MOVC,
VKD3DSIH_MSAD,
VKD3DSIH_MUL,
VKD3DSIH_NE,
VKD3DSIH_NOP,
@ -1032,6 +1035,8 @@ static inline size_t bytecode_get_size(struct vkd3d_bytecode_buffer *buffer)
return buffer->size;
}
uint32_t vkd3d_parse_integer(const char *s);
struct vkd3d_shader_message_context
{
enum vkd3d_shader_log_level log_level;

File diff suppressed because it is too large

View file

@ -19,51 +19,6 @@
#include "vkd3d_private.h"
#include "vkd3d_version.h"
#ifdef HAVE_DLFCN_H
#include <dlfcn.h>
static void *vkd3d_dlopen(const char *name)
{
return dlopen(name, RTLD_NOW);
}
static void *vkd3d_dlsym(void *handle, const char *symbol)
{
return dlsym(handle, symbol);
}
static int vkd3d_dlclose(void *handle)
{
return dlclose(handle);
}
static const char *vkd3d_dlerror(void)
{
return dlerror();
}
#else
static void *vkd3d_dlopen(const char *name)
{
FIXME("Not implemented for this platform.\n");
return NULL;
}
static void *vkd3d_dlsym(void *handle, const char *symbol)
{
return NULL;
}
static int vkd3d_dlclose(void *handle)
{
return 0;
}
static const char *vkd3d_dlerror(void)
{
return "Not implemented for this platform.\n";
}
#endif
struct vkd3d_struct
{
enum vkd3d_structure_type type;
@ -129,12 +84,14 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3),
VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor),
VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge),
VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore),
/* EXT extensions */
VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps),
VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering),
VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker),
VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable),
VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing),
VK_EXTENSION(EXT_ROBUSTNESS_2, EXT_robustness2),
VK_EXTENSION(EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION, EXT_shader_demote_to_helper_invocation),
VK_EXTENSION(EXT_SHADER_STENCIL_EXPORT, EXT_shader_stencil_export),
VK_EXTENSION(EXT_TEXEL_BUFFER_ALIGNMENT, EXT_texel_buffer_alignment),
@ -142,6 +99,112 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
VK_EXTENSION(EXT_VERTEX_ATTRIBUTE_DIVISOR, EXT_vertex_attribute_divisor),
};
static HRESULT vkd3d_create_vk_descriptor_heap_layout(struct d3d12_device *device, unsigned int index)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
VkDescriptorSetLayoutBindingFlagsCreateInfoEXT flags_info;
VkDescriptorSetLayoutCreateInfo set_desc;
VkDescriptorBindingFlagsEXT set_flags;
VkDescriptorSetLayoutBinding binding;
VkResult vr;
binding.binding = 0;
binding.descriptorType = device->vk_descriptor_heap_layouts[index].type;
binding.descriptorCount = device->vk_descriptor_heap_layouts[index].count;
binding.stageFlags = VK_SHADER_STAGE_ALL;
binding.pImmutableSamplers = NULL;
set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
set_desc.pNext = &flags_info;
set_desc.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT;
set_desc.bindingCount = 1;
set_desc.pBindings = &binding;
set_flags = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT
| VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT_EXT | VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT
| VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT_EXT;
flags_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT;
flags_info.pNext = NULL;
flags_info.bindingCount = 1;
flags_info.pBindingFlags = &set_flags;
if ((vr = VK_CALL(vkCreateDescriptorSetLayout(device->vk_device, &set_desc, NULL,
&device->vk_descriptor_heap_layouts[index].vk_set_layout))) < 0)
{
WARN("Failed to create Vulkan descriptor set layout, vr %d.\n", vr);
return hresult_from_vk_result(vr);
}
return S_OK;
}
static void vkd3d_vk_descriptor_heap_layouts_cleanup(struct d3d12_device *device)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
enum vkd3d_vk_descriptor_set_index set;
for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set)
VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, device->vk_descriptor_heap_layouts[set].vk_set_layout,
NULL));
}
static HRESULT vkd3d_vk_descriptor_heap_layouts_init(struct d3d12_device *device)
{
static const struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT] =
{
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV},
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV},
{VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV},
{VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV},
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV},
{VK_DESCRIPTOR_TYPE_SAMPLER, false, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER},
/* UAV counters */
{VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV},
};
const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits;
enum vkd3d_vk_descriptor_set_index set;
HRESULT hr;
for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set)
device->vk_descriptor_heap_layouts[set] = vk_descriptor_heap_layouts[set];
if (!device->use_vk_heaps)
return S_OK;
for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set)
{
switch (device->vk_descriptor_heap_layouts[set].type)
{
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
device->vk_descriptor_heap_layouts[set].count = limits->uniform_buffer_max_descriptors;
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
device->vk_descriptor_heap_layouts[set].count = limits->sampled_image_max_descriptors;
break;
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
device->vk_descriptor_heap_layouts[set].count = limits->storage_image_max_descriptors;
break;
case VK_DESCRIPTOR_TYPE_SAMPLER:
device->vk_descriptor_heap_layouts[set].count = limits->sampler_max_descriptors;
break;
default:
ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type);
break;
}
if (FAILED(hr = vkd3d_create_vk_descriptor_heap_layout(device, set)))
{
vkd3d_vk_descriptor_heap_layouts_cleanup(device);
return hr;
}
}
return S_OK;
}
static unsigned int get_spec_version(const VkExtensionProperties *extensions,
unsigned int count, const char *extension_name)
{
@ -431,6 +494,7 @@ static void vkd3d_init_debug_report(struct vkd3d_instance *instance)
static const struct vkd3d_debug_option vkd3d_config_options[] =
{
{"virtual_heaps", VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS}, /* always use virtual descriptor heaps */
{"vk_debug", VKD3D_CONFIG_FLAG_VULKAN_DEBUG}, /* enable Vulkan debug extensions */
};
@ -690,10 +754,12 @@ struct vkd3d_physical_device_info
VkPhysicalDeviceConditionalRenderingFeaturesEXT conditional_rendering_features;
VkPhysicalDeviceDepthClipEnableFeaturesEXT depth_clip_features;
VkPhysicalDeviceDescriptorIndexingFeaturesEXT descriptor_indexing_features;
VkPhysicalDeviceRobustness2FeaturesEXT robustness2_features;
VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote_features;
VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features;
VkPhysicalDeviceTransformFeedbackFeaturesEXT xfb_features;
VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor_features;
VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore_features;
VkPhysicalDeviceFeatures2 features2;
};
@ -706,9 +772,11 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties;
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties;
VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features;
VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features;
VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features;
VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features;
VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features;
VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *timeline_semaphore_features;
VkPhysicalDeviceDepthClipEnableFeaturesEXT *depth_clip_features;
VkPhysicalDeviceMaintenance3Properties *maintenance3_properties;
VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties;
@ -720,6 +788,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
conditional_rendering_features = &info->conditional_rendering_features;
depth_clip_features = &info->depth_clip_features;
descriptor_indexing_features = &info->descriptor_indexing_features;
robustness2_features = &info->robustness2_features;
descriptor_indexing_properties = &info->descriptor_indexing_properties;
maintenance3_properties = &info->maintenance3_properties;
demote_features = &info->demote_features;
@ -727,6 +796,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
buffer_alignment_properties = &info->texel_buffer_alignment_properties;
vertex_divisor_features = &info->vertex_divisor_features;
vertex_divisor_properties = &info->vertex_divisor_properties;
timeline_semaphore_features = &info->timeline_semaphore_features;
xfb_features = &info->xfb_features;
xfb_properties = &info->xfb_properties;
@ -738,6 +808,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
vk_prepend_struct(&info->features2, depth_clip_features);
descriptor_indexing_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT;
vk_prepend_struct(&info->features2, descriptor_indexing_features);
robustness2_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
vk_prepend_struct(&info->features2, robustness2_features);
demote_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT;
vk_prepend_struct(&info->features2, demote_features);
buffer_alignment_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT;
@ -746,6 +818,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
vk_prepend_struct(&info->features2, xfb_features);
vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT;
vk_prepend_struct(&info->features2, vertex_divisor_features);
timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR;
vk_prepend_struct(&info->features2, timeline_semaphore_features);
if (vulkan_info->KHR_get_physical_device_properties2)
VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2));
@ -1287,6 +1361,36 @@ static void vkd3d_device_descriptor_limits_init(struct vkd3d_device_descriptor_l
limits->sampler_max_descriptors = min(device_limits->maxDescriptorSetSamplers, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS);
}
static void vkd3d_device_vk_heaps_descriptor_limits_init(struct vkd3d_device_descriptor_limits *limits,
const VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties)
{
const unsigned int root_provision = D3D12_MAX_ROOT_COST / 2;
unsigned int srv_divisor = 1, uav_divisor = 1;
/* The total number of populated sampled image or storage image descriptors never exceeds the size of
* one set (or two sets if every UAV has a counter), but the total size of bound layouts will exceed
* device limits if each set size is maxDescriptorSet*, because of the D3D12 buffer + image allowance
* (and UAV counters). Breaking limits for layouts seems to work with RADV and Nvidia drivers at
* least, but let's try to stay within them if limits are high enough. */
if (properties->maxDescriptorSetUpdateAfterBindSampledImages >= (1u << 21))
{
srv_divisor = 2;
uav_divisor = properties->maxDescriptorSetUpdateAfterBindSampledImages >= (3u << 20) ? 3 : 2;
}
limits->uniform_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers,
properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision);
limits->sampled_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSampledImages,
properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision);
limits->storage_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers,
properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision);
limits->storage_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageImages,
properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision);
limits->sampler_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSamplers,
properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision);
limits->sampler_max_descriptors = min(limits->sampler_max_descriptors, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS);
}
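/* A worked example of the divisor logic above, for illustration, assuming
 * D3D12_MAX_ROOT_COST is 64 so that root_provision is 32: with
 * maxDescriptorSetUpdateAfterBindSampledImages = 1u << 21 and
 * maxPerStageDescriptorUpdateAfterBindSampledImages = 1u << 20, srv_divisor
 * becomes 2, and uav_divisor also becomes 2 because 1u << 21 is below 3u << 20,
 * giving sampled_image_max_descriptors = min(1u << 21, (1u << 20) / 2 - 32) = 524256. */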
static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
const struct vkd3d_device_create_info *create_info,
struct vkd3d_physical_device_info *physical_device_info,
@ -1440,10 +1544,14 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
vulkan_info->EXT_conditional_rendering = false;
if (!physical_device_info->depth_clip_features.depthClipEnable)
vulkan_info->EXT_depth_clip_enable = false;
if (!physical_device_info->robustness2_features.nullDescriptor)
vulkan_info->EXT_robustness2 = false;
if (!physical_device_info->demote_features.shaderDemoteToHelperInvocation)
vulkan_info->EXT_shader_demote_to_helper_invocation = false;
if (!physical_device_info->texel_buffer_alignment_features.texelBufferAlignment)
vulkan_info->EXT_texel_buffer_alignment = false;
if (!physical_device_info->timeline_semaphore_features.timelineSemaphore)
vulkan_info->KHR_timeline_semaphore = false;
vulkan_info->texel_buffer_alignment_properties = physical_device_info->texel_buffer_alignment_properties;
@ -1492,8 +1600,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
features->shaderTessellationAndGeometryPointSize = VK_FALSE;
descriptor_indexing = &physical_device_info->descriptor_indexing_features;
if (descriptor_indexing)
{
descriptor_indexing->shaderInputAttachmentArrayDynamicIndexing = VK_FALSE;
descriptor_indexing->shaderInputAttachmentArrayNonUniformIndexing = VK_FALSE;
@ -1501,7 +1607,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
features->shaderStorageBufferArrayDynamicIndexing = VK_FALSE;
descriptor_indexing->shaderStorageBufferArrayNonUniformIndexing = VK_FALSE;
descriptor_indexing->descriptorBindingStorageBufferUpdateAfterBind = VK_FALSE;
}
if (vulkan_info->EXT_descriptor_indexing && descriptor_indexing
&& (descriptor_indexing->descriptorBindingUniformBufferUpdateAfterBind
@ -1514,6 +1619,23 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
features->robustBufferAccess = VK_FALSE;
}
/* Select descriptor heap implementation. Forcing virtual heaps may be useful if
* a client allocates descriptor heaps too large for the Vulkan device, or the
* root signature cost exceeds the available push constant size. Virtual heaps
* use only enough descriptors for the descriptor tables of the currently bound
* root signature, and don't require a 32-bit push constant for each table. */
device->use_vk_heaps = vulkan_info->EXT_descriptor_indexing
&& !(device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS)
&& descriptor_indexing->descriptorBindingUniformBufferUpdateAfterBind
&& descriptor_indexing->descriptorBindingSampledImageUpdateAfterBind
&& descriptor_indexing->descriptorBindingStorageImageUpdateAfterBind
&& descriptor_indexing->descriptorBindingUniformTexelBufferUpdateAfterBind
&& descriptor_indexing->descriptorBindingStorageTexelBufferUpdateAfterBind;
if (device->use_vk_heaps)
vkd3d_device_vk_heaps_descriptor_limits_init(&vulkan_info->descriptor_limits,
&physical_device_info->descriptor_indexing_properties);
else
vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits,
&physical_device_info->properties2.properties.limits);
@ -2504,12 +2626,12 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface)
vkd3d_private_store_destroy(&device->private_store);
vkd3d_cleanup_format_info(device);
vkd3d_vk_descriptor_heap_layouts_cleanup(device);
vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device);
vkd3d_destroy_null_resources(&device->null_resources, device);
vkd3d_gpu_va_allocator_cleanup(&device->gpu_va_allocator);
vkd3d_gpu_descriptor_allocator_cleanup(&device->gpu_descriptor_allocator);
vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device);
vkd3d_fence_worker_stop(&device->fence_worker, device);
d3d12_device_destroy_pipeline_cache(device);
d3d12_device_destroy_vkd3d_queues(device);
for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i)
@ -3406,6 +3528,134 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface,
d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device);
}
static void flush_desc_writes(struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE],
struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device)
{
enum vkd3d_vk_descriptor_set_index set;
for (set = 0; set < VKD3D_SET_INDEX_COUNT; ++set)
{
if (!infos[set].count)
continue;
d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device);
infos[set].count = 0;
infos[set].uav_counter = false;
}
}
static void d3d12_desc_buffered_copy_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src,
struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE],
struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device)
{
struct d3d12_desc_copy_location *location;
enum vkd3d_vk_descriptor_set_index set;
struct vkd3d_mutex *mutex;
mutex = d3d12_device_get_descriptor_mutex(device, src);
vkd3d_mutex_lock(mutex);
if (src->magic == VKD3D_DESCRIPTOR_MAGIC_FREE)
{
/* Source must be unlocked first, and therefore can't be used as a null source. */
static const struct d3d12_desc null = {0};
vkd3d_mutex_unlock(mutex);
d3d12_desc_write_atomic(dst, &null, device);
return;
}
set = vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(src->vk_descriptor_type);
location = &locations[set][infos[set].count++];
location->src = *src;
if (location->src.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW)
vkd3d_view_incref(location->src.u.view_info.view);
vkd3d_mutex_unlock(mutex);
infos[set].uav_counter |= (location->src.magic == VKD3D_DESCRIPTOR_MAGIC_UAV)
&& !!location->src.u.view_info.view->vk_counter_view;
location->dst = dst;
if (infos[set].count == ARRAY_SIZE(locations[0]))
{
d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device);
infos[set].count = 0;
infos[set].uav_counter = false;
}
}
/* Some games, e.g. Control, copy a large number of descriptors per frame, so the
* speed of this function is critical. */
static void d3d12_device_vk_heaps_copy_descriptors(struct d3d12_device *device,
UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets,
const UINT *dst_descriptor_range_sizes,
UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets,
const UINT *src_descriptor_range_sizes)
{
struct d3d12_desc_copy_location locations[VKD3D_SET_INDEX_COUNT][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE];
unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx;
/* The locations array is relatively large, and often mostly empty. Keeping these
* values together in a separate array will likely result in fewer cache misses. */
struct d3d12_desc_copy_info infos[VKD3D_SET_INDEX_COUNT];
struct d3d12_descriptor_heap *descriptor_heap = NULL;
const struct d3d12_desc *src, *heap_base, *heap_end;
unsigned int dst_range_size, src_range_size;
struct d3d12_desc *dst;
descriptor_heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&device->gpu_descriptor_allocator,
d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[0]));
heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors;
heap_end = heap_base + descriptor_heap->desc.NumDescriptors;
memset(infos, 0, sizeof(infos));
dst_range_idx = dst_idx = 0;
src_range_idx = src_idx = 0;
while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count)
{
dst_range_size = dst_descriptor_range_sizes ? dst_descriptor_range_sizes[dst_range_idx] : 1;
src_range_size = src_descriptor_range_sizes ? src_descriptor_range_sizes[src_range_idx] : 1;
dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]);
src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]);
if (dst < heap_base || dst >= heap_end)
{
flush_desc_writes(locations, infos, descriptor_heap, device);
descriptor_heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&device->gpu_descriptor_allocator,
dst);
heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors;
heap_end = heap_base + descriptor_heap->desc.NumDescriptors;
}
for (; dst_idx < dst_range_size && src_idx < src_range_size; src_idx++, dst_idx++)
{
/* We don't need to lock either descriptor for the identity check. The descriptor
* mutex is only intended to prevent use-after-free of the vkd3d_view caused by a
* race condition in the calling app. It is unnecessary to protect this test as it's
* the app's race condition, not ours. */
if (dst[dst_idx].magic == src[src_idx].magic && (dst[dst_idx].magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW)
&& dst[dst_idx].u.view_info.written_serial_id == src[src_idx].u.view_info.view->serial_id)
continue;
d3d12_desc_buffered_copy_atomic(&dst[dst_idx], &src[src_idx], locations, infos, descriptor_heap, device);
}
if (dst_idx >= dst_range_size)
{
++dst_range_idx;
dst_idx = 0;
}
if (src_idx >= src_range_size)
{
++src_range_idx;
src_idx = 0;
}
}
flush_desc_writes(locations, infos, descriptor_heap, device);
}
#define VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT 8
static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface,
UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets,
const UINT *dst_descriptor_range_sizes,
@ -3434,6 +3684,18 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface,
return;
}
if (!dst_descriptor_range_count)
return;
if (device->use_vk_heaps && (dst_descriptor_range_count > 1 || (dst_descriptor_range_sizes
&& dst_descriptor_range_sizes[0] >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT)))
{
d3d12_device_vk_heaps_copy_descriptors(device, dst_descriptor_range_count, dst_descriptor_range_offsets,
dst_descriptor_range_sizes, src_descriptor_range_count, src_descriptor_range_offsets,
src_descriptor_range_sizes);
return;
}
dst_range_idx = dst_idx = 0;
src_range_idx = src_idx = 0;
while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count)
@ -3470,6 +3732,17 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *i
iface, descriptor_count, dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr,
descriptor_heap_type);
if (descriptor_count >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT)
{
struct d3d12_device *device = impl_from_ID3D12Device(iface);
if (device->use_vk_heaps)
{
d3d12_device_vk_heaps_copy_descriptors(device, 1, &dst_descriptor_range_offset,
&descriptor_count, 1, &src_descriptor_range_offset, &descriptor_count);
return;
}
}
d3d12_device_CopyDescriptors(iface, 1, &dst_descriptor_range_offset, &descriptor_count,
1, &src_descriptor_range_offset, &descriptor_count, descriptor_heap_type);
}
@ -3736,7 +4009,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface,
struct d3d12_fence *object;
HRESULT hr;
TRACE("iface %p, intial_value %#"PRIx64", flags %#x, riid %s, fence %p.\n",
TRACE("iface %p, initial_value %#"PRIx64", flags %#x, riid %s, fence %p.\n",
iface, initial_value, flags, debugstr_guid(riid), fence);
if (FAILED(hr = d3d12_fence_create(device, initial_value, flags, &object)))
@ -3993,11 +4266,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
if (FAILED(hr = vkd3d_private_store_init(&device->private_store)))
goto out_free_pipeline_cache;
if (FAILED(hr = vkd3d_fence_worker_start(&device->fence_worker, device)))
goto out_free_private_store;
if (FAILED(hr = vkd3d_init_format_info(device)))
goto out_stop_fence_worker;
goto out_free_private_store;
if (FAILED(hr = vkd3d_init_null_resources(&device->null_resources, device)))
goto out_cleanup_format_info;
@ -4005,11 +4275,16 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
if (FAILED(hr = vkd3d_uav_clear_state_init(&device->uav_clear_state, device)))
goto out_destroy_null_resources;
if (FAILED(hr = vkd3d_vk_descriptor_heap_layouts_init(device)))
goto out_cleanup_uav_clear_state;
vkd3d_render_pass_cache_init(&device->render_pass_cache);
vkd3d_gpu_descriptor_allocator_init(&device->gpu_descriptor_allocator);
vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
vkd3d_time_domains_init(device);
device->blocked_queue_count = 0;
for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i)
vkd3d_mutex_init(&device->desc_mutex[i]);
@ -4020,12 +4295,12 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
return S_OK;
out_cleanup_uav_clear_state:
vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device);
out_destroy_null_resources:
vkd3d_destroy_null_resources(&device->null_resources, device);
out_cleanup_format_info:
vkd3d_cleanup_format_info(device);
out_stop_fence_worker:
vkd3d_fence_worker_stop(&device->fence_worker, device);
out_free_private_store:
vkd3d_private_store_destroy(&device->private_store);
out_free_pipeline_cache:

View file

@ -22,6 +22,8 @@
#define VKD3D_NULL_BUFFER_SIZE 16
#define VKD3D_NULL_VIEW_FORMAT DXGI_FORMAT_R8G8B8A8_UNORM
LONG64 object_global_serial_id;
static inline bool is_cpu_accessible_heap(const D3D12_HEAP_PROPERTIES *properties)
{
if (properties->Type == D3D12_HEAP_TYPE_DEFAULT)
@ -2085,6 +2087,7 @@ static struct vkd3d_view *vkd3d_view_create(enum vkd3d_view_type type)
{
view->refcount = 1;
view->type = type;
view->serial_id = InterlockedIncrement64(&object_global_serial_id);
view->vk_counter_view = VK_NULL_HANDLE;
}
return view;
@ -2128,6 +2131,183 @@ void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device)
vkd3d_view_destroy(view, device);
}
/* TODO: write null descriptors to all applicable sets (invalid behaviour workaround). */
static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descriptor_heap_vk_set *descriptor_set,
struct d3d12_desc_copy_location *locations, unsigned int write_count)
{
unsigned int i, info_index = 0, write_index = 0;
switch (locations[0].src.vk_descriptor_type)
{
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
for (; write_index < write_count; ++write_index)
{
descriptor_set->vk_descriptor_writes[write_index].pBufferInfo = &descriptor_set->vk_buffer_infos[info_index];
for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index)
descriptor_set->vk_buffer_infos[info_index] = locations[info_index].src.u.vk_cbv_info;
}
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
for (; write_index < write_count; ++write_index)
{
descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index];
for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index)
descriptor_set->vk_image_infos[info_index].imageView = locations[info_index].src.u.view_info.view->u.vk_image_view;
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
for (; write_index < write_count; ++write_index)
{
descriptor_set->vk_descriptor_writes[write_index].pTexelBufferView = &descriptor_set->vk_buffer_views[info_index];
for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index)
descriptor_set->vk_buffer_views[info_index] = locations[info_index].src.u.view_info.view->u.vk_buffer_view;
}
break;
case VK_DESCRIPTOR_TYPE_SAMPLER:
for (; write_index < write_count; ++write_index)
{
descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index];
for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index)
descriptor_set->vk_image_infos[info_index].sampler = locations[info_index].src.u.view_info.view->u.vk_sampler;
}
break;
default:
ERR("Unhandled descriptor type %#x.\n", locations[0].src.vk_descriptor_type);
break;
}
}
static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_heap *descriptor_heap,
uint32_t dst_array_element, const struct d3d12_device *device)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
struct d3d12_descriptor_heap_vk_set *descriptor_set;
VkBufferView vk_buffer_view = VK_NULL_HANDLE;
enum vkd3d_vk_descriptor_set_index i;
VkDescriptorBufferInfo vk_cbv_info;
vk_cbv_info.buffer = VK_NULL_HANDLE;
vk_cbv_info.offset = 0;
vk_cbv_info.range = VK_WHOLE_SIZE;
/* Binding a shader with the wrong null descriptor type works in Windows.
* To support that here we must write one to all applicable Vulkan sets. */
for (i = VKD3D_SET_INDEX_UNIFORM_BUFFER; i <= VKD3D_SET_INDEX_STORAGE_IMAGE; ++i)
{
descriptor_set = &descriptor_heap->vk_descriptor_sets[i];
descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst_array_element;
descriptor_set->vk_descriptor_writes[0].descriptorCount = 1;
switch (i)
{
case VKD3D_SET_INDEX_UNIFORM_BUFFER:
descriptor_set->vk_descriptor_writes[0].pBufferInfo = &vk_cbv_info;
break;
case VKD3D_SET_INDEX_SAMPLED_IMAGE:
case VKD3D_SET_INDEX_STORAGE_IMAGE:
descriptor_set->vk_image_infos[0].imageView = VK_NULL_HANDLE;
break;
case VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER:
case VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER:
descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &vk_buffer_view;
break;
default:
assert(false);
break;
}
VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL));
}
}
/* dst and src contain the same data unless another thread overwrites dst. The array index is
* calculated from dst, and src is thread safe. */
static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src,
struct d3d12_device *device)
{
struct d3d12_descriptor_heap_vk_set *descriptor_set;
struct d3d12_descriptor_heap *descriptor_heap;
const struct vkd3d_vk_device_procs *vk_procs;
bool is_null = false;
descriptor_heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&device->gpu_descriptor_allocator, dst);
descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(
src->vk_descriptor_type)];
vk_procs = &device->vk_procs;
vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex);
descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst
- (const struct d3d12_desc *)descriptor_heap->descriptors;
descriptor_set->vk_descriptor_writes[0].descriptorCount = 1;
switch (src->vk_descriptor_type)
{
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
descriptor_set->vk_descriptor_writes[0].pBufferInfo = &src->u.vk_cbv_info;
is_null = !src->u.vk_cbv_info.buffer;
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
is_null = !(descriptor_set->vk_image_infos[0].imageView = src->u.view_info.view->u.vk_image_view);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->u.view_info.view->u.vk_buffer_view;
is_null = !src->u.view_info.view->u.vk_buffer_view;
break;
case VK_DESCRIPTOR_TYPE_SAMPLER:
descriptor_set->vk_image_infos[0].sampler = src->u.view_info.view->u.vk_sampler;
break;
default:
ERR("Unhandled descriptor type %#x.\n", src->vk_descriptor_type);
break;
}
if (is_null && device->vk_info.EXT_robustness2)
{
d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap,
descriptor_set->vk_descriptor_writes[0].dstArrayElement, device);
vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex);
return;
}
VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL));
if (src->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->u.view_info.view->vk_counter_view)
{
descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER];
descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst
- (const struct d3d12_desc *)descriptor_heap->descriptors;
descriptor_set->vk_descriptor_writes[0].descriptorCount = 1;
descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->u.view_info.view->vk_counter_view;
VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL));
}
vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex);
}
static void d3d12_desc_write_atomic_d3d12_only(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device)
{
struct vkd3d_view *defunct_view;
struct vkd3d_mutex *mutex;
mutex = d3d12_device_get_descriptor_mutex(device, dst);
vkd3d_mutex_lock(mutex);
if (!(dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) || InterlockedDecrement(&dst->u.view_info.view->refcount))
{
*dst = *src;
vkd3d_mutex_unlock(mutex);
return;
}
defunct_view = dst->u.view_info.view;
*dst = *src;
vkd3d_mutex_unlock(mutex);
/* Destroy the view after unlocking to reduce wait time. */
vkd3d_view_destroy(defunct_view, device);
}
void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src,
struct d3d12_device *device)
{
@ -2139,8 +2319,8 @@ void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *sr
/* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */
if ((dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW)
&& !InterlockedDecrement(&dst->u.view->refcount))
defunct_view = dst->u.view;
&& !InterlockedDecrement(&dst->u.view_info.view->refcount))
defunct_view = dst->u.view_info.view;
*dst = *src;
@ -2149,6 +2329,9 @@ void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *sr
/* Destroy the view after unlocking to reduce wait time. */
if (defunct_view)
vkd3d_view_destroy(defunct_view, device);
if (device->use_vk_heaps && dst->magic)
d3d12_desc_write_vk_heap(dst, src, device);
}
static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device)
@ -2158,6 +2341,56 @@ static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_devic
d3d12_desc_write_atomic(descriptor, &null_desc, device);
}
void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info,
struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set,
struct d3d12_device *device)
{
struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set];
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
unsigned int i, write_count;
vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex);
for (i = 0, write_count = 0; i < info->count; ++i)
{
d3d12_desc_write_atomic_d3d12_only(locations[i].dst, &locations[i].src, device);
if (i && locations[i].dst == locations[i - 1].dst + 1)
{
++descriptor_set->vk_descriptor_writes[write_count - 1].descriptorCount;
continue;
}
descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst
- (const struct d3d12_desc *)descriptor_heap->descriptors;
descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1;
}
d3d12_descriptor_heap_write_vk_descriptor_range(descriptor_set, locations, write_count);
/* We could pass a VkCopyDescriptorSet array instead, but that would require also storing a src array index
* for each location, which means querying the src descriptor heap. Contiguous copies require contiguous src
* descriptors as well as dst, which is less likely to occur. And client race conditions may break it. */
VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL));
if (!info->uav_counter)
goto done;
descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER];
for (i = 0, write_count = 0; i < info->count; ++i)
{
if (!locations[i].src.u.view_info.view->vk_counter_view)
continue;
descriptor_set->vk_buffer_views[write_count] = locations[i].src.u.view_info.view->vk_counter_view;
descriptor_set->vk_descriptor_writes[write_count].pTexelBufferView = &descriptor_set->vk_buffer_views[write_count];
descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst
- (const struct d3d12_desc *)descriptor_heap->descriptors;
descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1;
}
VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL));
done:
vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex);
}
void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src,
struct d3d12_device *device)
{
@ -2172,7 +2405,7 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src,
vkd3d_mutex_lock(mutex);
if (src->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW)
vkd3d_view_incref(src->u.view);
vkd3d_view_incref(src->u.view_info.view);
tmp = *src;
@ -2243,10 +2476,10 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c
VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
VkBufferView vk_view = VK_NULL_HANDLE;
struct vkd3d_view *object;
VkBufferView vk_view;
if (!vkd3d_create_vk_buffer_view(device, vk_buffer, format, offset, size, &vk_view))
if (vk_buffer && !vkd3d_create_vk_buffer_view(device, vk_buffer, format, offset, size, &vk_view))
return false;
if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_BUFFER)))
@ -2531,10 +2764,12 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image,
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
const struct vkd3d_format *format = desc->format;
struct VkImageViewCreateInfo view_desc;
VkImageView vk_view = VK_NULL_HANDLE;
struct vkd3d_view *object;
VkImageView vk_view;
VkResult vr;
if (vk_image)
{
view_desc.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
view_desc.pNext = NULL;
view_desc.flags = 0;
@ -2554,6 +2789,7 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image,
WARN("Failed to create Vulkan image view, vr %d.\n", vr);
return false;
}
}
if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_IMAGE)))
{
@ -2602,7 +2838,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor,
/* NULL descriptor */
buffer_info->buffer = device->null_resources.vk_buffer;
buffer_info->offset = 0;
buffer_info->range = VKD3D_NULL_BUFFER_SIZE;
buffer_info->range = VK_WHOLE_SIZE;
}
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_CBV;
@ -2635,6 +2871,7 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor,
switch (desc->ViewDimension)
{
case D3D12_SRV_DIMENSION_BUFFER:
if (!device->vk_info.EXT_robustness2)
WARN("Creating NULL buffer SRV %#x.\n", desc->Format);
if (vkd3d_create_buffer_view(device, null_resources->vk_buffer,
@ -2643,7 +2880,8 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor,
{
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV;
descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
descriptor->u.view = view;
descriptor->u.view_info.view = view;
descriptor->u.view_info.written_serial_id = view->serial_id;
}
return;
@ -2657,10 +2895,18 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor,
break;
default:
if (device->vk_info.EXT_robustness2)
{
vk_image = VK_NULL_HANDLE;
/* view_type is not used for Vulkan null descriptors, but make it valid. */
vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D;
break;
}
FIXME("Unhandled view dimension %#x.\n", desc->ViewDimension);
return;
}
if (!device->vk_info.EXT_robustness2)
WARN("Creating NULL SRV %#x.\n", desc->ViewDimension);
vkd3d_desc.format = vkd3d_get_format(device, VKD3D_NULL_VIEW_FORMAT, false);
@ -2679,7 +2925,8 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor,
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV;
descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
descriptor->u.view = view;
descriptor->u.view_info.view = view;
descriptor->u.view_info.written_serial_id = view->serial_id;
}
static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor,
@ -2709,7 +2956,8 @@ static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor,
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV;
descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
descriptor->u.view = view;
descriptor->u.view_info.view = view;
descriptor->u.view_info.written_serial_id = view->serial_id;
}
static VkImageAspectFlags vk_image_aspect_flags_from_d3d12_plane_slice(const struct vkd3d_format *format,
@ -2839,7 +3087,8 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor,
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV;
descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
descriptor->u.view = view;
descriptor->u.view_info.view = view;
descriptor->u.view_info.written_serial_id = view->serial_id;
}
static unsigned int vkd3d_view_flags_from_d3d12_buffer_uav_flags(D3D12_BUFFER_UAV_FLAGS flags)
@ -2868,6 +3117,7 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor,
switch (desc->ViewDimension)
{
case D3D12_UAV_DIMENSION_BUFFER:
if (!device->vk_info.EXT_robustness2)
WARN("Creating NULL buffer UAV %#x.\n", desc->Format);
if (vkd3d_create_buffer_view(device, null_resources->vk_storage_buffer,
@ -2876,7 +3126,8 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor,
{
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV;
descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
descriptor->u.view = view;
descriptor->u.view_info.view = view;
descriptor->u.view_info.written_serial_id = view->serial_id;
}
return;
@ -2890,10 +3141,18 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor,
break;
default:
if (device->vk_info.EXT_robustness2)
{
vk_image = VK_NULL_HANDLE;
/* view_type is not used for Vulkan null descriptors, but make it valid. */
vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D;
break;
}
FIXME("Unhandled view dimension %#x.\n", desc->ViewDimension);
return;
}
if (!device->vk_info.EXT_robustness2)
WARN("Creating NULL UAV %#x.\n", desc->ViewDimension);
vkd3d_desc.format = vkd3d_get_format(device, VKD3D_NULL_VIEW_FORMAT, false);
@ -2912,7 +3171,8 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor,
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV;
descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
descriptor->u.view = view;
descriptor->u.view_info.view = view;
descriptor->u.view_info.written_serial_id = view->serial_id;
}
static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_device *device,
@ -2942,7 +3202,8 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV;
descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
descriptor->u.view = view;
descriptor->u.view_info.view = view;
descriptor->u.view_info.written_serial_id = view->serial_id;
if (counter_resource)
{
@ -3017,7 +3278,8 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor,
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV;
descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
descriptor->u.view = view;
descriptor->u.view_info.view = view;
descriptor->u.view_info.written_serial_id = view->serial_id;
}
void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *device,
@ -3179,7 +3441,8 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler,
sampler->magic = VKD3D_DESCRIPTOR_MAGIC_SAMPLER;
sampler->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLER;
sampler->u.view = view;
sampler->u.view_info.view = view;
sampler->u.view_info.written_serial_id = view->serial_id;
}
HRESULT vkd3d_create_static_sampler(struct d3d12_device *device,
@ -3425,9 +3688,12 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea
if (!refcount)
{
const struct vkd3d_vk_device_procs *vk_procs;
struct d3d12_device *device = heap->device;
unsigned int i;
vk_procs = &device->vk_procs;
vkd3d_private_store_destroy(&heap->private_store);
switch (heap->desc.Type)
@ -3474,6 +3740,9 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea
break;
}
VK_CALL(vkDestroyDescriptorPool(device->vk_device, heap->vk_descriptor_pool, NULL));
vkd3d_mutex_destroy(&heap->vk_sets_mutex);
vkd3d_free(heap);
d3d12_device_release(device);
@ -3584,6 +3853,152 @@ static const struct ID3D12DescriptorHeapVtbl d3d12_descriptor_heap_vtbl =
d3d12_descriptor_heap_GetGPUDescriptorHandleForHeapStart,
};
const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[] =
{
VKD3D_SET_INDEX_SAMPLER,
VKD3D_SET_INDEX_COUNT,
VKD3D_SET_INDEX_SAMPLED_IMAGE,
VKD3D_SET_INDEX_STORAGE_IMAGE,
VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER,
VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER,
VKD3D_SET_INDEX_UNIFORM_BUFFER,
};
static HRESULT d3d12_descriptor_heap_create_descriptor_pool(struct d3d12_descriptor_heap *descriptor_heap,
struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
VkDescriptorPoolSize pool_sizes[VKD3D_SET_INDEX_COUNT];
struct VkDescriptorPoolCreateInfo pool_desc;
VkDevice vk_device = device->vk_device;
enum vkd3d_vk_descriptor_set_index set;
VkResult vr;
for (set = 0, pool_desc.poolSizeCount = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set)
{
if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type)
{
pool_sizes[pool_desc.poolSizeCount].type = device->vk_descriptor_heap_layouts[set].type;
pool_sizes[pool_desc.poolSizeCount++].descriptorCount = desc->NumDescriptors;
}
}
pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
pool_desc.pNext = NULL;
pool_desc.flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT_EXT;
pool_desc.maxSets = pool_desc.poolSizeCount;
pool_desc.pPoolSizes = pool_sizes;
if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &descriptor_heap->vk_descriptor_pool))) < 0)
ERR("Failed to create descriptor pool, vr %d.\n", vr);
return hresult_from_vk_result(vr);
}
static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descriptor_heap *descriptor_heap,
struct d3d12_device *device, unsigned int set)
{
struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set];
uint32_t variable_binding_size = descriptor_heap->desc.NumDescriptors;
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size;
VkDescriptorSetAllocateInfo set_desc;
unsigned int i;
VkResult vr;
set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
set_desc.pNext = &set_size;
set_desc.descriptorPool = descriptor_heap->vk_descriptor_pool;
set_desc.descriptorSetCount = 1;
set_desc.pSetLayouts = &device->vk_descriptor_heap_layouts[set].vk_set_layout;
set_size.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT;
set_size.pNext = NULL;
set_size.descriptorSetCount = 1;
set_size.pDescriptorCounts = &variable_binding_size;
if ((vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &set_desc, &descriptor_set->vk_set))) >= 0)
{
for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_descriptor_writes); ++i)
descriptor_set->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set;
return S_OK;
}
ERR("Failed to allocate descriptor set, vr %d.\n", vr);
return hresult_from_vk_result(vr);
}
static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descriptor_heap *descriptor_heap,
struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc)
{
enum vkd3d_vk_descriptor_set_index set;
HRESULT hr;
descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE;
memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets));
vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex);
if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV
&& desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER))
return S_OK;
if (FAILED(hr = d3d12_descriptor_heap_create_descriptor_pool(descriptor_heap, device, desc)))
return hr;
for (set = 0; set < ARRAY_SIZE(descriptor_heap->vk_descriptor_sets); ++set)
{
struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set];
unsigned int i;
for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_descriptor_writes); ++i)
{
descriptor_set->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
descriptor_set->vk_descriptor_writes[i].pNext = NULL;
descriptor_set->vk_descriptor_writes[i].dstBinding = 0;
descriptor_set->vk_descriptor_writes[i].descriptorType = device->vk_descriptor_heap_layouts[set].type;
descriptor_set->vk_descriptor_writes[i].pImageInfo = NULL;
descriptor_set->vk_descriptor_writes[i].pBufferInfo = NULL;
descriptor_set->vk_descriptor_writes[i].pTexelBufferView = NULL;
}
switch (device->vk_descriptor_heap_layouts[set].type)
{
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0];
for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i)
{
descriptor_set->vk_image_infos[i].sampler = VK_NULL_HANDLE;
descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0];
for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i)
{
descriptor_set->vk_image_infos[i].sampler = VK_NULL_HANDLE;
descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
}
break;
case VK_DESCRIPTOR_TYPE_SAMPLER:
descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0];
for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i)
{
descriptor_set->vk_image_infos[i].imageView = VK_NULL_HANDLE;
descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
}
break;
default:
ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type);
return E_FAIL;
}
if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type
&& FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, set)))
return hr;
}
return S_OK;
}
static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descriptor_heap,
struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc)
{
@ -3591,12 +4006,15 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript
descriptor_heap->ID3D12DescriptorHeap_iface.lpVtbl = &d3d12_descriptor_heap_vtbl;
descriptor_heap->refcount = 1;
descriptor_heap->serial_id = InterlockedIncrement64(&object_global_serial_id);
descriptor_heap->desc = *desc;
if (FAILED(hr = vkd3d_private_store_init(&descriptor_heap->private_store)))
return hr;
d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc);
d3d12_device_add_ref(descriptor_heap->device = device);
return S_OK;
@ -4077,6 +4495,9 @@ HRESULT vkd3d_init_null_resources(struct vkd3d_null_resources *null_resources,
memset(null_resources, 0, sizeof(*null_resources));
if (device->vk_info.EXT_robustness2)
return S_OK;
memset(&heap_properties, 0, sizeof(heap_properties));
heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;

View file

@ -92,6 +92,8 @@ static void d3d12_root_signature_cleanup(struct d3d12_root_signature *root_signa
if (root_signature->descriptor_mapping)
vkd3d_free(root_signature->descriptor_mapping);
vkd3d_free(root_signature->descriptor_offsets);
vkd3d_free(root_signature->uav_counter_mapping);
vkd3d_free(root_signature->uav_counter_offsets);
if (root_signature->root_constants)
vkd3d_free(root_signature->root_constants);
@ -327,6 +329,7 @@ static bool vk_binding_from_d3d12_descriptor_range(struct VkDescriptorSetLayoutB
struct d3d12_root_signature_info
{
size_t binding_count;
size_t uav_range_count;
size_t root_constant_count;
size_t root_descriptor_count;
@ -401,6 +404,7 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig
info->binding_count += binding_count;
info->uav_count += count * 2u;
uav_unbounded_range |= unbounded;
++info->uav_range_count;
break;
case D3D12_DESCRIPTOR_RANGE_TYPE_CBV:
info->cbv_count += count;
@ -495,6 +499,7 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat
uint32_t *push_constant_range_count)
{
uint32_t push_constants_offset[D3D12_SHADER_VISIBILITY_PIXEL + 1];
bool use_vk_heaps = root_signature->device->use_vk_heaps;
unsigned int i, j, push_constant_count;
uint32_t offset;
@ -507,7 +512,8 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat
continue;
assert(p->ShaderVisibility <= D3D12_SHADER_VISIBILITY_PIXEL);
push_constants[p->ShaderVisibility].stageFlags = stage_flags_from_visibility(p->ShaderVisibility);
push_constants[p->ShaderVisibility].stageFlags = use_vk_heaps ? VK_SHADER_STAGE_ALL
: stage_flags_from_visibility(p->ShaderVisibility);
push_constants[p->ShaderVisibility].size += p->u.Constants.Num32BitValues * sizeof(uint32_t);
}
if (push_constants[D3D12_SHADER_VISIBILITY_ALL].size)
@ -586,6 +592,8 @@ struct vkd3d_descriptor_set_context
unsigned int table_index;
unsigned int unbounded_offset;
unsigned int descriptor_index;
unsigned int uav_counter_index;
unsigned int push_constant_index;
uint32_t descriptor_binding;
};
@ -595,6 +603,7 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns
if (set_count > max_count)
{
/* NOTE: If maxBoundDescriptorSets is < 9, try VKD3D_CONFIG=virtual_heaps */
ERR("Required descriptor set count exceeds maximum allowed count of %u.\n", max_count);
return false;
}
@ -802,6 +811,122 @@ static void d3d12_root_signature_map_vk_unbounded_binding(struct d3d12_root_sign
offset->dynamic_offset_index = ~0u;
}
static unsigned int vk_heap_binding_count_from_descriptor_range(const struct d3d12_root_descriptor_table_range *range,
unsigned int descriptor_set_size)
{
unsigned int max_count;
if (descriptor_set_size <= range->offset)
{
ERR("Descriptor range offset %u exceeds maximum available offset %u.\n", range->offset, descriptor_set_size - 1);
max_count = 0;
}
else
{
max_count = descriptor_set_size - range->offset;
}
if (range->descriptor_count != UINT_MAX)
{
if (range->descriptor_count > max_count)
ERR("Range size %u exceeds available descriptor count %u.\n", range->descriptor_count, max_count);
return range->descriptor_count;
}
else
{
        /* Prefer an unsupported binding count vs a zero count, because shader compilation will fail
         * to match a declaration to a zero binding, resulting in failure of pipeline state creation. */
return max_count + !max_count;
}
}
static void vkd3d_descriptor_heap_binding_from_descriptor_range(const struct d3d12_root_descriptor_table_range *range,
bool is_buffer, const struct d3d12_root_signature *root_signature,
struct vkd3d_shader_descriptor_binding *binding)
{
const struct vkd3d_device_descriptor_limits *descriptor_limits = &root_signature->device->vk_info.descriptor_limits;
unsigned int descriptor_set_size;
switch (range->type)
{
case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV:
binding->set = is_buffer ? VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER : VKD3D_SET_INDEX_SAMPLED_IMAGE;
descriptor_set_size = descriptor_limits->sampled_image_max_descriptors;
break;
case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV:
binding->set = is_buffer ? VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER : VKD3D_SET_INDEX_STORAGE_IMAGE;
descriptor_set_size = descriptor_limits->storage_image_max_descriptors;
break;
case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV:
binding->set = VKD3D_SET_INDEX_UNIFORM_BUFFER;
descriptor_set_size = descriptor_limits->uniform_buffer_max_descriptors;
break;
case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER:
binding->set = VKD3D_SET_INDEX_SAMPLER;
descriptor_set_size = descriptor_limits->sampler_max_descriptors;
break;
default:
FIXME("Unhandled descriptor range type type %#x.\n", range->type);
binding->set = VKD3D_SET_INDEX_SAMPLED_IMAGE;
descriptor_set_size = descriptor_limits->sampled_image_max_descriptors;
break;
}
binding->set += root_signature->vk_set_count;
binding->binding = 0;
binding->count = vk_heap_binding_count_from_descriptor_range(range, descriptor_set_size);
}
static void d3d12_root_signature_map_vk_heap_binding(struct d3d12_root_signature *root_signature,
const struct d3d12_root_descriptor_table_range *range, bool buffer_descriptor,
enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context)
{
struct vkd3d_shader_resource_binding *mapping = &root_signature->descriptor_mapping[context->descriptor_index];
struct vkd3d_shader_descriptor_offset *offset = &root_signature->descriptor_offsets[context->descriptor_index++];
mapping->type = range->type;
mapping->register_space = range->register_space;
mapping->register_index = range->base_register_idx;
mapping->shader_visibility = shader_visibility;
mapping->flags = buffer_descriptor ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE;
vkd3d_descriptor_heap_binding_from_descriptor_range(range, buffer_descriptor, root_signature, &mapping->binding);
offset->static_offset = range->offset;
offset->dynamic_offset_index = context->push_constant_index;
}
static void d3d12_root_signature_map_vk_heap_uav_counter(struct d3d12_root_signature *root_signature,
const struct d3d12_root_descriptor_table_range *range, enum vkd3d_shader_visibility shader_visibility,
struct vkd3d_descriptor_set_context *context)
{
struct vkd3d_shader_uav_counter_binding *mapping = &root_signature->uav_counter_mapping[context->uav_counter_index];
struct vkd3d_shader_descriptor_offset *offset = &root_signature->uav_counter_offsets[context->uav_counter_index++];
mapping->register_space = range->register_space;
mapping->register_index = range->base_register_idx;
mapping->shader_visibility = shader_visibility;
mapping->binding.set = root_signature->vk_set_count + VKD3D_SET_INDEX_UAV_COUNTER;
mapping->binding.binding = 0;
mapping->binding.count = vk_heap_binding_count_from_descriptor_range(range,
root_signature->device->vk_info.descriptor_limits.storage_image_max_descriptors);
offset->static_offset = range->offset;
offset->dynamic_offset_index = context->push_constant_index;
}
static void d3d12_root_signature_map_descriptor_heap_binding(struct d3d12_root_signature *root_signature,
const struct d3d12_root_descriptor_table_range *range, enum vkd3d_shader_visibility shader_visibility,
struct vkd3d_descriptor_set_context *context)
{
bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV;
if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV || range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV)
{
d3d12_root_signature_map_vk_heap_binding(root_signature, range, true, shader_visibility, context);
if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV)
d3d12_root_signature_map_vk_heap_uav_counter(root_signature, range, shader_visibility, context);
}
d3d12_root_signature_map_vk_heap_binding(root_signature, range, is_buffer, shader_visibility, context);
}
static void d3d12_root_signature_map_descriptor_unbounded_binding(struct d3d12_root_signature *root_signature,
const struct d3d12_root_descriptor_table_range *range, unsigned int descriptor_offset,
enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context)
@ -868,6 +993,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
struct vkd3d_descriptor_set_context *context)
{
const struct d3d12_device *device = root_signature->device;
bool use_vk_heaps = root_signature->device->use_vk_heaps;
struct d3d12_root_descriptor_table *table;
unsigned int i, j, k, range_count;
uint32_t vk_binding;
@ -935,6 +1061,16 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
range = &table->ranges[j];
if (use_vk_heaps)
{
/* set, binding and vk_binding_count are not used. */
range->set = 0;
range->binding = 0;
range->vk_binding_count = 0;
d3d12_root_signature_map_descriptor_heap_binding(root_signature, range, shader_visibility, context);
continue;
}
range->set = root_signature->vk_set_count - root_signature->main_set;
if (root_signature->use_descriptor_arrays)
@ -1014,6 +1150,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
context->current_binding = cur_binding;
}
++context->push_constant_index;
}
return S_OK;
@ -1084,9 +1221,36 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa
}
context->current_binding = cur_binding;
if (device->use_vk_heaps)
return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0);
return S_OK;
}
static void d3d12_root_signature_init_descriptor_table_push_constants(struct d3d12_root_signature *root_signature,
const struct vkd3d_descriptor_set_context *context)
{
root_signature->descriptor_table_offset = 0;
if ((root_signature->descriptor_table_count = context->push_constant_index))
{
VkPushConstantRange *range = &root_signature->push_constant_ranges[D3D12_SHADER_VISIBILITY_ALL];
root_signature->descriptor_table_offset = align(range->size, 16);
range->size = root_signature->descriptor_table_offset
+ root_signature->descriptor_table_count * sizeof(uint32_t);
if (range->size > root_signature->device->vk_info.device_limits.maxPushConstantsSize)
FIXME("Push constants size %u exceeds maximum allowed size %u. Try VKD3D_CONFIG=virtual_heaps.\n",
range->size, root_signature->device->vk_info.device_limits.maxPushConstantsSize);
if (!root_signature->push_constant_range_count)
{
root_signature->push_constant_range_count = 1;
range->stageFlags = VK_SHADER_STAGE_ALL;
}
}
}
static bool vk_binding_uses_partial_binding(const VkDescriptorSetLayoutBinding *binding)
{
if (binding->descriptorCount == 1)
@ -1194,11 +1358,19 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device,
static unsigned int d3d12_root_signature_copy_descriptor_set_layouts(const struct d3d12_root_signature *root_signature,
VkDescriptorSetLayout *vk_set_layouts)
{
const struct d3d12_device *device = root_signature->device;
enum vkd3d_vk_descriptor_set_index set;
unsigned int i;
for (i = 0; i < root_signature->vk_set_count; ++i)
vk_set_layouts[i] = root_signature->descriptor_set_layouts[i].vk_layout;
if (device->use_vk_heaps)
{
for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set)
vk_set_layouts[i++] = device->vk_descriptor_heap_layouts[set].vk_set_layout;
}
return i;
}
@ -1210,6 +1382,7 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa
struct vkd3d_descriptor_set_context context;
VkDescriptorSetLayoutBinding *binding_desc;
struct d3d12_root_signature_info info;
bool use_vk_heaps;
unsigned int i;
HRESULT hr;
@ -1226,6 +1399,8 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa
root_signature->flags = desc->Flags;
root_signature->descriptor_mapping = NULL;
root_signature->descriptor_offsets = NULL;
root_signature->uav_counter_mapping = NULL;
root_signature->uav_counter_offsets = NULL;
root_signature->static_sampler_count = 0;
root_signature->static_samplers = NULL;
root_signature->device = device;
@ -1243,9 +1418,13 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa
}
root_signature->binding_count = info.binding_count;
root_signature->uav_mapping_count = info.uav_range_count;
root_signature->static_sampler_count = desc->NumStaticSamplers;
root_signature->root_descriptor_count = info.root_descriptor_count;
root_signature->use_descriptor_arrays = device->vk_info.EXT_descriptor_indexing;
root_signature->descriptor_table_count = 0;
use_vk_heaps = device->use_vk_heaps;
hr = E_OUTOFMEMORY;
root_signature->parameter_count = desc->NumParameters;
@ -1255,6 +1434,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa
if (!(root_signature->descriptor_mapping = vkd3d_calloc(root_signature->binding_count,
sizeof(*root_signature->descriptor_mapping))))
goto fail;
if (use_vk_heaps && (!(root_signature->uav_counter_mapping = vkd3d_calloc(root_signature->uav_mapping_count,
sizeof(*root_signature->uav_counter_mapping)))
|| !(root_signature->uav_counter_offsets = vkd3d_calloc(root_signature->uav_mapping_count,
sizeof(*root_signature->uav_counter_offsets)))))
goto fail;
if (root_signature->use_descriptor_arrays && !(root_signature->descriptor_offsets = vkd3d_calloc(
root_signature->binding_count, sizeof(*root_signature->descriptor_offsets))))
goto fail;
@ -1289,8 +1473,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa
goto fail;
if (FAILED(hr = d3d12_root_signature_init_static_samplers(root_signature, device, desc, &context)))
goto fail;
context.push_constant_index = 0;
if (FAILED(hr = d3d12_root_signature_init_root_descriptor_tables(root_signature, desc, &info, &context)))
goto fail;
if (use_vk_heaps)
d3d12_root_signature_init_descriptor_table_push_constants(root_signature, &context);
if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, &context, 0)))
goto fail;
@ -1770,7 +1957,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device,
static const struct vkd3d_shader_compile_option options[] =
{
{VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_3},
{VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_4},
};
stage_desc->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
@ -1821,7 +2008,7 @@ static int vkd3d_scan_dxbc(const D3D12_SHADER_BYTECODE *code,
static const struct vkd3d_shader_compile_option options[] =
{
{VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_3},
{VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_4},
};
compile_info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO;
@ -1862,7 +2049,7 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_device *device,
VK_CALL(vkDestroyShaderModule(device->vk_device, pipeline_info.stage.module, NULL));
if (vr < 0)
{
WARN("Failed to create Vulkan compute pipeline, hr %#x.", hr);
WARN("Failed to create Vulkan compute pipeline, hr %#x.\n", hr);
return hresult_from_vk_result(vr);
}
@ -1978,6 +2165,9 @@ static HRESULT d3d12_pipeline_state_find_and_init_uav_counters(struct d3d12_pipe
HRESULT hr;
int ret;
if (device->use_vk_heaps)
return S_OK;
shader_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO;
shader_info.next = NULL;
if ((ret = vkd3d_scan_dxbc(code, &shader_info)) < 0)
@ -2030,10 +2220,10 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st
{
offset_info.type = VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO;
offset_info.next = NULL;
offset_info.descriptor_table_offset = 0;
offset_info.descriptor_table_count = 0;
offset_info.descriptor_table_offset = root_signature->descriptor_table_offset;
offset_info.descriptor_table_count = root_signature->descriptor_table_count;
offset_info.binding_offsets = root_signature->descriptor_offsets;
offset_info.uav_counter_offsets = NULL;
offset_info.uav_counter_offsets = root_signature->uav_counter_offsets;
vkd3d_prepend_struct(&target_info, &offset_info);
}
@ -2045,8 +2235,16 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st
shader_interface.push_constant_buffer_count = root_signature->root_constant_count;
shader_interface.combined_samplers = NULL;
shader_interface.combined_sampler_count = 0;
if (root_signature->uav_counter_mapping)
{
shader_interface.uav_counters = root_signature->uav_counter_mapping;
shader_interface.uav_counter_count = root_signature->uav_mapping_count;
}
else
{
shader_interface.uav_counters = state->uav_counters.bindings;
shader_interface.uav_counter_count = state->uav_counters.binding_count;
}
vk_pipeline_layout = state->uav_counters.vk_pipeline_layout
? state->uav_counters.vk_pipeline_layout : root_signature->vk_pipeline_layout;
@ -2430,9 +2628,9 @@ static HRESULT compute_input_layout_offsets(const struct d3d12_device *device,
if (e->AlignedByteOffset != D3D12_APPEND_ALIGNED_ELEMENT)
offsets[i] = e->AlignedByteOffset;
else
offsets[i] = input_slot_offsets[e->InputSlot];
offsets[i] = align(input_slot_offsets[e->InputSlot], min(4, format->byte_count));
input_slot_offsets[e->InputSlot] = align(offsets[i] + format->byte_count, 4);
input_slot_offsets[e->InputSlot] = offsets[i] + format->byte_count;
}
return S_OK;
@ -2664,7 +2862,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
}
if (rt_desc->BlendEnable && rt_desc->LogicOpEnable)
{
WARN("Only one of BlendEnable or LogicOpEnable can be set to TRUE.");
WARN("Only one of BlendEnable or LogicOpEnable can be set to TRUE.\n");
hr = E_INVALIDARG;
goto fail;
}
@ -2807,10 +3005,10 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
{
offset_info.type = VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO;
offset_info.next = NULL;
offset_info.descriptor_table_offset = 0;
offset_info.descriptor_table_count = 0;
offset_info.descriptor_table_offset = root_signature->descriptor_table_offset;
offset_info.descriptor_table_count = root_signature->descriptor_table_count;
offset_info.binding_offsets = root_signature->descriptor_offsets;
offset_info.uav_counter_offsets = NULL;
offset_info.uav_counter_offsets = root_signature->uav_counter_offsets;
}
for (i = 0; i < ARRAY_SIZE(shader_stages); ++i)
@ -2852,8 +3050,10 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
break;
case VK_SHADER_STAGE_FRAGMENT_BIT:
shader_interface.uav_counters = state->uav_counters.bindings;
shader_interface.uav_counter_count = state->uav_counters.binding_count;
shader_interface.uav_counters = root_signature->uav_counter_mapping
? root_signature->uav_counter_mapping : state->uav_counters.bindings;
shader_interface.uav_counter_count = root_signature->uav_counter_mapping
? root_signature->uav_mapping_count : state->uav_counters.binding_count;
stage_target_info = &ps_target_info;
break;

View file

@ -510,3 +510,9 @@ HRESULT vkd3d_serialize_versioned_root_signature(const D3D12_VERSIONED_ROOT_SIGN
}
return hr;
}
void vkd3d_set_log_callback(PFN_vkd3d_log callback)
{
vkd3d_shader_set_log_callback(callback);
vkd3d_dbg_set_log_callback(callback);
}

View file

@ -59,6 +59,7 @@
#define VKD3D_MAX_SHADER_EXTENSIONS 3u
#define VKD3D_MAX_SHADER_STAGES 5u
#define VKD3D_MAX_VK_SYNC_OBJECTS 4u
#define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u
#define VKD3D_MAX_DESCRIPTOR_SETS 64u
/* D3D12 binding tier 3 has a limit of 2048 samplers. */
#define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u
@ -67,6 +68,8 @@
* this number to prevent excessive pool memory use. */
#define VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE (16 * 1024u)
extern LONG64 object_global_serial_id;
struct d3d12_command_list;
struct d3d12_device;
struct d3d12_resource;
@ -123,12 +126,14 @@ struct vkd3d_vulkan_info
bool KHR_maintenance3;
bool KHR_push_descriptor;
bool KHR_sampler_mirror_clamp_to_edge;
bool KHR_timeline_semaphore;
/* EXT device extensions */
bool EXT_calibrated_timestamps;
bool EXT_conditional_rendering;
bool EXT_debug_marker;
bool EXT_depth_clip_enable;
bool EXT_descriptor_indexing;
bool EXT_robustness2;
bool EXT_shader_demote_to_helper_invocation;
bool EXT_shader_stencil_export;
bool EXT_texel_buffer_alignment;
@ -156,6 +161,7 @@ struct vkd3d_vulkan_info
enum vkd3d_config_flags
{
VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001,
VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS = 0x00000002,
};
struct vkd3d_instance
@ -327,7 +333,11 @@ struct vkd3d_waiting_fence
{
struct d3d12_fence *fence;
uint64_t value;
struct vkd3d_queue *queue;
union
{
VkFence vk_fence;
VkSemaphore vk_semaphore;
} u;
uint64_t queue_sequence_number;
};
@ -338,28 +348,17 @@ struct vkd3d_fence_worker
struct vkd3d_cond cond;
struct vkd3d_cond fence_destruction_cond;
bool should_exit;
bool pending_fence_destruction;
LONG enqueued_fence_count;
struct vkd3d_enqueued_fence
{
VkFence vk_fence;
struct vkd3d_waiting_fence waiting_fence;
} *enqueued_fences;
size_t enqueued_fences_size;
size_t fence_count;
VkFence *vk_fences;
size_t vk_fences_size;
struct vkd3d_waiting_fence *fences;
size_t fences_size;
void (*wait_for_gpu_fence)(struct vkd3d_fence_worker *worker, const struct vkd3d_waiting_fence *enqueued_fence);
struct vkd3d_queue *queue;
struct d3d12_device *device;
};
HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, struct d3d12_device *device);
HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, struct d3d12_device *device);
struct vkd3d_gpu_va_allocation
{
D3D12_GPU_VIRTUAL_ADDRESS base;
@ -500,20 +499,29 @@ HRESULT vkd3d_set_private_data_interface(struct vkd3d_private_store *store, cons
struct vkd3d_signaled_semaphore
{
struct list entry;
uint64_t value;
union
{
struct
{
VkSemaphore vk_semaphore;
VkFence vk_fence;
bool is_acquired;
} binary;
uint64_t timeline_value;
} u;
const struct vkd3d_queue *signalling_queue;
};
/* ID3D12Fence */
struct d3d12_fence
{
ID3D12Fence ID3D12Fence_iface;
LONG internal_refcount;
LONG refcount;
uint64_t value;
uint64_t max_pending_value;
struct vkd3d_mutex mutex;
struct vkd3d_cond null_event_cond;
@ -526,10 +534,13 @@ struct d3d12_fence
size_t events_size;
size_t event_count;
struct list semaphores;
unsigned int semaphore_count;
VkSemaphore timeline_semaphore;
uint64_t timeline_value;
uint64_t pending_timeline_value;
LONG pending_worker_operation_count;
struct vkd3d_signaled_semaphore *semaphores;
size_t semaphores_size;
unsigned int semaphore_count;
VkFence old_vk_fences[VKD3D_MAX_VK_SYNC_OBJECTS];
@ -541,6 +552,9 @@ struct d3d12_fence
HRESULT d3d12_fence_create(struct d3d12_device *device, uint64_t initial_value,
D3D12_FENCE_FLAGS flags, struct d3d12_fence **fence);
VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint64_t initial_value,
VkSemaphore *timeline_semaphore);
/* ID3D12Heap */
struct d3d12_heap
{
@ -648,6 +662,7 @@ struct vkd3d_view
{
LONG refcount;
enum vkd3d_view_type type;
uint64_t serial_id;
union
{
VkBufferView vk_buffer_view;
@ -694,6 +709,12 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c
bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image,
const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view);
struct vkd3d_view_info
{
uint64_t written_serial_id;
struct vkd3d_view *view;
};
struct d3d12_desc
{
uint32_t magic;
@ -701,7 +722,7 @@ struct d3d12_desc
union
{
VkDescriptorBufferInfo vk_cbv_info;
struct vkd3d_view *view;
struct vkd3d_view_info view_info;
} u;
};
@ -772,11 +793,55 @@ static inline struct d3d12_dsv_desc *d3d12_dsv_desc_from_cpu_handle(D3D12_CPU_DE
void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device,
struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc);
enum vkd3d_vk_descriptor_set_index
{
VKD3D_SET_INDEX_UNIFORM_BUFFER = 0,
VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER = 1,
VKD3D_SET_INDEX_SAMPLED_IMAGE = 2,
VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER = 3,
VKD3D_SET_INDEX_STORAGE_IMAGE = 4,
VKD3D_SET_INDEX_SAMPLER = 5,
VKD3D_SET_INDEX_UAV_COUNTER = 6,
VKD3D_SET_INDEX_COUNT = 7
};
extern const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[];
static inline enum vkd3d_vk_descriptor_set_index vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(
VkDescriptorType type)
{
assert(type <= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
assert(vk_descriptor_set_index_table[type] < VKD3D_SET_INDEX_COUNT);
return vk_descriptor_set_index_table[type];
}
struct vkd3d_vk_descriptor_heap_layout
{
VkDescriptorType type;
bool buffer_dimension;
D3D12_DESCRIPTOR_HEAP_TYPE applicable_heap_type;
unsigned int count;
VkDescriptorSetLayout vk_set_layout;
};
#define VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE 64
struct d3d12_descriptor_heap_vk_set
{
VkDescriptorSet vk_set;
VkDescriptorBufferInfo vk_buffer_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE];
VkBufferView vk_buffer_views[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE];
VkDescriptorImageInfo vk_image_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE];
VkWriteDescriptorSet vk_descriptor_writes[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE];
};
/* ID3D12DescriptorHeap */
struct d3d12_descriptor_heap
{
ID3D12DescriptorHeap ID3D12DescriptorHeap_iface;
LONG refcount;
uint64_t serial_id;
D3D12_DESCRIPTOR_HEAP_DESC desc;
@ -784,12 +849,32 @@ struct d3d12_descriptor_heap
struct vkd3d_private_store private_store;
VkDescriptorPool vk_descriptor_pool;
struct d3d12_descriptor_heap_vk_set vk_descriptor_sets[VKD3D_SET_INDEX_COUNT];
struct vkd3d_mutex vk_sets_mutex;
BYTE descriptors[];
};
HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device,
const D3D12_DESCRIPTOR_HEAP_DESC *desc, struct d3d12_descriptor_heap **descriptor_heap);
struct d3d12_desc_copy_location
{
struct d3d12_desc src;
struct d3d12_desc *dst;
};
struct d3d12_desc_copy_info
{
unsigned int count;
bool uav_counter;
};
void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info,
struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set,
struct d3d12_device *device);
/* ID3D12QueryHeap */
struct d3d12_query_heap
{
@ -898,8 +983,13 @@ struct d3d12_root_signature
D3D12_ROOT_SIGNATURE_FLAGS flags;
unsigned int binding_count;
unsigned int uav_mapping_count;
struct vkd3d_shader_resource_binding *descriptor_mapping;
struct vkd3d_shader_descriptor_offset *descriptor_offsets;
struct vkd3d_shader_uav_counter_binding *uav_counter_mapping;
struct vkd3d_shader_descriptor_offset *uav_counter_offsets;
unsigned int descriptor_table_offset;
unsigned int descriptor_table_count;
unsigned int root_constant_count;
struct vkd3d_shader_push_constant_buffer *root_constants;
@ -1118,6 +1208,8 @@ struct vkd3d_pipeline_bindings
struct d3d12_desc *descriptor_tables[D3D12_MAX_ROOT_COST];
uint64_t descriptor_table_dirty_mask;
uint64_t descriptor_table_active_mask;
uint64_t cbv_srv_uav_heap_id;
uint64_t sampler_heap_id;
VkBufferView *vk_uav_counter_views;
size_t vk_uav_counter_views_size;
@ -1179,6 +1271,8 @@ struct d3d12_command_list
VkBuffer so_counter_buffers[D3D12_SO_BUFFER_SLOT_COUNT];
VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT];
void (*update_descriptors)(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point);
struct vkd3d_private_store private_store;
};
@ -1217,6 +1311,42 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index,
void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device);
void vkd3d_queue_release(struct vkd3d_queue *queue);
enum vkd3d_cs_op
{
VKD3D_CS_OP_WAIT,
VKD3D_CS_OP_SIGNAL,
VKD3D_CS_OP_EXECUTE,
};
struct vkd3d_cs_wait
{
struct d3d12_fence *fence;
uint64_t value;
};
struct vkd3d_cs_signal
{
struct d3d12_fence *fence;
uint64_t value;
};
struct vkd3d_cs_execute
{
VkCommandBuffer *buffers;
unsigned int buffer_count;
};
struct vkd3d_cs_op_data
{
enum vkd3d_cs_op opcode;
union
{
struct vkd3d_cs_wait wait;
struct vkd3d_cs_signal signal;
struct vkd3d_cs_execute execute;
} u;
};
/* ID3D12CommandQueue */
struct d3d12_command_queue
{
@ -1227,11 +1357,18 @@ struct d3d12_command_queue
struct vkd3d_queue *vkd3d_queue;
struct vkd3d_fence_worker fence_worker;
const struct d3d12_fence *last_waited_fence;
uint64_t last_waited_fence_value;
struct d3d12_device *device;
struct vkd3d_mutex op_mutex;
struct vkd3d_cs_op_data *ops;
size_t ops_count;
size_t ops_size;
bool is_flushing;
struct vkd3d_private_store private_store;
};
@ -1329,7 +1466,6 @@ struct d3d12_device
struct vkd3d_gpu_descriptor_allocator gpu_descriptor_allocator;
struct vkd3d_gpu_va_allocator gpu_va_allocator;
struct vkd3d_fence_worker fence_worker;
struct vkd3d_mutex mutex;
struct vkd3d_mutex desc_mutex[8];
@ -1354,6 +1490,9 @@ struct d3d12_device
unsigned int queue_family_count;
VkTimeDomainEXT vk_host_time_domain;
struct d3d12_command_queue *blocked_queues[VKD3D_MAX_DEVICE_BLOCKED_QUEUES];
unsigned int blocked_queue_count;
struct vkd3d_instance *vkd3d_instance;
IUnknown *parent;
@ -1370,6 +1509,8 @@ struct d3d12_device
struct vkd3d_uav_clear_state uav_clear_state;
VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT];
struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT];
bool use_vk_heaps;
};
HRESULT d3d12_device_create(struct vkd3d_instance *instance,

View file

@ -195,6 +195,11 @@ VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutSupportKHR)
/* VK_KHR_push_descriptor */
VK_DEVICE_EXT_PFN(vkCmdPushDescriptorSetKHR)
/* VK_KHR_timeline_semaphore */
VK_DEVICE_EXT_PFN(vkGetSemaphoreCounterValueKHR)
VK_DEVICE_EXT_PFN(vkWaitSemaphoresKHR)
VK_DEVICE_EXT_PFN(vkSignalSemaphoreKHR)
/* VK_EXT_calibrated_timestamps */
VK_DEVICE_EXT_PFN(vkGetCalibratedTimestampsEXT)