From 2f0c0f4c385441be366ef7b7c775222ef3a6c8b0 Mon Sep 17 00:00:00 2001 From: Alexandre Julliard Date: Thu, 2 Nov 2023 12:08:43 +0100 Subject: [PATCH] winedump: Re-import the demangling code from msvcrt. --- tools/winedump/dump.c | 53 +- tools/winedump/main.c | 2 +- tools/winedump/msmangle.c | 2185 +++++++++++++++++++++++++------------ tools/winedump/winedump.h | 2 +- 4 files changed, 1483 insertions(+), 759 deletions(-) diff --git a/tools/winedump/dump.c b/tools/winedump/dump.c index 42916ac4fa3..abfc2ab8c74 100644 --- a/tools/winedump/dump.c +++ b/tools/winedump/dump.c @@ -130,58 +130,11 @@ void dump_unicode_str( const WCHAR *str, int len ) const char* get_symbol_str(const char* symname) { - char* tmp; - const char* ret; + const char* ret = NULL; if (!symname) return "(nil)"; - if (globals.do_demangle) - { - parsed_symbol symbol; - - symbol_init(&symbol, symname); - if (!symbol_demangle(&symbol)) - ret = symname; - else if (symbol.flags & SYM_DATA) - { - ret = tmp = dump_want_n(strlen(symbol.arg_text[0]) + 1); - if (tmp) strcpy(tmp, symbol.arg_text[0]); - } - else - { - unsigned int i, len, start = symbol.flags & SYM_THISCALL ? 1 : 0; - - len = strlen(symbol.return_text) + 3 /* ' __' */ + - strlen(symbol_get_call_convention(&symbol)) + 1 /* ' ' */+ - strlen(symbol.function_name) + 1 /* ')' */; - if (!symbol.argc || (symbol.argc == 1 && symbol.flags & SYM_THISCALL)) - len += 4 /* "void" */; - else for (i = start; i < symbol.argc; i++) - len += (i > start ? 2 /* ", " */ : 0 /* "" */) + strlen(symbol.arg_text[i]); - if (symbol.varargs) len += 5 /* ", ..." 
*/; - len += 2; /* ")\0" */ - - ret = tmp = dump_want_n(len); - if (tmp) - { - sprintf(tmp, "%s __%s %s(", - symbol.return_text, - symbol_get_call_convention(&symbol), - symbol.function_name); - if (!symbol.argc || (symbol.argc == 1 && symbol.flags & SYM_THISCALL)) - strcat(tmp, "void"); - else for (i = start; i < symbol.argc; i++) - { - if (i > start) strcat(tmp, ", "); - strcat(tmp, symbol.arg_text[i]); - } - if (symbol.varargs) strcat(tmp, ", ..."); - strcat(tmp, ")"); - } - } - symbol_clear(&symbol); - } - else ret = symname; - return ret; + if (globals.do_demangle) ret = demangle( symname ); + return ret ? ret : symname; } const char* get_guid_str(const GUID* guid) diff --git a/tools/winedump/main.c b/tools/winedump/main.c index f0668ec0b4f..969b70d7d03 100644 --- a/tools/winedump/main.c +++ b/tools/winedump/main.c @@ -453,7 +453,7 @@ int main (int argc, char *argv[]) if (globals.do_code && symbol_searched(count, symbol.symbol)) { /* Attempt to get information about the symbol */ - BOOL result = symbol_demangle (&symbol) || symbol_search(&symbol); + BOOL result = symbol_search(&symbol); if (result && symbol.function_name) /* Clean up the prototype */ diff --git a/tools/winedump/msmangle.c b/tools/winedump/msmangle.c index 4408e418191..19ccc18f714 100644 --- a/tools/winedump/msmangle.c +++ b/tools/winedump/msmangle.c @@ -2,6 +2,7 @@ * Demangle VC++ symbols into C function prototypes * * Copyright 2000 Jon Griffiths + * 2004 Eric Pouech * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -21,682 +22,708 @@ #include "config.h" #include "winedump.h" +#include "winver.h" +#include "imagehlp.h" + +#define UNDNAME_NO_COMPLEX_TYPE (0x8000) + +struct array +{ + unsigned start; /* first valid reference in array */ + unsigned num; /* total number of used elts */ + unsigned max; + unsigned alloc; + char** elts; +}; + +/* Structure holding a parsed symbol */ +struct parsed_symbol +{ + unsigned 
flags; /* the UNDNAME_ flags used for demangling */ + + const char* current; /* pointer in input (mangled) string */ + char* result; /* demangled string */ + + struct array names; /* array of names for back reference */ + struct array stack; /* stack of parsed strings */ + + void* alloc_list; /* linked list of allocated blocks */ + unsigned avail_in_first; /* number of available bytes in head block */ +}; + +enum datatype_e +{ + DT_NO_LEADING_WS = 0x01, + DT_NO_LRSEP_WS = 0x02, +}; /* Type for parsing mangled types */ -typedef struct _compound_type +struct datatype_t { - char dest_type; - int flags; - BOOL have_qualifiers; - char *expression; -} compound_type; + const char* left; + const char* right; + enum datatype_e flags; +}; +static BOOL symbol_demangle(struct parsed_symbol* sym); +static char* get_class_name(struct parsed_symbol* sym); -/* Initialise a compound type structure */ -#define INIT_CT(ct) do { memset (&ct, 0, sizeof (ct)); } while (0) +#define und_alloc(sym,len) xmalloc(len) -/* free the memory used by a compound structure */ -#define FREE_CT(ct) free (ct.expression) - -/* Flags for data types */ -#define DATA_VTABLE 0x1 - -/* Internal functions */ -static char *demangle_datatype (char **str, compound_type *ct, - parsed_symbol* sym); - -static char *get_constraints_convention_1 (char **str, compound_type *ct); - -static char *get_constraints_convention_2 (char **str, compound_type *ct); - -static char *get_type_string (const char c, const int constraints); - -static int get_type_constant (const char c, const int constraints); - -static char *get_pointer_type_string (compound_type *ct, - const char *expression); - - -/******************************************************************* - * demangle_symbol - * - * Demangle a C++ linker symbol into a C prototype +/****************************************************************** + * str_array_init + * Initialises an array of strings */ -BOOL symbol_demangle (parsed_symbol *sym) +static void 
str_array_init(struct array* a) { - compound_type ct; - BOOL is_static = FALSE; - int is_const = 0; - char *function_name = NULL; - char *class_name = NULL; - char *name; - const char *const_status; - static unsigned int hash = 0; /* In case of overloaded functions */ - unsigned int data_flags = 0; + a->start = a->num = a->max = a->alloc = 0; + a->elts = NULL; +} - assert (globals.do_code); - assert (sym && sym->symbol); +/****************************************************************** + * str_array_push + * Adding a new string to an array + */ +static BOOL str_array_push(struct parsed_symbol* sym, const char* ptr, int len, + struct array* a) +{ + char** new; - hash++; + assert(ptr); + assert(a); - /* MS mangled names always begin with '?' */ - name = sym->symbol; - if (*name++ != '?') - return FALSE; - - if (VERBOSE) - puts ("Attempting to demangle symbol"); - - /* Then function name or operator code */ - if (*name == '?') - { - /* C++ operator code (one character, or two if the first is '_') */ - switch (*++name) + if (!a->alloc) { - case '0': function_name = xstrdup ("ctor"); break; - case '1': function_name = xstrdup ("dtor"); break; - case '2': function_name = xstrdup ("operator_new"); break; - case '3': function_name = xstrdup ("operator_delete"); break; - case '4': function_name = xstrdup ("operator_equals"); break; - case '5': function_name = xstrdup ("operator_shiftright"); break; - case '6': function_name = xstrdup ("operator_shiftleft"); break; - case '7': function_name = xstrdup ("operator_not"); break; - case '8': function_name = xstrdup ("operator_equalsequals"); break; - case '9': function_name = xstrdup ("operator_notequals"); break; - case 'A': function_name = xstrdup ("operator_array"); break; - case 'C': function_name = xstrdup ("operator_dereference"); break; - case 'D': function_name = xstrdup ("operator_multiply"); break; - case 'E': function_name = xstrdup ("operator_plusplus"); break; - case 'F': function_name = xstrdup 
("operator_minusminus"); break; - case 'G': function_name = xstrdup ("operator_minus"); break; - case 'H': function_name = xstrdup ("operator_plus"); break; - case 'I': function_name = xstrdup ("operator_address"); break; - case 'J': function_name = xstrdup ("operator_dereferencememberptr"); break; - case 'K': function_name = xstrdup ("operator_divide"); break; - case 'L': function_name = xstrdup ("operator_modulo"); break; - case 'M': function_name = xstrdup ("operator_lessthan"); break; - case 'N': function_name = xstrdup ("operator_lessthanequal"); break; - case 'O': function_name = xstrdup ("operator_greaterthan"); break; - case 'P': function_name = xstrdup ("operator_greaterthanequal"); break; - case 'Q': function_name = xstrdup ("operator_comma"); break; - case 'R': function_name = xstrdup ("operator_functioncall"); break; - case 'S': function_name = xstrdup ("operator_complement"); break; - case 'T': function_name = xstrdup ("operator_xor"); break; - case 'U': function_name = xstrdup ("operator_logicalor"); break; - case 'V': function_name = xstrdup ("operator_logicaland"); break; - case 'W': function_name = xstrdup ("operator_or"); break; - case 'X': function_name = xstrdup ("operator_multiplyequals"); break; - case 'Y': function_name = xstrdup ("operator_plusequals"); break; - case 'Z': function_name = xstrdup ("operator_minusequals"); break; - case '_': - switch (*++name) - { - case '0': function_name = xstrdup ("operator_divideequals"); break; - case '1': function_name = xstrdup ("operator_moduloequals"); break; - case '2': function_name = xstrdup ("operator_shiftrightequals"); break; - case '3': function_name = xstrdup ("operator_shiftleftequals"); break; - case '4': function_name = xstrdup ("operator_andequals"); break; - case '5': function_name = xstrdup ("operator_orequals"); break; - case '6': function_name = xstrdup ("operator_xorequals"); break; - case '7': function_name = xstrdup ("vftable"); data_flags = DATA_VTABLE; break; - case '8': 
function_name = xstrdup ("vbtable"); data_flags = DATA_VTABLE; break; - case '9': function_name = xstrdup ("vcall"); data_flags = DATA_VTABLE; break; - case 'A': function_name = xstrdup ("typeof"); data_flags = DATA_VTABLE; break; - case 'B': function_name = xstrdup ("local_static_guard"); data_flags = DATA_VTABLE; break; - case 'C': function_name = xstrdup ("string"); data_flags = DATA_VTABLE; break; - case 'D': function_name = xstrdup ("vbase_dtor"); data_flags = DATA_VTABLE; break; - case 'E': function_name = xstrdup ("vector_dtor"); break; - case 'G': function_name = xstrdup ("scalar_dtor"); break; - case 'H': function_name = xstrdup ("vector_ctor_iter"); break; - case 'I': function_name = xstrdup ("vector_dtor_iter"); break; - case 'J': function_name = xstrdup ("vector_vbase_ctor_iter"); break; - case 'L': function_name = xstrdup ("eh_vector_ctor_iter"); break; - case 'M': function_name = xstrdup ("eh_vector_dtor_iter"); break; - case 'N': function_name = xstrdup ("eh_vector_vbase_ctor_iter"); break; - case 'O': function_name = xstrdup ("copy_ctor_closure"); break; - case 'S': function_name = xstrdup ("local_vftable"); data_flags = DATA_VTABLE; break; - case 'T': function_name = xstrdup ("local_vftable_ctor_closure"); break; - case 'U': function_name = xstrdup ("operator_new_vector"); break; - case 'V': function_name = xstrdup ("operator_delete_vector"); break; - case 'X': function_name = xstrdup ("placement_new_closure"); break; - case 'Y': function_name = xstrdup ("placement_delete_closure"); break; - default: - return FALSE; - } - break; - default: - /* FIXME: Other operators */ - return FALSE; + new = und_alloc(sym, (a->alloc = 32) * sizeof(a->elts[0])); + if (!new) return FALSE; + a->elts = new; } - name++; - } - else - { - /* Type or function name terminated by '@' */ - function_name = name; - while (*name && *name++ != '@') ; - if (!*name) - return FALSE; - function_name = str_substring (function_name, name - 1); - } - - /* Either a class name, or '@' 
if the symbol is not a class member */ - if (*name == '@') - { - class_name = xstrdup ("global"); /* Non member function (or a datatype) */ - name++; - } - else - { - /* Class the function is associated with, terminated by '@@' */ - class_name = name; - while (*name && *name++ != '@') ; - if (*name++ != '@') { - free (function_name); - return FALSE; - } - class_name = str_substring (class_name, name - 2); /* Allocates a new string */ - } - - /* Function/Data type and access level */ - /* FIXME: why 2 possible letters for each option? */ - switch(*name++) - { - /* Data */ - - case '0' : /* private static */ - case '1' : /* protected static */ - case '2' : /* public static */ - is_static = TRUE; - /* Fall through */ - case '3' : /* non static */ - case '4' : /* non static */ - /* Data members need to be implemented: report */ - INIT_CT (ct); - if (!demangle_datatype (&name, &ct, sym)) + else if (a->max >= a->alloc) { - if (VERBOSE) - printf ("/*FIXME: %s: unknown data*/\n", sym->symbol); - free (function_name); - free (class_name); - return FALSE; + new = und_alloc(sym, (a->alloc * 2) * sizeof(a->elts[0])); + if (!new) return FALSE; + memcpy(new, a->elts, a->alloc * sizeof(a->elts[0])); + a->alloc *= 2; + a->elts = new; } - sym->flags |= SYM_DATA; - sym->argc = 1; - sym->arg_name[0] = strmake( "%s_%s%s_%s", OUTPUT_UC_DLL_NAME, class_name, - is_static ? 
"static" : "", function_name ); - sym->arg_text[0] = strmake( "%s %s", ct.expression, sym->arg_name[0] ); - FREE_CT (ct); - free (function_name); - free (class_name); + if (len == -1) len = strlen(ptr); + a->elts[a->num] = und_alloc(sym, len + 1); + assert(a->elts[a->num]); + memcpy(a->elts[a->num], ptr, len); + a->elts[a->num][len] = '\0'; + if (++a->num >= a->max) a->max = a->num; return TRUE; +} - case '6' : /* compiler generated static */ - case '7' : /* compiler generated static */ - if (data_flags & DATA_VTABLE) +/****************************************************************** + * str_array_get_ref + * Extracts a reference from an existing array (doing proper type + * checking) + */ +static char* str_array_get_ref(struct array* cref, unsigned idx) +{ + assert(cref); + if (cref->start + idx >= cref->max) return NULL; + return cref->elts[cref->start + idx]; +} + +/****************************************************************** + * str_printf + * Helper for printf type of command (only %s and %c are implemented) + * while dynamically allocating the buffer + */ +static char* WINAPIV str_printf(struct parsed_symbol* sym, const char* format, ...) 
+{ + va_list args; + unsigned int len = 1, i, sz; + char* tmp; + char* p; + char* t; + + va_start(args, format); + for (i = 0; format[i]; i++) { - sym->flags |= SYM_DATA; - sym->argc = 1; - sym->arg_name[0] = strmake( "%s_%s_%s", OUTPUT_UC_DLL_NAME, class_name, function_name ); - sym->arg_text[0] = strmake( "void *%s", sym->arg_name[0] ); - - if (VERBOSE) - puts ("Demangled symbol OK [vtable]"); - free (function_name); - free (class_name); - return TRUE; + if (format[i] == '%') + { + switch (format[++i]) + { + case 's': t = va_arg(args, char*); if (t) len += strlen(t); break; + case 'c': (void)va_arg(args, int); len++; break; + default: i--; /* fall through */ + case '%': len++; break; + } + } + else len++; } - free (function_name); - free (class_name); - return FALSE; - - /* Functions */ - - case 'E' : /* private virtual */ - case 'F' : /* private virtual */ - case 'M' : /* protected virtual */ - case 'N' : /* protected virtual */ - case 'U' : /* public virtual */ - case 'V' : /* public virtual */ - /* Virtual functions need to be added to the exported vtable: report */ - if (VERBOSE) - printf ("/*FIXME %s: %s::%s is virtual-add to vftable*/\n", sym->symbol, - class_name, function_name); - /* Fall through */ - case 'A' : /* private */ - case 'B' : /* private */ - case 'I' : /* protected */ - case 'J' : /* protected */ - case 'Q' : /* public */ - case 'R' : /* public */ - /* Implicit 'this' pointer */ - sym->arg_text [sym->argc] = strmake( "struct %s *", class_name ); - sym->arg_type [sym->argc] = ARG_POINTER; - sym->arg_flag [sym->argc] = 0; - sym->arg_name [sym->argc++] = xstrdup ("_this"); - /* New struct definitions can be 'grep'ed out for making a fixup header */ - if (VERBOSE) - printf ("struct %s { void **vtable; /*FIXME: class definition */ };\n", class_name); - break; - case 'C' : /* private: static */ - case 'D' : /* private: static */ - case 'K' : /* protected: static */ - case 'L' : /* protected: static */ - case 'S' : /* public: static */ - case 'T' : 
/* public: static */ - is_static = TRUE; /* No implicit this pointer */ - break; - case 'Y' : - case 'Z' : - break; - /* FIXME: G,H / O,P / W,X are private / protected / public thunks */ - default: - free (function_name); - free (class_name); - return FALSE; - } - - /* If there is an implicit this pointer, const status follows */ - if (sym->argc) - { - switch (*name++) - { - case 'A': break; /* non-const */ - case 'B': is_const = CT_CONST; break; - case 'C': is_const = CT_VOLATILE; break; - case 'D': is_const = (CT_CONST | CT_VOLATILE); break; - default: - free (function_name); - free (class_name); - return FALSE; - } - } - - /* Next is the calling convention */ - switch (*name++) - { - case 'A': /* __cdecl */ - case 'B': /* __cdecl __declspec(dllexport) */ - if (!sym->argc) + va_end(args); + if (!(tmp = und_alloc(sym, len))) return NULL; + va_start(args, format); + for (p = tmp, i = 0; format[i]; i++) { - sym->flags |= SYM_CDECL; - break; + if (format[i] == '%') + { + switch (format[++i]) + { + case 's': + t = va_arg(args, char*); + if (t) + { + sz = strlen(t); + memcpy(p, t, sz); + p += sz; + } + break; + case 'c': + *p++ = (char)va_arg(args, int); + break; + default: i--; /* fall through */ + case '%': *p++ = '%'; break; + } + } + else *p++ = format[i]; } - /* Else fall through */ - case 'C': /* __pascal */ - case 'D': /* __pascal __declspec(dllexport) */ - case 'E': /* __thiscall */ - case 'F': /* __thiscall __declspec(dllexport) */ - case 'G': /* __stdcall */ - case 'H': /* __stdcall __declspec(dllexport) */ - case 'I': /* __fastcall */ - case 'J': /* __fastcall __declspec(dllexport)*/ - case 'K': /* default (none given) */ - if (sym->argc) - sym->flags |= SYM_THISCALL; + va_end(args); + *p = '\0'; + return tmp; +} + +enum datatype_flags +{ + IN_ARGS = 0x01, + WS_AFTER_QUAL_IF = 0x02, +}; + +/* forward declaration */ +static BOOL demangle_datatype(struct parsed_symbol* sym, struct datatype_t* ct, + struct array* pmt, enum datatype_flags flags); + +static const 
char* get_number(struct parsed_symbol* sym) +{ + char* ptr; + BOOL sgn = FALSE; + + if (*sym->current == '?') + { + sgn = TRUE; + sym->current++; + } + if (*sym->current >= '0' && *sym->current <= '8') + { + ptr = und_alloc(sym, 3); + if (sgn) ptr[0] = '-'; + ptr[sgn ? 1 : 0] = *sym->current + 1; + ptr[sgn ? 2 : 1] = '\0'; + sym->current++; + } + else if (*sym->current == '9') + { + ptr = und_alloc(sym, 4); + if (sgn) ptr[0] = '-'; + ptr[sgn ? 1 : 0] = '1'; + ptr[sgn ? 2 : 1] = '0'; + ptr[sgn ? 3 : 2] = '\0'; + sym->current++; + } + else if (*sym->current >= 'A' && *sym->current <= 'P') + { + int ret = 0; + + while (*sym->current >= 'A' && *sym->current <= 'P') + { + ret *= 16; + ret += *sym->current++ - 'A'; + } + if (*sym->current != '@') return NULL; + + ptr = und_alloc(sym, 17); + sprintf(ptr, "%s%u", sgn ? "-" : "", ret); + sym->current++; + } + else return NULL; + return ptr; +} + +/****************************************************************** + * get_args + * Parses a list of function/method arguments, creates a string corresponding + * to the arguments' list. + */ +static char* get_args(struct parsed_symbol* sym, struct array* pmt_ref, BOOL z_term, + char open_char, char close_char) + +{ + struct datatype_t ct; + struct array arg_collect; + char* args_str = NULL; + char* last; + unsigned int i; + + str_array_init(&arg_collect); + + /* Now come the function arguments */ + while (*sym->current) + { + /* Decode each data type and append it to the argument list */ + if (*sym->current == '@') + { + sym->current++; + break; + } + if (!demangle_datatype(sym, &ct, pmt_ref, IN_ARGS)) + return NULL; + /* 'void' terminates an argument list in a function */ + if (z_term && !strcmp(ct.left, "void")) break; + if (!str_array_push(sym, str_printf(sym, "%s%s", ct.left, ct.right), -1, + &arg_collect)) + return NULL; + if (!strcmp(ct.left, "...")) break; + } + /* Functions are always terminated by 'Z'. 
If we made it this far and + * don't find it, we have incorrectly identified a data type. + */ + if (z_term && *sym->current++ != 'Z') return NULL; + + if (arg_collect.num == 0 || + (arg_collect.num == 1 && !strcmp(arg_collect.elts[0], "void"))) + return str_printf(sym, "%cvoid%c", open_char, close_char); + for (i = 1; i < arg_collect.num; i++) + { + args_str = str_printf(sym, "%s,%s", args_str, arg_collect.elts[i]); + } + + last = args_str ? args_str : arg_collect.elts[0]; + if (close_char == '>' && last[strlen(last) - 1] == '>') + args_str = str_printf(sym, "%c%s%s %c", + open_char, arg_collect.elts[0], args_str, close_char); else - sym->flags |= SYM_STDCALL; - break; - default: - free (function_name); - free (class_name); - return FALSE; - } + args_str = str_printf(sym, "%c%s%s%c", + open_char, arg_collect.elts[0], args_str, close_char); - /* Return type, or @ if 'void' */ - if (*name == '@') - { - sym->return_text = xstrdup ("void"); - sym->return_type = ARG_VOID; - name++; - } - else - { - INIT_CT (ct); - if (!demangle_datatype (&name, &ct, sym)) { - free (function_name); - free (class_name); - return FALSE; - } - sym->return_text = ct.expression; - sym->return_type = get_type_constant(ct.dest_type, ct.flags); - ct.expression = NULL; - FREE_CT (ct); - } + return args_str; +} - /* Now come the function arguments */ - while (*name && *name != 'Z') - { - /* Decode each data type and append it to the argument list */ - if (*name != '@') +static void append_extended_qualifier(struct parsed_symbol *sym, const char **where, + const char *str, BOOL is_ms_keyword) +{ + if (!is_ms_keyword || !(sym->flags & UNDNAME_NO_MS_KEYWORDS)) { - INIT_CT (ct); - if (!demangle_datatype(&name, &ct, sym)) { - free (function_name); - free (class_name); + if (is_ms_keyword && (sym->flags & UNDNAME_NO_LEADING_UNDERSCORES)) + str += 2; + *where = *where ? str_printf(sym, "%s%s%s%s", *where, is_ms_keyword ? " " : "", str, is_ms_keyword ? 
"" : " ") : + str_printf(sym, "%s%s", str, is_ms_keyword ? "" : " "); + } +} + +static void get_extended_qualifier(struct parsed_symbol *sym, struct datatype_t *xdt) +{ + unsigned fl = 0; + xdt->left = xdt->right = NULL; + xdt->flags = 0; + for (;;) + { + switch (*sym->current) + { + case 'E': append_extended_qualifier(sym, &xdt->right, "__ptr64", TRUE); fl |= 2; break; + case 'F': append_extended_qualifier(sym, &xdt->left, "__unaligned", TRUE); fl |= 2; break; +#ifdef _UCRT + case 'G': append_extended_qualifier(sym, &xdt->right, "&", FALSE); fl |= 1; break; + case 'H': append_extended_qualifier(sym, &xdt->right, "&&", FALSE); fl |= 1; break; +#endif + case 'I': append_extended_qualifier(sym, &xdt->right, "__restrict", TRUE); fl |= 2; break; + default: if (fl == 1 || (fl == 3 && (sym->flags & UNDNAME_NO_MS_KEYWORDS))) xdt->flags = DT_NO_LRSEP_WS; return; + } + sym->current++; + } +} + +/****************************************************************** + * get_qualifier + * Parses the type qualifier. Always returns static strings. + */ +static BOOL get_qualifier(struct parsed_symbol *sym, struct datatype_t *xdt, const char** pclass) +{ + char ch; + const char* qualif; + + get_extended_qualifier(sym, xdt); + switch (ch = *sym->current++) + { + case 'A': qualif = NULL; break; + case 'B': qualif = "const"; break; + case 'C': qualif = "volatile"; break; + case 'D': qualif = "const volatile"; break; + case 'Q': qualif = NULL; break; + case 'R': qualif = "const"; break; + case 'S': qualif = "volatile"; break; + case 'T': qualif = "const volatile"; break; + default: return FALSE; + } + if (qualif) + { + xdt->flags &= ~DT_NO_LRSEP_WS; + xdt->left = xdt->left ? 
str_printf(sym, "%s %s", qualif, xdt->left) : qualif; + } + if (ch >= 'Q' && ch <= 'T') /* pointer to member, fetch class */ + { + const char* class = get_class_name(sym); + if (!class) return FALSE; + if (!pclass) return FALSE; + *pclass = class; + } + else if (pclass) *pclass = NULL; + return TRUE; +} + +static BOOL get_function_qualifier(struct parsed_symbol *sym, const char** qualif) +{ + struct datatype_t xdt; + + if (!get_qualifier(sym, &xdt, NULL)) return FALSE; + *qualif = (xdt.left || xdt.right) ? + str_printf(sym, "%s%s%s", xdt.left, (xdt.flags & DT_NO_LRSEP_WS) ? "" : " ", xdt.right) : NULL; + return TRUE; +} + +static BOOL get_qualified_type(struct datatype_t *ct, struct parsed_symbol* sym, + struct array *pmt_ref, char qualif, enum datatype_flags flags) +{ + struct datatype_t xdt1; + struct datatype_t xdt2; + const char* ref; + const char* str_qualif; + const char* class; + + get_extended_qualifier(sym, &xdt1); + + switch (qualif) + { + case 'A': ref = " &"; str_qualif = NULL; break; + case 'B': ref = " &"; str_qualif = " volatile"; break; + case 'P': ref = " *"; str_qualif = NULL; break; + case 'Q': ref = " *"; str_qualif = " const"; break; + case 'R': ref = " *"; str_qualif = " volatile"; break; + case 'S': ref = " *"; str_qualif = " const volatile"; break; + case '?': ref = NULL; str_qualif = NULL; break; + case '$': ref = " &&"; str_qualif = NULL; break; + default: return FALSE; + } + ct->right = NULL; + ct->flags = 0; + + if (get_qualifier(sym, &xdt2, &class)) + { + unsigned mark = sym->stack.num; + struct datatype_t sub_ct; + + if (ref || str_qualif || xdt1.left || xdt1.right) + { + if (class) + ct->left = str_printf(sym, "%s%s%s%s::%s%s%s", + xdt1.left ? " " : NULL, xdt1.left, + class ? " " : NULL, class, ref ? ref + 1 : NULL, + xdt1.right ? " " : NULL, xdt1.right, str_qualif); + else + ct->left = str_printf(sym, "%s%s%s%s%s%s", + xdt1.left ? " " : NULL, xdt1.left, ref, + xdt1.right ? 
" " : NULL, xdt1.right, str_qualif); + } + else + ct->left = NULL; + /* multidimensional arrays */ + if (*sym->current == 'Y') + { + const char* n1; + int num; + + sym->current++; + if (!(n1 = get_number(sym))) return FALSE; + num = atoi(n1); + + ct->left = str_printf(sym, " (%s%s", xdt2.left, ct->left && !xdt2.left ? ct->left + 1 : ct->left); + ct->right = ")"; + xdt2.left = NULL; + + while (num--) + ct->right = str_printf(sym, "%s[%s]", ct->right, get_number(sym)); + } + + /* Recurse to get the referred-to type */ + if (!demangle_datatype(sym, &sub_ct, pmt_ref, 0)) + return FALSE; + if (sub_ct.flags & DT_NO_LEADING_WS) + ct->left++; + ct->left = str_printf(sym, "%s%s%s%s%s", sub_ct.left, xdt2.left ? " " : NULL, + xdt2.left, ct->left, + ((xdt2.left || str_qualif) && (flags & WS_AFTER_QUAL_IF)) ? " " : NULL); + if (sub_ct.right) ct->right = str_printf(sym, "%s%s", ct->right, sub_ct.right); + sym->stack.num = mark; + } + else if (ref || str_qualif || xdt1.left || xdt1.right) + ct->left = str_printf(sym, "%s%s%s%s%s%s", + xdt1.left ? " " : NULL, xdt1.left, ref, + xdt1.right ? " " : NULL, xdt1.right, str_qualif); + else + ct->left = NULL; + return TRUE; +} + +/****************************************************************** + * get_literal_string + * Gets the literal name from the current position in the mangled + * symbol to the first '@' character. It pushes the parsed name to + * the symbol names stack and returns a pointer to it or NULL in + * case of an error. 
+ */ +static char* get_literal_string(struct parsed_symbol* sym) +{ + const char *ptr = sym->current; + + do { + if (!((*sym->current >= 'A' && *sym->current <= 'Z') || + (*sym->current >= 'a' && *sym->current <= 'z') || + (*sym->current >= '0' && *sym->current <= '9') || + *sym->current == '_' || *sym->current == '$')) { + return NULL; + } + } while (*++sym->current != '@'); + sym->current++; + if (!str_array_push(sym, ptr, sym->current - 1 - ptr, &sym->names)) + return NULL; + + return str_array_get_ref(&sym->names, sym->names.num - sym->names.start - 1); +} + +/****************************************************************** + * get_template_name + * Parses a name with a template argument list and returns it as + * a string. + * In a template argument list the back reference to the names + * table is separately created. '0' points to the class component + * name with the template arguments. We use the same stack array + * to hold the names but save/restore the stack state before/after + * parsing the template argument list. + */ +static char* get_template_name(struct parsed_symbol* sym) +{ + char *name, *args; + unsigned num_mark = sym->names.num; + unsigned start_mark = sym->names.start; + unsigned stack_mark = sym->stack.num; + struct array array_pmt; + + sym->names.start = sym->names.num; + if (!(name = get_literal_string(sym))) { + sym->names.start = start_mark; return FALSE; - } - - if (strcmp (ct.expression, "void")) - { - sym->arg_text [sym->argc] = ct.expression; - ct.expression = NULL; - sym->arg_type [sym->argc] = get_type_constant (ct.dest_type, ct.flags); - sym->arg_flag [sym->argc] = ct.flags; - sym->arg_name[sym->argc] = strmake( "arg%u", sym->argc ); - sym->argc++; - } - else - break; /* 'void' terminates an argument list */ - FREE_CT (ct); } - else - name++; - } - - while (*name == '@') - name++; - - /* Functions are always terminated by 'Z'. If we made it this far and - * Don't find it, we have incorrectly identified a data type. 
- */ - if (*name != 'Z') { - free (function_name); - free (class_name); - return FALSE; - } - - /* Note: '()' after 'Z' means 'throws', but we don't care here */ - - /* Create the function name. Include a unique number because otherwise - * overloaded functions could have the same c signature. - */ - switch (is_const) - { - case (CT_CONST | CT_VOLATILE): const_status = "_const_volatile"; break; - case CT_CONST: const_status = "_const"; break; - case CT_VOLATILE: const_status = "_volatile"; break; - default: const_status = "_"; break; - } - sym->function_name = strmake( "%s_%s%s%u", class_name, function_name, - is_static ? "_static" : const_status, hash ); - - assert (sym->return_text); - assert (sym->flags); - assert (sym->function_name); - - free (class_name); - free (function_name); - - if (VERBOSE) - puts ("Demangled symbol OK"); - - return TRUE; + str_array_init(&array_pmt); + args = get_args(sym, &array_pmt, FALSE, '<', '>'); + if (args != NULL) + name = str_printf(sym, "%s%s", name, args); + sym->names.num = num_mark; + sym->names.start = start_mark; + sym->stack.num = stack_mark; + return name; } - -/******************************************************************* - * demangle_datatype - * - * Attempt to demangle a C++ data type, which may be compound. - * a compound type is made up of a number of simple types. e.g: - * char** = (pointer to (pointer to (char))) - * - * Uses a simple recursive descent algorithm that is broken - * and/or incomplete, without a doubt ;-) +/****************************************************************** + * get_class + * Parses class as a list of parent-classes, terminated by '@' and stores the + * result in 'a' array. 
Each parent-classes, as well as the inner element + * (either field/method name or class name), are represented in the mangled + * name by a literal name ([a-zA-Z0-9_]+ terminated by '@') or a back reference + * ([0-9]) or a name with template arguments ('?$' literal name followed by the + * template argument list). The class name components appear in the reverse + * order in the mangled name, e.g aaa@bbb@ccc@@ will be demangled to + * ccc::bbb::aaa + * For each of these class name components a string will be allocated in the + * array. */ -static char *demangle_datatype (char **str, compound_type *ct, - parsed_symbol* sym) +static BOOL get_class(struct parsed_symbol* sym) { - char *iter; + const char* name = NULL; - assert (str && *str); - assert (ct); - - iter = *str; - - if (!get_constraints_convention_1 (&iter, ct)) - return NULL; - - if (*iter == '_') - { - /* MS type: __int8,__int16 etc */ - ct->flags |= CT_EXTENDED; - iter++; - } - - switch (*iter) - { - case 'C': case 'D': case 'E': case 'F': case 'G': - case 'H': case 'I': case 'J': case 'K': case 'M': - case 'N': case 'O': case 'X': case 'Z': - /* Simple data types */ - ct->dest_type = *iter++; - if (!get_constraints_convention_2 (&iter, ct)) - return NULL; - ct->expression = get_type_string (ct->dest_type, ct->flags); - break; - case 'U': - case 'V': - /* Class/struct/union */ - ct->dest_type = *iter++; - if (*iter == '0' || *iter == '1') - { - /* Referring to class type (implicit 'this') */ - char *stripped; - if (!sym->argc) - return NULL; - - iter++; - /* Apply our constraints to the base type (struct xxx *) */ - stripped = xstrdup (sym->arg_text [0]); - - /* If we're a reference, re-use the pointer already in the type */ - if (!(ct->flags & CT_BY_REFERENCE)) - stripped[ strlen (stripped) - 2] = '\0'; /* otherwise, strip it */ - - ct->expression = strmake( "%s%s", ct->flags & CT_CONST ? "const " : - ct->flags & CT_VOLATILE ? 
"volatile " : "", stripped); - free (stripped); - } - else if (*iter != '@') - { - /* The name of the class/struct, followed by '@@' */ - char *struct_name = iter; - while (*iter && *iter++ != '@') ; - if (*iter++ != '@') - return NULL; - struct_name = str_substring (struct_name, iter - 2); - ct->expression = strmake( "%sstruct %s%s", ct->flags & CT_CONST ? "const " : - ct->flags & CT_VOLATILE ? "volatile " : "", - struct_name, ct->flags & CT_BY_REFERENCE ? " *" : ""); - free (struct_name); - } - break; - case 'Q': /* FIXME: Array Just treated as pointer currently */ - case 'P': /* Pointer */ - { - compound_type sub_ct; - INIT_CT (sub_ct); - - ct->dest_type = *iter++; - if (!get_constraints_convention_2 (&iter, ct)) - return NULL; - - /* FIXME: P6 = Function pointer, others who knows.. */ - if (isdigit (*iter)) - { - if (*iter == '6') - { - int sub_expressions = 0; - /* FIXME: this is still broken in some cases and it has to be - * merged with the function prototype parsing above... - */ - iter += iter[1] == 'A' ? 
2 : 3; /* FIXME */ - if (!demangle_datatype (&iter, &sub_ct, sym)) - return NULL; - ct->expression = strmake( "%s (*)(", sub_ct.expression ); - if (*iter != '@') - { - while (*iter != 'Z') - { - FREE_CT (sub_ct); - INIT_CT (sub_ct); - if (!demangle_datatype (&iter, &sub_ct, sym)) - return NULL; - if (sub_expressions) - ct->expression = strmake( "%s, %s", ct->expression, sub_ct.expression ); - else - ct->expression = strmake( "%s%s", ct->expression, sub_ct.expression ); - while (*iter == '@') iter++; - sub_expressions++; - } - } else while (*iter == '@') iter++; - iter++; - ct->expression = strmake( "%s)", ct->expression ); - } - else - return NULL; - } - else - { - /* Recurse to get the pointed-to type */ - if (!demangle_datatype (&iter, &sub_ct, sym)) - return NULL; - - ct->expression = get_pointer_type_string (ct, sub_ct.expression); - } - - FREE_CT (sub_ct); - } - break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - /* Referring back to previously parsed type */ - if (sym->argc >= (size_t)(*iter - '0')) - return NULL; - ct->dest_type = sym->arg_type [*iter - '0']; - ct->expression = xstrdup (sym->arg_text [*iter - '0']); - iter++; - break; - default : - return NULL; - } - if (!ct->expression) - return NULL; - - return *str = iter; -} - - -/* Constraints: - * There are two conventions for specifying data type constants. I - * don't know how the compiler chooses between them, but I suspect it - * is based on ensuring that linker names are unique. - * Convention 1. The data type modifier is given first, followed - * by the data type it operates on. '?' means passed by value, - * 'A' means passed by reference. Note neither of these characters - * is a valid base data type. This is then followed by a character - * specifying constness or volatility. - * Convention 2. The base data type (which is never '?' or 'A') is - * given first. 
The character modifier is optionally given after - * the base type character. If a valid character modifier is present, - * then it only applies to the current data type if the character - * after that is not 'A' 'B' or 'C' (Because this makes a convention 1 - * constraint for the next data type). - * - * The conventions are usually mixed within the same symbol. - * Since 'C' is both a qualifier and a data type, I suspect that - * convention 1 allows specifying e.g. 'volatile signed char*'. In - * convention 2 this would be 'CC' which is ambiguous (i.e. Is it two - * pointers, or a single pointer + modifier?). In convention 1 it - * is encoded as '?CC' which is not ambiguous. This probably - * holds true for some other types as well. - */ - -/******************************************************************* - * get_constraints_convention_1 - * - * Get type constraint information for a data type - */ -static char *get_constraints_convention_1 (char **str, compound_type *ct) -{ - char *iter = *str, **retval = str; - - if (ct->have_qualifiers) - return *str; /* Previously got constraints for this type */ - - if (*iter == '?' || *iter == 'A') - { - ct->have_qualifiers = TRUE; - ct->flags |= (*iter++ == '?' ? 
0 : CT_BY_REFERENCE); - - switch (*iter++) + while (*sym->current != '@') { - case 'A' : - break; /* non-const, non-volatile */ - case 'B' : - ct->flags |= CT_CONST; - break; - case 'C' : - ct->flags |= CT_VOLATILE; - break; - default : - return NULL; + switch (*sym->current) + { + case '\0': return FALSE; + + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + case '8': case '9': + name = str_array_get_ref(&sym->names, *sym->current++ - '0'); + break; + case '?': + switch (*++sym->current) + { + case '$': + sym->current++; + if ((name = get_template_name(sym)) && + !str_array_push(sym, name, -1, &sym->names)) + return FALSE; + break; + case '?': + { + struct array stack = sym->stack; + unsigned int start = sym->names.start; + unsigned int num = sym->names.num; + + str_array_init( &sym->stack ); + if (symbol_demangle( sym )) name = str_printf( sym, "`%s'", sym->result ); + sym->names.start = start; + sym->names.num = num; + sym->stack = stack; + } + break; + default: + if (!(name = get_number( sym ))) return FALSE; + name = str_printf( sym, "`%s'", name ); + break; + } + break; + default: + name = get_literal_string(sym); + break; + } + if (!name || !str_array_push(sym, name, -1, &sym->stack)) + return FALSE; } - } - - return *retval = iter; + sym->current++; + return TRUE; } - -/******************************************************************* - * get_constraints_convention_2 - * - * Get type constraint information for a data type +/****************************************************************** + * get_class_string + * From an array collected by get_class in sym->stack, constructs the + * corresponding (allocated) string */ -static char *get_constraints_convention_2 (char **str, compound_type *ct) +static char* get_class_string(struct parsed_symbol* sym, int start) { - char *iter = *str, **retval = str; + int i; + unsigned int len, sz; + char* ret; + struct array *a = &sym->stack; - /* FIXME: Why do arrays have both convention 
1 & 2 constraints? */ - if (ct->have_qualifiers && ct->dest_type != 'Q') - return *str; /* Previously got constraints for this type */ - - ct->have_qualifiers = TRUE; /* Even if none, we've got all we're getting */ - - switch (*iter) - { - case 'A' : - if (iter[1] != 'A' && iter[1] != 'B' && iter[1] != 'C') - iter++; - break; - case 'B' : - ct->flags |= CT_CONST; - iter++; - break; - case 'C' : - /* See note above, if we find 'C' it is _not_ a signed char */ - ct->flags |= CT_VOLATILE; - iter++; - break; - } - - return *retval = iter; + for (len = 0, i = start; i < a->num; i++) + { + assert(a->elts[i]); + len += 2 + strlen(a->elts[i]); + } + if (!(ret = und_alloc(sym, len - 1))) return NULL; + for (len = 0, i = a->num - 1; i >= start; i--) + { + sz = strlen(a->elts[i]); + memcpy(ret + len, a->elts[i], sz); + len += sz; + if (i > start) + { + ret[len++] = ':'; + ret[len++] = ':'; + } + } + ret[len] = '\0'; + return ret; } +/****************************************************************** + * get_class_name + * Wrapper around get_class and get_class_string. + */ +static char* get_class_name(struct parsed_symbol* sym) +{ + unsigned mark = sym->stack.num; + char* s = NULL; + + if (get_class(sym)) + s = get_class_string(sym, mark); + sym->stack.num = mark; + return s; +} + +/****************************************************************** + * get_calling_convention + * Returns a static string corresponding to the calling convention described + * by char 'ch'. Sets export to TRUE iff the calling convention is exported. 
+ */ +static BOOL get_calling_convention(char ch, const char** call_conv, + const char** exported, unsigned flags) +{ + *call_conv = *exported = NULL; + + if (!(flags & (UNDNAME_NO_MS_KEYWORDS | UNDNAME_NO_ALLOCATION_LANGUAGE))) + { + if (flags & UNDNAME_NO_LEADING_UNDERSCORES) + { + if (((ch - 'A') % 2) == 1) *exported = "dll_export "; + switch (ch) + { + case 'A': case 'B': *call_conv = "cdecl"; break; + case 'C': case 'D': *call_conv = "pascal"; break; + case 'E': case 'F': *call_conv = "thiscall"; break; + case 'G': case 'H': *call_conv = "stdcall"; break; + case 'I': case 'J': *call_conv = "fastcall"; break; + case 'K': case 'L': break; + case 'M': *call_conv = "clrcall"; break; + default: return FALSE; + } + } + else + { + if (((ch - 'A') % 2) == 1) *exported = "__dll_export "; + switch (ch) + { + case 'A': case 'B': *call_conv = "__cdecl"; break; + case 'C': case 'D': *call_conv = "__pascal"; break; + case 'E': case 'F': *call_conv = "__thiscall"; break; + case 'G': case 'H': *call_conv = "__stdcall"; break; + case 'I': case 'J': *call_conv = "__fastcall"; break; + case 'K': case 'L': break; + case 'M': *call_conv = "__clrcall"; break; + default: return FALSE; + } + } + } + return TRUE; +} /******************************************************************* - * get_type_string - * - * Return a string containing the name of a data type + * get_simple_type + * Return a string containing an allocated string for a simple data type */ -static char *get_type_string (const char c, const int constraints) +static const char* get_simple_type(char c) { - const char *type_string; + const char* type_string; + + switch (c) + { + case 'C': type_string = "signed char"; break; + case 'D': type_string = "char"; break; + case 'E': type_string = "unsigned char"; break; + case 'F': type_string = "short"; break; + case 'G': type_string = "unsigned short"; break; + case 'H': type_string = "int"; break; + case 'I': type_string = "unsigned int"; break; + case 'J': type_string = 
"long"; break; + case 'K': type_string = "unsigned long"; break; + case 'M': type_string = "float"; break; + case 'N': type_string = "double"; break; + case 'O': type_string = "long double"; break; + case 'X': type_string = "void"; break; + case 'Z': type_string = "..."; break; + default: type_string = NULL; break; + } + return type_string; +} + +/******************************************************************* + * get_extended_type + * Return a string containing an allocated string for a simple data type + */ +static const char* get_extended_type(char c) +{ + const char* type_string; - if (constraints & CT_EXTENDED) - { switch (c) { case 'D': type_string = "__int8"; break; @@ -709,88 +736,832 @@ static char *get_type_string (const char c, const int constraints) case 'K': type_string = "unsigned __int64"; break; case 'L': type_string = "__int128"; break; case 'M': type_string = "unsigned __int128"; break; - case 'N': type_string = "int"; break; /* bool */ - case 'W': type_string = "WCHAR"; break; /* wchar_t */ - default: - return NULL; - } - } - else - { - switch (c) + case 'N': type_string = "bool"; break; + case 'Q': type_string = "char8_t"; break; + case 'S': type_string = "char16_t"; break; + case 'U': type_string = "char32_t"; break; + case 'W': type_string = "wchar_t"; break; + default: type_string = NULL; break; + } + return type_string; +} + +struct function_signature +{ + const char* call_conv; + const char* exported; + struct datatype_t return_ct; + const char* arguments; +}; + +static BOOL get_function_signature(struct parsed_symbol* sym, struct array* pmt_ref, + struct function_signature* fs) +{ + unsigned mark = sym->stack.num; + + if (!get_calling_convention(*sym->current++, + &fs->call_conv, &fs->exported, + sym->flags & ~UNDNAME_NO_ALLOCATION_LANGUAGE) || + !demangle_datatype(sym, &fs->return_ct, pmt_ref, FALSE)) + return FALSE; + + if (!(fs->arguments = get_args(sym, pmt_ref, TRUE, '(', ')'))) + return FALSE; + sym->stack.num = mark; + + return 
TRUE; +} + +/******************************************************************* + * demangle_datatype + * + * Attempt to demangle a C++ data type, which may be datatype. + * a datatype type is made up of a number of simple types. e.g: + * char** = (pointer to (pointer to (char))) + */ +static BOOL demangle_datatype(struct parsed_symbol* sym, struct datatype_t* ct, + struct array* pmt_ref, enum datatype_flags flags) +{ + char dt; + BOOL add_pmt = TRUE; + + assert(ct); + ct->left = ct->right = NULL; + ct->flags = 0; + + switch (dt = *sym->current++) { - case 'C': /* Signed char, fall through */ - case 'D': type_string = "char"; break; - case 'E': type_string = "unsigned char"; break; - case 'F': type_string = "short int"; break; - case 'G': type_string = "unsigned short int"; break; - case 'H': type_string = "int"; break; - case 'I': type_string = "unsigned int"; break; - case 'J': type_string = "long"; break; - case 'K': type_string = "unsigned long"; break; - case 'M': type_string = "float"; break; - case 'N': type_string = "double"; break; - case 'O': type_string = "long double"; break; - /* FIXME: T = union */ - case 'U': - case 'V': type_string = "struct"; break; - case 'X': return xstrdup ("void"); - case 'Z': return xstrdup ("..."); + case '_': + /* MS type: __int8,__int16 etc */ + ct->left = get_extended_type(*sym->current++); + break; + case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'M': + case 'N': case 'O': case 'X': case 'Z': + /* Simple data types */ + ct->left = get_simple_type(dt); + add_pmt = FALSE; + break; + case 'T': /* union */ + case 'U': /* struct */ + case 'V': /* class */ + case 'Y': /* cointerface */ + /* Class/struct/union/cointerface */ + { + const char* struct_name = NULL; + const char* type_name = NULL; + + if (!(struct_name = get_class_name(sym))) + goto done; + if (!(sym->flags & UNDNAME_NO_COMPLEX_TYPE)) + { + switch (dt) + { + case 'T': type_name = "union "; break; + case 'U': 
type_name = "struct "; break; + case 'V': type_name = "class "; break; + case 'Y': type_name = "cointerface "; break; + } + } + ct->left = str_printf(sym, "%s%s", type_name, struct_name); + } + break; + case '?': + /* not all the time is seems */ + if (flags & IN_ARGS) + { + const char* ptr; + if (!(ptr = get_number(sym))) goto done; + ct->left = str_printf(sym, "`template-parameter-%s'", ptr); + } + else + { + if (!get_qualified_type(ct, sym, pmt_ref, '?', flags)) goto done; + } + break; + case 'A': /* reference */ + case 'B': /* volatile reference */ + if (!get_qualified_type(ct, sym, pmt_ref, dt, flags)) goto done; + break; + case 'Q': /* const pointer */ + case 'R': /* volatile pointer */ + case 'S': /* const volatile pointer */ + if (!get_qualified_type(ct, sym, pmt_ref, (flags & IN_ARGS) ? dt : 'P', flags)) goto done; + break; + case 'P': /* Pointer */ + if (isdigit(*sym->current)) + { + /* FIXME: + * P6 = Function pointer + * P8 = Member function pointer + * others who knows.. */ + if (*sym->current == '8') + { + struct function_signature fs; + const char* class; + const char* function_qualifier; + + sym->current++; + + if (!(class = get_class_name(sym))) + goto done; + if (!get_function_qualifier(sym, &function_qualifier)) + goto done; + if (!get_function_signature(sym, pmt_ref, &fs)) + goto done; + + ct->left = str_printf(sym, "%s%s (%s %s::*", + fs.return_ct.left, fs.return_ct.right, fs.call_conv, class); + ct->right = str_printf(sym, ")%s%s", fs.arguments, function_qualifier); + } + else if (*sym->current == '6') + { + struct function_signature fs; + + sym->current++; + + if (!get_function_signature(sym, pmt_ref, &fs)) + goto done; + + ct->left = str_printf(sym, "%s%s (%s*", + fs.return_ct.left, fs.return_ct.right, fs.call_conv); + ct->flags = DT_NO_LEADING_WS; + ct->right = str_printf(sym, ")%s", fs.arguments); + } + else goto done; + } + else if (!get_qualified_type(ct, sym, pmt_ref, 'P', flags)) goto done; + break; + case 'W': + if (*sym->current == 
'4') + { + char* enum_name; + sym->current++; + if (!(enum_name = get_class_name(sym))) + goto done; + if (sym->flags & UNDNAME_NO_COMPLEX_TYPE) + ct->left = enum_name; + else + ct->left = str_printf(sym, "enum %s", enum_name); + } + else goto done; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + /* Referring back to previously parsed type */ + /* left and right are pushed as two separate strings */ + if (!pmt_ref) goto done; + ct->left = str_array_get_ref(pmt_ref, (dt - '0') * 2); + ct->right = str_array_get_ref(pmt_ref, (dt - '0') * 2 + 1); + if (!ct->left) goto done; + add_pmt = FALSE; + break; + case '$': + switch (*sym->current++) + { + case '0': + if (!(ct->left = get_number(sym))) goto done; + break; + case 'D': + { + const char* ptr; + if (!(ptr = get_number(sym))) goto done; + ct->left = str_printf(sym, "`template-parameter%s'", ptr); + } + break; + case 'F': + { + const char* p1; + const char* p2; + if (!(p1 = get_number(sym))) goto done; + if (!(p2 = get_number(sym))) goto done; + ct->left = str_printf(sym, "{%s,%s}", p1, p2); + } + break; + case 'G': + { + const char* p1; + const char* p2; + const char* p3; + if (!(p1 = get_number(sym))) goto done; + if (!(p2 = get_number(sym))) goto done; + if (!(p3 = get_number(sym))) goto done; + ct->left = str_printf(sym, "{%s,%s,%s}", p1, p2, p3); + } + break; + case 'Q': + { + const char* ptr; + if (!(ptr = get_number(sym))) goto done; + ct->left = str_printf(sym, "`non-type-template-parameter%s'", ptr); + } + break; + case '$': + if (*sym->current == 'A') + { + sym->current++; + if (*sym->current == '6') + { + struct function_signature fs; + + sym->current++; + + if (!get_function_signature(sym, pmt_ref, &fs)) + goto done; + ct->left = str_printf(sym, "%s%s %s%s", + fs.return_ct.left, fs.return_ct.right, fs.call_conv, fs.arguments); + } + } + else if (*sym->current == 'B') + { + unsigned mark = sym->stack.num; + struct datatype_t sub_ct; + const 
char* arr = NULL; + sym->current++; + + /* multidimensional arrays */ + if (*sym->current == 'Y') + { + const char* n1; + int num; + + sym->current++; + if (!(n1 = get_number(sym))) goto done; + num = atoi(n1); + + while (num--) + arr = str_printf(sym, "%s[%s]", arr, get_number(sym)); + } + + if (!demangle_datatype(sym, &sub_ct, pmt_ref, 0)) goto done; + + if (arr) + ct->left = str_printf(sym, "%s %s", sub_ct.left, arr); + else + ct->left = sub_ct.left; + ct->right = sub_ct.right; + sym->stack.num = mark; + } + else if (*sym->current == 'C') + { + struct datatype_t xdt; + + sym->current++; + if (!get_qualifier(sym, &xdt, NULL)) goto done; + if (!demangle_datatype(sym, ct, pmt_ref, flags)) goto done; + ct->left = str_printf(sym, "%s %s", ct->left, xdt.left); + } + else if (*sym->current == 'Q') + { + sym->current++; + if (!get_qualified_type(ct, sym, pmt_ref, '$', flags)) goto done; + } + break; + } + break; + default : + break; + } + if (add_pmt && pmt_ref && (flags & IN_ARGS)) + { + /* left and right are pushed as two separate strings */ + if (!str_array_push(sym, ct->left ? ct->left : "", -1, pmt_ref) || + !str_array_push(sym, ct->right ? ct->right : "", -1, pmt_ref)) + return FALSE; + } +done: + + return ct->left != NULL; +} + +/****************************************************************** + * handle_data + * Does the final parsing and handling for a variable or a field in + * a class. + */ +static BOOL handle_data(struct parsed_symbol* sym) +{ + const char* access = NULL; + const char* member_type = NULL; + struct datatype_t xdt = {NULL}; + struct datatype_t ct; + char* name = NULL; + BOOL ret = FALSE; + + /* 0 private static + * 1 protected static + * 2 public static + * 3 private non-static + * 4 protected non-static + * 5 public non-static + * 6 ?? static + * 7 ?? 
static + */ + + if (!(sym->flags & UNDNAME_NO_ACCESS_SPECIFIERS)) + { + /* we only print the access for static members */ + switch (*sym->current) + { + case '0': access = "private: "; break; + case '1': access = "protected: "; break; + case '2': access = "public: "; break; + } + } + + if (!(sym->flags & UNDNAME_NO_MEMBER_TYPE)) + { + if (*sym->current >= '0' && *sym->current <= '2') + member_type = "static "; + } + + name = get_class_string(sym, 0); + + switch (*sym->current++) + { + case '0': case '1': case '2': + case '3': case '4': case '5': + { + unsigned mark = sym->stack.num; + struct array pmt; + const char* class; + + str_array_init(&pmt); + + if (!demangle_datatype(sym, &ct, &pmt, 0)) goto done; + if (!get_qualifier(sym, &xdt, &class)) goto done; /* class doesn't seem to be displayed */ + if (xdt.left && xdt.right) xdt.left = str_printf(sym, "%s %s", xdt.left, xdt.right); + else if (!xdt.left) xdt.left = xdt.right; + sym->stack.num = mark; + } + break; + case '6' : /* compiler generated static */ + case '7' : /* compiler generated static */ + ct.left = ct.right = NULL; + if (!get_qualifier(sym, &xdt, NULL)) goto done; + if (*sym->current != '@') + { + char* cls = NULL; + + if (!(cls = get_class_name(sym))) + goto done; + ct.right = str_printf(sym, "{for `%s'}", cls); + } + break; + case '8': + case '9': + xdt.left = ct.left = ct.right = NULL; + break; + default: goto done; + } + if (sym->flags & UNDNAME_NAME_ONLY) ct.left = ct.right = xdt.left = NULL; + + sym->result = str_printf(sym, "%s%s%s%s%s%s%s%s", access, + member_type, ct.left, + xdt.left && ct.left ? " " : NULL, xdt.left, + xdt.left || ct.left ? " " : NULL, name, ct.right); + ret = TRUE; +done: + return ret; +} + +/****************************************************************** + * handle_method + * Does the final parsing and handling for a function or a method in + * a class. 
+ */ +static BOOL handle_method(struct parsed_symbol* sym, BOOL cast_op) +{ + char accmem; + const char* access = NULL; + int access_id = -1; + const char* member_type = NULL; + struct datatype_t ct_ret; + const char* call_conv; + const char* function_qualifier = NULL; + const char* exported; + const char* args_str = NULL; + const char* name = NULL; + BOOL ret = FALSE, has_args = TRUE, has_ret = TRUE; + unsigned mark; + struct array array_pmt; + + /* FIXME: why 2 possible letters for each option? + * 'A' private: + * 'B' private: + * 'C' private: static + * 'D' private: static + * 'E' private: virtual + * 'F' private: virtual + * 'G' private: thunk + * 'H' private: thunk + * 'I' protected: + * 'J' protected: + * 'K' protected: static + * 'L' protected: static + * 'M' protected: virtual + * 'N' protected: virtual + * 'O' protected: thunk + * 'P' protected: thunk + * 'Q' public: + * 'R' public: + * 'S' public: static + * 'T' public: static + * 'U' public: virtual + * 'V' public: virtual + * 'W' public: thunk + * 'X' public: thunk + * 'Y' + * 'Z' + * "$0" private: thunk vtordisp + * "$1" private: thunk vtordisp + * "$2" protected: thunk vtordisp + * "$3" protected: thunk vtordisp + * "$4" public: thunk vtordisp + * "$5" public: thunk vtordisp + * "$B" vcall thunk + * "$R" thunk vtordispex + */ + accmem = *sym->current++; + if (accmem == '$') + { + if (*sym->current >= '0' && *sym->current <= '5') + access_id = (*sym->current - '0') / 2; + else if (*sym->current == 'R') + access_id = (sym->current[1] - '0') / 2; + else if (*sym->current != 'B') + goto done; + } + else if (accmem >= 'A' && accmem <= 'Z') + access_id = (accmem - 'A') / 8; + else + goto done; + + switch (access_id) + { + case 0: access = "private: "; break; + case 1: access = "protected: "; break; + case 2: access = "public: "; break; + } + if (accmem == '$' || (accmem - 'A') % 8 == 6 || (accmem - 'A') % 8 == 7) + access = str_printf(sym, "[thunk]:%s", access ? 
access : " "); + + if (accmem == '$' && *sym->current != 'B') + member_type = "virtual "; + else if (accmem <= 'X') + { + switch ((accmem - 'A') % 8) + { + case 2: case 3: member_type = "static "; break; + case 4: case 5: case 6: case 7: member_type = "virtual "; break; + } + } + + if (sym->flags & UNDNAME_NO_ACCESS_SPECIFIERS) + access = NULL; + if (sym->flags & UNDNAME_NO_MEMBER_TYPE) + member_type = NULL; + + name = get_class_string(sym, 0); + + if (accmem == '$' && *sym->current == 'B') /* vcall thunk */ + { + const char *n; + + sym->current++; + n = get_number(sym); + + if(!n || *sym->current++ != 'A') goto done; + name = str_printf(sym, "%s{%s,{flat}}' }'", name, n); + has_args = FALSE; + has_ret = FALSE; + } + else if (accmem == '$' && *sym->current == 'R') /* vtordispex thunk */ + { + const char *n1, *n2, *n3, *n4; + + sym->current += 2; + n1 = get_number(sym); + n2 = get_number(sym); + n3 = get_number(sym); + n4 = get_number(sym); + + if(!n1 || !n2 || !n3 || !n4) goto done; + name = str_printf(sym, "%s`vtordispex{%s,%s,%s,%s}' ", name, n1, n2, n3, n4); + } + else if (accmem == '$') /* vtordisp thunk */ + { + const char *n1, *n2; + + sym->current++; + n1 = get_number(sym); + n2 = get_number(sym); + + if (!n1 || !n2) goto done; + name = str_printf(sym, "%s`vtordisp{%s,%s}' ", name, n1, n2); + } + else if ((accmem - 'A') % 8 == 6 || (accmem - 'A') % 8 == 7) /* a thunk */ + name = str_printf(sym, "%s`adjustor{%s}' ", name, get_number(sym)); + + if (has_args && (accmem == '$' || + (accmem <= 'X' && (accmem - 'A') % 8 != 2 && (accmem - 'A') % 8 != 3))) + { + /* Implicit 'this' pointer */ + if (!get_function_qualifier(sym, &function_qualifier)) goto done; + } + + if (!get_calling_convention(*sym->current++, &call_conv, &exported, + sym->flags)) + goto done; + + str_array_init(&array_pmt); + + /* Return type, or @ if 'void' */ + if (has_ret && *sym->current == '@') + { + ct_ret.left = "void"; + ct_ret.right = NULL; + sym->current++; + } + else if (has_ret) + { + 
if (!demangle_datatype(sym, &ct_ret, &array_pmt, cast_op ? WS_AFTER_QUAL_IF : 0)) + goto done; + } + if (!has_ret || sym->flags & UNDNAME_NO_FUNCTION_RETURNS) + ct_ret.left = ct_ret.right = NULL; + if (cast_op) + { + name = str_printf(sym, "%s %s%s", name, ct_ret.left, ct_ret.right); + ct_ret.left = ct_ret.right = NULL; + } + + mark = sym->stack.num; + if (has_args && !(args_str = get_args(sym, &array_pmt, TRUE, '(', ')'))) goto done; + if (sym->flags & UNDNAME_NAME_ONLY) args_str = function_qualifier = NULL; + if (sym->flags & UNDNAME_NO_THISTYPE) function_qualifier = NULL; + sym->stack.num = mark; + + /* Note: '()' after 'Z' means 'throws', but we don't care here + * Yet!!! FIXME + */ + sym->result = str_printf(sym, "%s%s%s%s%s%s%s%s%s%s%s", + access, member_type, ct_ret.left, + (ct_ret.left && !ct_ret.right) ? " " : NULL, + call_conv, call_conv ? " " : NULL, exported, + name, args_str, function_qualifier, ct_ret.right); + ret = TRUE; +done: + return ret; +} + +/******************************************************************* + * symbol_demangle + * Demangle a C++ linker symbol + */ +static BOOL symbol_demangle(struct parsed_symbol* sym) +{ + BOOL ret = FALSE; + enum { + PP_NONE, + PP_CONSTRUCTOR, + PP_DESTRUCTOR, + PP_CAST_OPERATOR, + } post_process = PP_NONE; + + /* FIXME seems wrong as name, as it demangles a simple data type */ + if (sym->flags & UNDNAME_NO_ARGUMENTS) + { + struct datatype_t ct; + + if (demangle_datatype(sym, &ct, NULL, 0)) + { + sym->result = str_printf(sym, "%s%s", ct.left, ct.right); + ret = TRUE; + } + goto done; + } + + /* MS mangled names always begin with '?' 
*/ + if (*sym->current != '?') return FALSE; + sym->current++; + + /* Then function name or operator code */ + if (*sym->current == '?') + { + const char* function_name = NULL; + BOOL in_template = FALSE; + + if (sym->current[1] == '$' && sym->current[2] == '?') + { + in_template = TRUE; + sym->current += 2; + } + + /* C++ operator code (one character, or two if the first is '_') */ + switch (*++sym->current) + { + case '0': function_name = ""; post_process = PP_CONSTRUCTOR; break; + case '1': function_name = ""; post_process = PP_DESTRUCTOR; break; + case '2': function_name = "operator new"; break; + case '3': function_name = "operator delete"; break; + case '4': function_name = "operator="; break; + case '5': function_name = "operator>>"; break; + case '6': function_name = "operator<<"; break; + case '7': function_name = "operator!"; break; + case '8': function_name = "operator=="; break; + case '9': function_name = "operator!="; break; + case 'A': function_name = "operator[]"; break; + case 'B': function_name = "operator"; post_process = PP_CAST_OPERATOR; break; + case 'C': function_name = "operator->"; break; + case 'D': function_name = "operator*"; break; + case 'E': function_name = "operator++"; break; + case 'F': function_name = "operator--"; break; + case 'G': function_name = "operator-"; break; + case 'H': function_name = "operator+"; break; + case 'I': function_name = "operator&"; break; + case 'J': function_name = "operator->*"; break; + case 'K': function_name = "operator/"; break; + case 'L': function_name = "operator%"; break; + case 'M': function_name = "operator<"; break; + case 'N': function_name = "operator<="; break; + case 'O': function_name = "operator>"; break; + case 'P': function_name = "operator>="; break; + case 'Q': function_name = "operator,"; break; + case 'R': function_name = "operator()"; break; + case 'S': function_name = "operator~"; break; + case 'T': function_name = "operator^"; break; + case 'U': function_name = "operator|"; 
break; + case 'V': function_name = "operator&&"; break; + case 'W': function_name = "operator||"; break; + case 'X': function_name = "operator*="; break; + case 'Y': function_name = "operator+="; break; + case 'Z': function_name = "operator-="; break; + case '_': + switch (*++sym->current) + { + case '0': function_name = "operator/="; break; + case '1': function_name = "operator%="; break; + case '2': function_name = "operator>>="; break; + case '3': function_name = "operator<<="; break; + case '4': function_name = "operator&="; break; + case '5': function_name = "operator|="; break; + case '6': function_name = "operator^="; break; + case '7': function_name = "`vftable'"; break; + case '8': function_name = "`vbtable'"; break; + case '9': function_name = "`vcall'"; break; + case 'A': function_name = "`typeof'"; break; + case 'B': function_name = "`local static guard'"; break; + case 'C': sym->result = (char*)"`string'"; /* string literal: followed by string encoding (native never undecode it) */ + /* FIXME: should unmangle the whole string for error reporting */ + if (*sym->current && sym->current[strlen(sym->current) - 1] == '@') ret = TRUE; + goto done; + case 'D': function_name = "`vbase destructor'"; break; + case 'E': function_name = "`vector deleting destructor'"; break; + case 'F': function_name = "`default constructor closure'"; break; + case 'G': function_name = "`scalar deleting destructor'"; break; + case 'H': function_name = "`vector constructor iterator'"; break; + case 'I': function_name = "`vector destructor iterator'"; break; + case 'J': function_name = "`vector vbase constructor iterator'"; break; + case 'K': function_name = "`virtual displacement map'"; break; + case 'L': function_name = "`eh vector constructor iterator'"; break; + case 'M': function_name = "`eh vector destructor iterator'"; break; + case 'N': function_name = "`eh vector vbase constructor iterator'"; break; + case 'O': function_name = "`copy constructor closure'"; break; + case 
'R': + sym->flags |= UNDNAME_NO_FUNCTION_RETURNS; + switch (*++sym->current) + { + case '0': + { + struct datatype_t ct; + + sym->current++; + if (!demangle_datatype(sym, &ct, NULL, 0)) + goto done; + function_name = str_printf(sym, "%s%s `RTTI Type Descriptor'", + ct.left, ct.right); + sym->current--; + } + break; + case '1': + { + const char* n1, *n2, *n3, *n4; + sym->current++; + n1 = get_number(sym); + n2 = get_number(sym); + n3 = get_number(sym); + n4 = get_number(sym); + sym->current--; + function_name = str_printf(sym, "`RTTI Base Class Descriptor at (%s,%s,%s,%s)'", + n1, n2, n3, n4); + } + break; + case '2': function_name = "`RTTI Base Class Array'"; break; + case '3': function_name = "`RTTI Class Hierarchy Descriptor'"; break; + case '4': function_name = "`RTTI Complete Object Locator'"; break; + default: + break; + } + break; + case 'S': function_name = "`local vftable'"; break; + case 'T': function_name = "`local vftable constructor closure'"; break; + case 'U': function_name = "operator new[]"; break; + case 'V': function_name = "operator delete[]"; break; + case 'X': function_name = "`placement delete closure'"; break; + case 'Y': function_name = "`placement delete[] closure'"; break; + case '_': + switch (*++sym->current) + { + case 'K': + sym->current++; + function_name = str_printf(sym, "operator \"\" %s", get_literal_string(sym)); + --sym->current; + break; + default: + return FALSE; + } + break; + default: + return FALSE; + } + break; + case '$': + sym->current++; + if (!(function_name = get_template_name(sym))) goto done; + --sym->current; + break; + default: + /* FIXME: Other operators */ + return FALSE; + } + sym->current++; + if (in_template) + { + const char *args; + struct array array_pmt; + + str_array_init(&array_pmt); + args = get_args(sym, &array_pmt, FALSE, '<', '>'); + if (args) function_name = function_name ? 
str_printf(sym, "%s%s", function_name, args) : args; + sym->names.num = 0; + } + if (!str_array_push(sym, function_name, -1, &sym->stack)) + return FALSE; + } + else if (*sym->current == '$') + { + /* Strange construct, it's a name with a template argument list + and that's all. */ + sym->current++; + ret = (sym->result = get_template_name(sym)) != NULL; + goto done; + } + + /* Either a class name, or '@' if the symbol is not a class member */ + switch (*sym->current) + { + case '@': sym->current++; break; + case '$': break; /* '$' is not consumed here: get_class() is skipped and the final data/method dispatch below routes it to handle_method() */ default: - return NULL; - } - } + /* Class the function is associated with, terminated by '@@' */ + if (!get_class(sym)) goto done; + break; + } - return strmake( "%s%s%s", constraints & CT_CONST ? "const " : - constraints & CT_VOLATILE ? "volatile " : "", type_string, - constraints & CT_BY_REFERENCE ? " *" : "" ); + switch (post_process) + { + case PP_NONE: default: break; + case PP_CONSTRUCTOR: case PP_DESTRUCTOR: + /* it's time to set the member name for ctor & dtor */ + if (sym->stack.num <= 1) goto done; + sym->stack.elts[0] = str_printf(sym, "%s%s%s", post_process == PP_DESTRUCTOR ?
"~" : NULL, + sym->stack.elts[1], sym->stack.elts[0]); + /* ctors and dtors don't have return type */ + sym->flags |= UNDNAME_NO_FUNCTION_RETURNS; + break; + case PP_CAST_OPERATOR: + sym->flags &= ~UNDNAME_NO_FUNCTION_RETURNS; + break; + } + + /* Function/Data type and access level */ + if (*sym->current >= '0' && *sym->current <= '9') + ret = handle_data(sym); + else if ((*sym->current >= 'A' && *sym->current <= 'Z') || *sym->current == '$') + ret = handle_method(sym, post_process == PP_CAST_OPERATOR); + else ret = FALSE; /* neither a digit, an upper-case letter nor '$': reported as failure */ +done: + return ret; } - -/******************************************************************* - * get_type_constant - * - * Get the ARG_* constant for this data type - */ -static int get_type_constant (const char c, const int constraints) +/* demangle: entry point that demangles one VC++ symbol name. It zero-fills a parsed_symbol on the stack, points sym.current at the mangled input, initialises the names and stack string arrays, and runs symbol_demangle(). Returns sym.result when symbol_demangle() succeeds and NULL otherwise. NOTE(review): how the returned string is allocated and who releases it is not visible in this part of the patch -- confirm before freeing or caching it. */ char *demangle( const char *mangled ) { - /* Any reference type is really a pointer */ - if (constraints & CT_BY_REFERENCE) - return ARG_POINTER; - - switch (c) - { - case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': - case 'J': case 'K': - return ARG_LONG; - case 'M': - return ARG_FLOAT; - case 'N': case 'O': - return ARG_DOUBLE; - case 'P': case 'Q': - return ARG_POINTER; - case 'U': case 'V': - return ARG_STRUCT; - case 'X': - return ARG_VOID; - case 'Z': - default: - return -1; - } -} - - -/******************************************************************* - * get_pointer_type_string - * - * Return a string containing 'pointer to expression' - */ -static char *get_pointer_type_string (compound_type *ct, - const char *expression) -{ - /* FIXME: set a compound flag for bracketing expression if needed */ - return strmake( "%s%s%s", ct->flags & CT_CONST ? "const " : - ct->flags & CT_VOLATILE ? "volatile " : "", expression, - ct->flags & CT_BY_REFERENCE ? " **" : " *" ); + struct parsed_symbol sym; + memset(&sym, 0, sizeof(struct parsed_symbol)); + sym.current = mangled; + str_array_init( &sym.names ); + str_array_init( &sym.stack ); + return symbol_demangle(&sym) ?
sym.result : NULL; } diff --git a/tools/winedump/winedump.h b/tools/winedump/winedump.h index e2b3f7b1ca6..91b7170b774 100644 --- a/tools/winedump/winedump.h +++ b/tools/winedump/winedump.h @@ -168,7 +168,7 @@ BOOL dll_next_symbol (parsed_symbol * sym); /* Symbol functions */ void symbol_init(parsed_symbol* symbol, const char* name); -BOOL symbol_demangle (parsed_symbol *symbol); +char *demangle( const char *name ); BOOL symbol_search (parsed_symbol *symbol);