From b4d0a09f15c60c88bbf516d2f6dcdb451dcad9c7 Mon Sep 17 00:00:00 2001 From: George Marques Date: Mon, 22 Jan 2024 11:31:55 -0300 Subject: [PATCH] GDScript: Reintroduce binary tokenization on export This adds back a function available in 3.x: exporting the GDScript files in a binary form by converting the tokens recognized by the tokenizer into a data format. It is enabled by default on export but can be manually disabled. The format helps with loading times since the tokens are easily reconstructed, and with hiding the source code, since recovering it would require a specialized tool. Code comments are not stored in this format. The `--test` command can also include a `--use-binary-tokens` flag which will run the GDScript tests with the binary format instead of the regular source code by converting them in-memory before the test runs. --- editor/export/editor_export.cpp | 2 + editor/export/editor_export_preset.cpp | 9 + editor/export/editor_export_preset.h | 9 + editor/export/project_export.cpp | 30 +- editor/export/project_export.h | 4 + modules/gdscript/gdscript.cpp | 24 +- modules/gdscript/gdscript.h | 5 + modules/gdscript/gdscript_cache.cpp | 55 ++- modules/gdscript/gdscript_cache.h | 1 + modules/gdscript/gdscript_editor.cpp | 2 +- modules/gdscript/gdscript_parser.cpp | 104 ++-- modules/gdscript/gdscript_parser.h | 3 +- modules/gdscript/gdscript_tokenizer.cpp | 67 +-- modules/gdscript/gdscript_tokenizer.h | 58 ++- .../gdscript/gdscript_tokenizer_buffer.cpp | 457 ++++++++++++++++++ modules/gdscript/gdscript_tokenizer_buffer.h | 87 ++++ .../gdscript_extend_parser.cpp | 2 +- modules/gdscript/register_types.cpp | 26 +- .../gdscript/tests/gdscript_test_runner.cpp | 102 +++- modules/gdscript/tests/gdscript_test_runner.h | 13 +- .../tests/gdscript_test_runner_suite.h | 6 +- ...aces.gd => mixing_tabs_spaces.textonly.gd} | 0 ...es.out => mixing_tabs_spaces.textonly.out} | 0 .../scripts/parser/features/multiline_if.gd | 1 + modules/gdscript/tests/test_gdscript.cpp | 61 
++- modules/gdscript/tests/test_gdscript.h | 1 + 26 files changed, 1010 insertions(+), 119 deletions(-) create mode 100644 modules/gdscript/gdscript_tokenizer_buffer.cpp create mode 100644 modules/gdscript/gdscript_tokenizer_buffer.h rename modules/gdscript/tests/scripts/parser/errors/{mixing_tabs_spaces.gd => mixing_tabs_spaces.textonly.gd} (100%) rename modules/gdscript/tests/scripts/parser/errors/{mixing_tabs_spaces.out => mixing_tabs_spaces.textonly.out} (100%) diff --git a/editor/export/editor_export.cpp b/editor/export/editor_export.cpp index 670fd0a06d94..cd7e813dbdb4 100644 --- a/editor/export/editor_export.cpp +++ b/editor/export/editor_export.cpp @@ -85,6 +85,7 @@ void EditorExport::_save() { config->set_value(section, "encryption_exclude_filters", preset->get_enc_ex_filter()); config->set_value(section, "encrypt_pck", preset->get_enc_pck()); config->set_value(section, "encrypt_directory", preset->get_enc_directory()); + config->set_value(section, "script_export_mode", preset->get_script_export_mode()); credentials->set_value(section, "script_encryption_key", preset->get_script_encryption_key()); String option_section = "preset." 
+ itos(i) + ".options"; @@ -269,6 +270,7 @@ void EditorExport::load_config() { preset->set_include_filter(config->get_value(section, "include_filter")); preset->set_exclude_filter(config->get_value(section, "exclude_filter")); preset->set_export_path(config->get_value(section, "export_path", "")); + preset->set_script_export_mode(config->get_value(section, "script_export_mode", EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS)); if (config->has_section_key(section, "encrypt_pck")) { preset->set_enc_pck(config->get_value(section, "encrypt_pck")); diff --git a/editor/export/editor_export_preset.cpp b/editor/export/editor_export_preset.cpp index b941170b7bc0..478ef980374a 100644 --- a/editor/export/editor_export_preset.cpp +++ b/editor/export/editor_export_preset.cpp @@ -323,6 +323,15 @@ String EditorExportPreset::get_script_encryption_key() const { return script_key; } +void EditorExportPreset::set_script_export_mode(int p_mode) { + script_mode = p_mode; + EditorExport::singleton->save_presets(); +} + +int EditorExportPreset::get_script_export_mode() const { + return script_mode; +} + Variant EditorExportPreset::get_or_env(const StringName &p_name, const String &p_env_var, bool *r_valid) const { const String from_env = OS::get_singleton()->get_environment(p_env_var); if (!from_env.is_empty()) { diff --git a/editor/export/editor_export_preset.h b/editor/export/editor_export_preset.h index 025e7603f346..c5f2a0ee79a4 100644 --- a/editor/export/editor_export_preset.h +++ b/editor/export/editor_export_preset.h @@ -54,6 +54,11 @@ public: MODE_FILE_REMOVE, }; + enum ScriptExportMode { + MODE_SCRIPT_TEXT, + MODE_SCRIPT_BINARY_TOKENS, + }; + private: Ref platform; ExportFilter export_filter = EXPORT_ALL_RESOURCES; @@ -84,6 +89,7 @@ private: bool enc_directory = false; String script_key; + int script_mode = MODE_SCRIPT_BINARY_TOKENS; protected: bool _set(const StringName &p_name, const Variant &p_value); @@ -152,6 +158,9 @@ public: void set_script_encryption_key(const String 
&p_key); String get_script_encryption_key() const; + void set_script_export_mode(int p_mode); + int get_script_export_mode() const; + Variant get_or_env(const StringName &p_name, const String &p_env_var, bool *r_valid = nullptr) const; // Return the preset's version number, or fall back to the diff --git a/editor/export/project_export.cpp b/editor/export/project_export.cpp index 63bd87e6cc00..dba524310ea2 100644 --- a/editor/export/project_export.cpp +++ b/editor/export/project_export.cpp @@ -383,6 +383,9 @@ void ProjectExportDialog::_edit_preset(int p_index) { script_key_error->hide(); } + int script_export_mode = current->get_script_export_mode(); + script_mode->select(script_export_mode); + updating = false; } @@ -582,6 +585,19 @@ bool ProjectExportDialog::_validate_script_encryption_key(const String &p_key) { return is_valid; } +void ProjectExportDialog::_script_export_mode_changed(int p_mode) { + if (updating) { + return; + } + + Ref current = get_current_preset(); + ERR_FAIL_COND(current.is_null()); + + current->set_script_export_mode(p_mode); + + _update_current_preset(); +} + void ProjectExportDialog::_duplicate_preset() { Ref current = get_current_preset(); if (current.is_null()) { @@ -1328,7 +1344,7 @@ ProjectExportDialog::ProjectExportDialog() { feature_vb->add_margin_child(TTR("Feature List:"), custom_feature_display, true); sections->add_child(feature_vb); - // Script export parameters. + // Encryption export parameters. VBoxContainer *sec_vb = memnew(VBoxContainer); sec_vb->set_name(TTR("Encryption")); @@ -1373,6 +1389,18 @@ ProjectExportDialog::ProjectExportDialog() { sec_more_info->connect("pressed", callable_mp(this, &ProjectExportDialog::_open_key_help_link)); sec_vb->add_child(sec_more_info); + // Script export parameters. 
+ + VBoxContainer *script_vb = memnew(VBoxContainer); + script_vb->set_name(TTR("Scripts")); + + script_mode = memnew(OptionButton); + script_vb->add_margin_child(TTR("GDScript Export Mode:"), script_mode); + script_mode->add_item(TTR("Text (easier debugging)"), (int)EditorExportPreset::MODE_SCRIPT_TEXT); + script_mode->add_item(TTR("Binary tokens (faster loading)"), (int)EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS); + script_mode->connect("item_selected", callable_mp(this, &ProjectExportDialog::_script_export_mode_changed)); + sections->add_child(script_vb); + sections->connect("tab_changed", callable_mp(this, &ProjectExportDialog::_tab_changed)); // Disable by default. diff --git a/editor/export/project_export.h b/editor/export/project_export.h index 1a359b08dabd..0fe7ecc2a8f3 100644 --- a/editor/export/project_export.h +++ b/editor/export/project_export.h @@ -160,6 +160,8 @@ class ProjectExportDialog : public ConfirmationDialog { LineEdit *enc_in_filters = nullptr; LineEdit *enc_ex_filters = nullptr; + OptionButton *script_mode = nullptr; + void _open_export_template_manager(); void _export_pck_zip(); @@ -183,6 +185,8 @@ class ProjectExportDialog : public ConfirmationDialog { void _script_encryption_key_changed(const String &p_key); bool _validate_script_encryption_key(const String &p_key); + void _script_export_mode_changed(int p_mode); + void _open_key_help_link(); void _tab_changed(int); diff --git a/modules/gdscript/gdscript.cpp b/modules/gdscript/gdscript.cpp index e78c113c6cf7..551adcb3202e 100644 --- a/modules/gdscript/gdscript.cpp +++ b/modules/gdscript/gdscript.cpp @@ -35,6 +35,7 @@ #include "gdscript_compiler.h" #include "gdscript_parser.h" #include "gdscript_rpc_callable.h" +#include "gdscript_tokenizer_buffer.h" #include "gdscript_warning.h" #ifdef TOOLS_ENABLED @@ -740,7 +741,12 @@ Error GDScript::reload(bool p_keep_state) { valid = false; GDScriptParser parser; - Error err = parser.parse(source, path, false); + Error err; + if 
(!binary_tokens.is_empty()) { + err = parser.parse_binary(binary_tokens, path); + } else { + err = parser.parse(source, path, false); + } if (err) { if (EngineDebugger::is_active()) { GDScriptLanguage::get_singleton()->debug_break_parse(_get_debug_path(), parser.get_errors().front()->get().line, "Parser Error: " + parser.get_errors().front()->get().message); @@ -1050,6 +1056,19 @@ Error GDScript::load_source_code(const String &p_path) { return OK; } +void GDScript::set_binary_tokens_source(const Vector &p_binary_tokens) { + binary_tokens = p_binary_tokens; +} + +const Vector &GDScript::get_binary_tokens_source() const { + return binary_tokens; +} + +Vector GDScript::get_as_binary_tokens() const { + GDScriptTokenizerBuffer tokenizer; + return tokenizer.parse_code_string(source); +} + const HashMap &GDScript::debug_get_member_functions() const { return member_functions; } @@ -2805,6 +2824,7 @@ Ref ResourceFormatLoaderGDScript::load(const String &p_path, const Str void ResourceFormatLoaderGDScript::get_recognized_extensions(List *p_extensions) const { p_extensions->push_back("gd"); + p_extensions->push_back("gdc"); } bool ResourceFormatLoaderGDScript::handles_type(const String &p_type) const { @@ -2813,7 +2833,7 @@ bool ResourceFormatLoaderGDScript::handles_type(const String &p_type) const { String ResourceFormatLoaderGDScript::get_resource_type(const String &p_path) const { String el = p_path.get_extension().to_lower(); - if (el == "gd") { + if (el == "gd" || el == "gdc") { return "GDScript"; } return ""; diff --git a/modules/gdscript/gdscript.h b/modules/gdscript/gdscript.h index 2da9b89eb9e4..56a8deb905d5 100644 --- a/modules/gdscript/gdscript.h +++ b/modules/gdscript/gdscript.h @@ -176,6 +176,7 @@ private: bool clearing = false; //exported members String source; + Vector binary_tokens; String path; bool path_valid = false; // False if using default path. StringName local_name; // Inner class identifier or `class_name`. 
@@ -296,6 +297,10 @@ public: String get_script_path() const; Error load_source_code(const String &p_path); + void set_binary_tokens_source(const Vector &p_binary_tokens); + const Vector &get_binary_tokens_source() const; + Vector get_as_binary_tokens() const; + bool get_property_default_value(const StringName &p_property, Variant &r_value) const override; virtual void get_script_method_list(List *p_list) const override; diff --git a/modules/gdscript/gdscript_cache.cpp b/modules/gdscript/gdscript_cache.cpp index 76f4e69ab9fc..ef783ab5644b 100644 --- a/modules/gdscript/gdscript_cache.cpp +++ b/modules/gdscript/gdscript_cache.cpp @@ -67,10 +67,15 @@ Error GDScriptParserRef::raise_status(Status p_new_status) { while (p_new_status > status) { switch (status) { - case EMPTY: + case EMPTY: { status = PARSED; - result = parser->parse(GDScriptCache::get_source_code(path), path, false); - break; + String remapped_path = ResourceLoader::path_remap(path); + if (remapped_path.get_extension().to_lower() == "gdc") { + result = parser->parse_binary(GDScriptCache::get_binary_tokens(remapped_path), path); + } else { + result = parser->parse(GDScriptCache::get_source_code(remapped_path), path, false); + } + } break; case PARSED: { status = INHERITANCE_SOLVED; Error inheritance_result = get_analyzer()->resolve_inheritance(); @@ -205,7 +210,8 @@ Ref GDScriptCache::get_parser(const String &p_path, GDScriptP return ref; } } else { - if (!FileAccess::exists(p_path)) { + String remapped_path = ResourceLoader::path_remap(p_path); + if (!FileAccess::exists(remapped_path)) { r_error = ERR_FILE_NOT_FOUND; return ref; } @@ -239,6 +245,20 @@ String GDScriptCache::get_source_code(const String &p_path) { return source; } +Vector GDScriptCache::get_binary_tokens(const String &p_path) { + Vector buffer; + Error err = OK; + Ref f = FileAccess::open(p_path, FileAccess::READ, &err); + ERR_FAIL_COND_V_MSG(err != OK, buffer, "Failed to open binary GDScript file '" + p_path + "'."); + + uint64_t len = 
f->get_length(); + buffer.resize(len); + uint64_t read = f->get_buffer(buffer.ptrw(), buffer.size()); + ERR_FAIL_COND_V_MSG(read != len, Vector(), "Failed to read binary GDScript file '" + p_path + "'."); + + return buffer; +} + Ref GDScriptCache::get_shallow_script(const String &p_path, Error &r_error, const String &p_owner) { MutexLock lock(singleton->mutex); if (!p_owner.is_empty()) { @@ -251,10 +271,20 @@ Ref GDScriptCache::get_shallow_script(const String &p_path, Error &r_e return singleton->shallow_gdscript_cache[p_path]; } + String remapped_path = ResourceLoader::path_remap(p_path); + Ref script; script.instantiate(); script->set_path(p_path, true); - r_error = script->load_source_code(p_path); + if (remapped_path.get_extension().to_lower() == "gdc") { + Vector buffer = get_binary_tokens(remapped_path); + if (buffer.is_empty()) { + r_error = ERR_FILE_CANT_READ; + } + script->set_binary_tokens_source(buffer); + } else { + r_error = script->load_source_code(remapped_path); + } if (r_error) { return Ref(); // Returns null and does not cache when the script fails to load. 
@@ -294,9 +324,18 @@ Ref GDScriptCache::get_full_script(const String &p_path, Error &r_erro } if (p_update_from_disk) { - r_error = script->load_source_code(p_path); - if (r_error) { - return script; + if (p_path.get_extension().to_lower() == "gdc") { + Vector buffer = get_binary_tokens(p_path); + if (buffer.is_empty()) { + r_error = ERR_FILE_CANT_READ; + return script; + } + script->set_binary_tokens_source(buffer); + } else { + r_error = script->load_source_code(p_path); + if (r_error) { + return script; + } } } diff --git a/modules/gdscript/gdscript_cache.h b/modules/gdscript/gdscript_cache.h index 0a0f403e44bb..0754e9feb6ca 100644 --- a/modules/gdscript/gdscript_cache.h +++ b/modules/gdscript/gdscript_cache.h @@ -99,6 +99,7 @@ public: static void remove_script(const String &p_path); static Ref get_parser(const String &p_path, GDScriptParserRef::Status status, Error &r_error, const String &p_owner = String()); static String get_source_code(const String &p_path); + static Vector get_binary_tokens(const String &p_path); static Ref get_shallow_script(const String &p_path, Error &r_error, const String &p_owner = String()); static Ref get_full_script(const String &p_path, Error &r_error, const String &p_owner = String(), bool p_update_from_disk = false); static Ref get_cached_script(const String &p_path); diff --git a/modules/gdscript/gdscript_editor.cpp b/modules/gdscript/gdscript_editor.cpp index 78f9c0846f17..1a574b3f3e8e 100644 --- a/modules/gdscript/gdscript_editor.cpp +++ b/modules/gdscript/gdscript_editor.cpp @@ -210,7 +210,7 @@ bool GDScriptLanguage::supports_documentation() const { } int GDScriptLanguage::find_function(const String &p_function, const String &p_code) const { - GDScriptTokenizer tokenizer; + GDScriptTokenizerText tokenizer; tokenizer.set_source_code(p_code); int indent = 0; GDScriptTokenizer::Token current = tokenizer.scan(); diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp index 
129d62cabd5f..a0036d38d6d3 100644 --- a/modules/gdscript/gdscript_parser.cpp +++ b/modules/gdscript/gdscript_parser.cpp @@ -31,6 +31,7 @@ #include "gdscript_parser.h" #include "gdscript.h" +#include "gdscript_tokenizer_buffer.h" #include "core/config/project_settings.h" #include "core/io/file_access.h" @@ -226,7 +227,7 @@ void GDScriptParser::make_completion_context(CompletionType p_type, Node *p_node if (!for_completion || (!p_force && completion_context.type != COMPLETION_NONE)) { return; } - if (previous.cursor_place != GDScriptTokenizer::CURSOR_MIDDLE && previous.cursor_place != GDScriptTokenizer::CURSOR_END && current.cursor_place == GDScriptTokenizer::CURSOR_NONE) { + if (previous.cursor_place != GDScriptTokenizerText::CURSOR_MIDDLE && previous.cursor_place != GDScriptTokenizerText::CURSOR_END && current.cursor_place == GDScriptTokenizerText::CURSOR_NONE) { return; } CompletionContext context; @@ -234,7 +235,7 @@ void GDScriptParser::make_completion_context(CompletionType p_type, Node *p_node context.current_class = current_class; context.current_function = current_function; context.current_suite = current_suite; - context.current_line = tokenizer.get_cursor_line(); + context.current_line = tokenizer->get_cursor_line(); context.current_argument = p_argument; context.node = p_node; completion_context = context; @@ -244,7 +245,7 @@ void GDScriptParser::make_completion_context(CompletionType p_type, Variant::Typ if (!for_completion || (!p_force && completion_context.type != COMPLETION_NONE)) { return; } - if (previous.cursor_place != GDScriptTokenizer::CURSOR_MIDDLE && previous.cursor_place != GDScriptTokenizer::CURSOR_END && current.cursor_place == GDScriptTokenizer::CURSOR_NONE) { + if (previous.cursor_place != GDScriptTokenizerText::CURSOR_MIDDLE && previous.cursor_place != GDScriptTokenizerText::CURSOR_END && current.cursor_place == GDScriptTokenizerText::CURSOR_NONE) { return; } CompletionContext context; @@ -252,7 +253,7 @@ void 
GDScriptParser::make_completion_context(CompletionType p_type, Variant::Typ context.current_class = current_class; context.current_function = current_function; context.current_suite = current_suite; - context.current_line = tokenizer.get_cursor_line(); + context.current_line = tokenizer->get_cursor_line(); context.builtin_type = p_builtin_type; completion_context = context; } @@ -265,7 +266,7 @@ void GDScriptParser::push_completion_call(Node *p_call) { call.call = p_call; call.argument = 0; completion_call_stack.push_back(call); - if (previous.cursor_place == GDScriptTokenizer::CURSOR_MIDDLE || previous.cursor_place == GDScriptTokenizer::CURSOR_END || current.cursor_place == GDScriptTokenizer::CURSOR_BEGINNING) { + if (previous.cursor_place == GDScriptTokenizerText::CURSOR_MIDDLE || previous.cursor_place == GDScriptTokenizerText::CURSOR_END || current.cursor_place == GDScriptTokenizerText::CURSOR_BEGINNING) { completion_call = call; } } @@ -328,17 +329,21 @@ Error GDScriptParser::parse(const String &p_source_code, const String &p_script_ source = source.replace_first(String::chr(0xFFFF), String()); } - tokenizer.set_source_code(source); - tokenizer.set_cursor_position(cursor_line, cursor_column); + GDScriptTokenizerText *text_tokenizer = memnew(GDScriptTokenizerText); + text_tokenizer->set_source_code(source); + + tokenizer = text_tokenizer; + + tokenizer->set_cursor_position(cursor_line, cursor_column); script_path = p_script_path.simplify_path(); - current = tokenizer.scan(); + current = tokenizer->scan(); // Avoid error or newline as the first token. // The latter can mess with the parser when opening files filled exclusively with comments and newlines. 
while (current.type == GDScriptTokenizer::Token::ERROR || current.type == GDScriptTokenizer::Token::NEWLINE) { if (current.type == GDScriptTokenizer::Token::ERROR) { push_error(current.literal); } - current = tokenizer.scan(); + current = tokenizer->scan(); } #ifdef DEBUG_ENABLED @@ -359,6 +364,8 @@ Error GDScriptParser::parse(const String &p_source_code, const String &p_script_ parse_program(); pop_multiline(); + memdelete(text_tokenizer); + #ifdef DEBUG_ENABLED if (multiline_stack.size() > 0) { ERR_PRINT("Parser bug: Imbalanced multiline stack."); @@ -372,6 +379,39 @@ Error GDScriptParser::parse(const String &p_source_code, const String &p_script_ } } +Error GDScriptParser::parse_binary(const Vector &p_binary, const String &p_script_path) { + GDScriptTokenizerBuffer *buffer_tokenizer = memnew(GDScriptTokenizerBuffer); + Error err = buffer_tokenizer->set_code_buffer(p_binary); + + if (err) { + return err; + } + + tokenizer = buffer_tokenizer; + script_path = p_script_path; + current = tokenizer->scan(); + // Avoid error or newline as the first token. + // The latter can mess with the parser when opening files filled exclusively with comments and newlines. + while (current.type == GDScriptTokenizer::Token::ERROR || current.type == GDScriptTokenizer::Token::NEWLINE) { + if (current.type == GDScriptTokenizer::Token::ERROR) { + push_error(current.literal); + } + current = tokenizer->scan(); + } + + push_multiline(false); // Keep one for the whole parsing. + parse_program(); + pop_multiline(); + + memdelete(buffer_tokenizer); + + if (errors.is_empty()) { + return OK; + } else { + return ERR_PARSE_ERROR; + } +} + GDScriptTokenizer::Token GDScriptParser::advance() { lambda_ended = false; // Empty marker since we're past the end in any case. 
@@ -379,16 +419,16 @@ GDScriptTokenizer::Token GDScriptParser::advance() { ERR_FAIL_COND_V_MSG(current.type == GDScriptTokenizer::Token::TK_EOF, current, "GDScript parser bug: Trying to advance past the end of stream."); } if (for_completion && !completion_call_stack.is_empty()) { - if (completion_call.call == nullptr && tokenizer.is_past_cursor()) { + if (completion_call.call == nullptr && tokenizer->is_past_cursor()) { completion_call = completion_call_stack.back()->get(); passed_cursor = true; } } previous = current; - current = tokenizer.scan(); + current = tokenizer->scan(); while (current.type == GDScriptTokenizer::Token::ERROR) { push_error(current.literal); - current = tokenizer.scan(); + current = tokenizer->scan(); } if (previous.type != GDScriptTokenizer::Token::DEDENT) { // `DEDENT` belongs to the next non-empty line. for (Node *n : nodes_in_progress) { @@ -457,11 +497,11 @@ void GDScriptParser::synchronize() { void GDScriptParser::push_multiline(bool p_state) { multiline_stack.push_back(p_state); - tokenizer.set_multiline_mode(p_state); + tokenizer->set_multiline_mode(p_state); if (p_state) { // Consume potential whitespace tokens already waiting in line. while (current.type == GDScriptTokenizer::Token::NEWLINE || current.type == GDScriptTokenizer::Token::INDENT || current.type == GDScriptTokenizer::Token::DEDENT) { - current = tokenizer.scan(); // Don't call advance() here, as we don't want to change the previous token. + current = tokenizer->scan(); // Don't call advance() here, as we don't want to change the previous token. } } } @@ -469,7 +509,7 @@ void GDScriptParser::push_multiline(bool p_state) { void GDScriptParser::pop_multiline() { ERR_FAIL_COND_MSG(multiline_stack.size() == 0, "Parser bug: trying to pop from multiline stack without available value."); multiline_stack.pop_back(); - tokenizer.set_multiline_mode(multiline_stack.size() > 0 ? multiline_stack.back()->get() : false); + tokenizer->set_multiline_mode(multiline_stack.size() > 0 ? 
multiline_stack.back()->get() : false); } bool GDScriptParser::is_statement_end_token() const { @@ -588,7 +628,7 @@ void GDScriptParser::parse_program() { complete_extents(head); #ifdef TOOLS_ENABLED - const HashMap &comments = tokenizer.get_comments(); + const HashMap &comments = tokenizer->get_comments(); int line = MIN(max_script_doc_line, head->end_line); while (line > 0) { if (comments.has(line) && comments[line].new_line && comments[line].comment.begins_with("##")) { @@ -597,6 +637,7 @@ void GDScriptParser::parse_program() { } line--; } + #endif // TOOLS_ENABLED if (!check(GDScriptTokenizer::Token::TK_EOF)) { @@ -793,7 +834,7 @@ void GDScriptParser::parse_class_member(T *(GDScriptParser::*p_parse_function)(b if (has_comment(member->start_line, true)) { // Inline doc comment. member->doc_data = parse_class_doc_comment(member->start_line, true); - } else if (has_comment(doc_comment_line, true) && tokenizer.get_comments()[doc_comment_line].new_line) { + } else if (has_comment(doc_comment_line, true) && tokenizer->get_comments()[doc_comment_line].new_line) { // Normal doc comment. Don't check `min_member_doc_line` because a class ends parsing after its members. // This may not work correctly for cases like `var a; class B`, but it doesn't matter in practice. member->doc_data = parse_class_doc_comment(doc_comment_line); @@ -802,7 +843,7 @@ void GDScriptParser::parse_class_member(T *(GDScriptParser::*p_parse_function)(b if (has_comment(member->start_line, true)) { // Inline doc comment. member->doc_data = parse_doc_comment(member->start_line, true); - } else if (doc_comment_line >= min_member_doc_line && has_comment(doc_comment_line, true) && tokenizer.get_comments()[doc_comment_line].new_line) { + } else if (doc_comment_line >= min_member_doc_line && has_comment(doc_comment_line, true) && tokenizer->get_comments()[doc_comment_line].new_line) { // Normal doc comment. 
member->doc_data = parse_doc_comment(doc_comment_line); } @@ -1357,7 +1398,7 @@ GDScriptParser::EnumNode *GDScriptParser::parse_enum(bool p_is_static) { if (i == enum_node->values.size() - 1 || enum_node->values[i + 1].line > enum_value_line) { doc_data = parse_doc_comment(enum_value_line, true); } - } else if (doc_comment_line >= min_enum_value_doc_line && has_comment(doc_comment_line, true) && tokenizer.get_comments()[doc_comment_line].new_line) { + } else if (doc_comment_line >= min_enum_value_doc_line && has_comment(doc_comment_line, true) && tokenizer->get_comments()[doc_comment_line].new_line) { // Normal doc comment. doc_data = parse_doc_comment(doc_comment_line); } @@ -2346,6 +2387,9 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_identifier(ExpressionNode IdentifierNode *identifier = alloc_node(); complete_extents(identifier); identifier->name = previous.get_identifier(); + if (identifier->name.operator String().is_empty()) { + print_line("Empty identifier found."); + } identifier->suite = current_suite; if (current_suite != nullptr && current_suite->has_local(identifier->name)) { @@ -3050,7 +3094,7 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_call(ExpressionNode *p_pre // Allow for trailing comma. break; } - bool use_identifier_completion = current.cursor_place == GDScriptTokenizer::CURSOR_END || current.cursor_place == GDScriptTokenizer::CURSOR_MIDDLE; + bool use_identifier_completion = current.cursor_place == GDScriptTokenizerText::CURSOR_END || current.cursor_place == GDScriptTokenizerText::CURSOR_MIDDLE; ExpressionNode *argument = parse_expression(false); if (argument == nullptr) { push_error(R"(Expected expression as the function argument.)"); @@ -3220,7 +3264,7 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_lambda(ExpressionNode *p_p // Reset the multiline stack since we don't want the multiline mode one in the lambda body. 
push_multiline(false); if (multiline_context) { - tokenizer.push_expression_indented_block(); + tokenizer->push_expression_indented_block(); } push_multiline(true); // For the parameters. @@ -3267,9 +3311,9 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_lambda(ExpressionNode *p_p if (multiline_context) { // If we're in multiline mode, we want to skip the spurious DEDENT and NEWLINE tokens. while (check(GDScriptTokenizer::Token::DEDENT) || check(GDScriptTokenizer::Token::INDENT) || check(GDScriptTokenizer::Token::NEWLINE)) { - current = tokenizer.scan(); // Not advance() since we don't want to change the previous token. + current = tokenizer->scan(); // Not advance() since we don't want to change the previous token. } - tokenizer.pop_expression_indented_block(); + tokenizer->pop_expression_indented_block(); } current_function = previous_function; @@ -3518,20 +3562,20 @@ static String _process_doc_line(const String &p_line, const String &p_text, cons } bool GDScriptParser::has_comment(int p_line, bool p_must_be_doc) { - bool has_comment = tokenizer.get_comments().has(p_line); + bool has_comment = tokenizer->get_comments().has(p_line); // If there are no comments or if we don't care whether the comment // is a docstring, we have our result. 
if (!p_must_be_doc || !has_comment) { return has_comment; } - return tokenizer.get_comments()[p_line].comment.begins_with("##"); + return tokenizer->get_comments()[p_line].comment.begins_with("##"); } GDScriptParser::MemberDocData GDScriptParser::parse_doc_comment(int p_line, bool p_single_line) { ERR_FAIL_COND_V(!has_comment(p_line, true), MemberDocData()); - const HashMap &comments = tokenizer.get_comments(); + const HashMap &comments = tokenizer->get_comments(); int line = p_line; if (!p_single_line) { @@ -3580,7 +3624,7 @@ GDScriptParser::MemberDocData GDScriptParser::parse_doc_comment(int p_line, bool GDScriptParser::ClassDocData GDScriptParser::parse_class_doc_comment(int p_line, bool p_single_line) { ERR_FAIL_COND_V(!has_comment(p_line, true), ClassDocData()); - const HashMap &comments = tokenizer.get_comments(); + const HashMap &comments = tokenizer->get_comments(); int line = p_line; if (!p_single_line) { @@ -5027,6 +5071,9 @@ void GDScriptParser::TreePrinter::print_function(FunctionNode *p_function, const for (const AnnotationNode *E : p_function->annotations) { print_annotation(E); } + if (p_function->is_static) { + push_text("Static "); + } push_text(p_context); push_text(" "); if (p_function->identifier) { @@ -5371,6 +5418,9 @@ void GDScriptParser::TreePrinter::print_variable(VariableNode *p_variable) { print_annotation(E); } + if (p_variable->is_static) { + push_text("Static "); + } push_text("Variable "); print_identifier(p_variable->identifier); diff --git a/modules/gdscript/gdscript_parser.h b/modules/gdscript/gdscript_parser.h index 11c5e51b9a5a..c064a2d0f46a 100644 --- a/modules/gdscript/gdscript_parser.h +++ b/modules/gdscript/gdscript_parser.h @@ -1336,7 +1336,7 @@ private: HashSet unsafe_lines; #endif - GDScriptTokenizer tokenizer; + GDScriptTokenizer *tokenizer = nullptr; GDScriptTokenizer::Token previous; GDScriptTokenizer::Token current; @@ -1540,6 +1540,7 @@ private: public: Error parse(const String &p_source_code, const String 
&p_script_path, bool p_for_completion); + Error parse_binary(const Vector &p_binary, const String &p_script_path); ClassNode *get_tree() const { return head; } bool is_tool() const { return _is_tool; } ClassNode *find_class(const String &p_qualified_name) const; diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp index 29cf7bc6ca9c..a4425a2bf005 100644 --- a/modules/gdscript/gdscript_tokenizer.cpp +++ b/modules/gdscript/gdscript_tokenizer.cpp @@ -256,7 +256,7 @@ String GDScriptTokenizer::get_token_name(Token::Type p_token_type) { return token_names[p_token_type]; } -void GDScriptTokenizer::set_source_code(const String &p_source_code) { +void GDScriptTokenizerText::set_source_code(const String &p_source_code) { source = p_source_code; if (source.is_empty()) { _source = U""; @@ -270,34 +270,34 @@ void GDScriptTokenizer::set_source_code(const String &p_source_code) { position = 0; } -void GDScriptTokenizer::set_cursor_position(int p_line, int p_column) { +void GDScriptTokenizerText::set_cursor_position(int p_line, int p_column) { cursor_line = p_line; cursor_column = p_column; } -void GDScriptTokenizer::set_multiline_mode(bool p_state) { +void GDScriptTokenizerText::set_multiline_mode(bool p_state) { multiline_mode = p_state; } -void GDScriptTokenizer::push_expression_indented_block() { +void GDScriptTokenizerText::push_expression_indented_block() { indent_stack_stack.push_back(indent_stack); } -void GDScriptTokenizer::pop_expression_indented_block() { +void GDScriptTokenizerText::pop_expression_indented_block() { ERR_FAIL_COND(indent_stack_stack.size() == 0); indent_stack = indent_stack_stack.back()->get(); indent_stack_stack.pop_back(); } -int GDScriptTokenizer::get_cursor_line() const { +int GDScriptTokenizerText::get_cursor_line() const { return cursor_line; } -int GDScriptTokenizer::get_cursor_column() const { +int GDScriptTokenizerText::get_cursor_column() const { return cursor_column; } -bool 
GDScriptTokenizer::is_past_cursor() const { +bool GDScriptTokenizerText::is_past_cursor() const { if (line < cursor_line) { return false; } @@ -310,7 +310,7 @@ bool GDScriptTokenizer::is_past_cursor() const { return true; } -char32_t GDScriptTokenizer::_advance() { +char32_t GDScriptTokenizerText::_advance() { if (unlikely(_is_at_end())) { return '\0'; } @@ -329,11 +329,11 @@ char32_t GDScriptTokenizer::_advance() { return _peek(-1); } -void GDScriptTokenizer::push_paren(char32_t p_char) { +void GDScriptTokenizerText::push_paren(char32_t p_char) { paren_stack.push_back(p_char); } -bool GDScriptTokenizer::pop_paren(char32_t p_expected) { +bool GDScriptTokenizerText::pop_paren(char32_t p_expected) { if (paren_stack.is_empty()) { return false; } @@ -343,13 +343,13 @@ bool GDScriptTokenizer::pop_paren(char32_t p_expected) { return actual == p_expected; } -GDScriptTokenizer::Token GDScriptTokenizer::pop_error() { +GDScriptTokenizer::Token GDScriptTokenizerText::pop_error() { Token error = error_stack.back()->get(); error_stack.pop_back(); return error; } -GDScriptTokenizer::Token GDScriptTokenizer::make_token(Token::Type p_type) { +GDScriptTokenizer::Token GDScriptTokenizerText::make_token(Token::Type p_type) { Token token(p_type); token.start_line = start_line; token.end_line = line; @@ -408,35 +408,35 @@ GDScriptTokenizer::Token GDScriptTokenizer::make_token(Token::Type p_type) { return token; } -GDScriptTokenizer::Token GDScriptTokenizer::make_literal(const Variant &p_literal) { +GDScriptTokenizer::Token GDScriptTokenizerText::make_literal(const Variant &p_literal) { Token token = make_token(Token::LITERAL); token.literal = p_literal; return token; } -GDScriptTokenizer::Token GDScriptTokenizer::make_identifier(const StringName &p_identifier) { +GDScriptTokenizer::Token GDScriptTokenizerText::make_identifier(const StringName &p_identifier) { Token identifier = make_token(Token::IDENTIFIER); identifier.literal = p_identifier; return identifier; } 
-GDScriptTokenizer::Token GDScriptTokenizer::make_error(const String &p_message) { +GDScriptTokenizer::Token GDScriptTokenizerText::make_error(const String &p_message) { Token error = make_token(Token::ERROR); error.literal = p_message; return error; } -void GDScriptTokenizer::push_error(const String &p_message) { +void GDScriptTokenizerText::push_error(const String &p_message) { Token error = make_error(p_message); error_stack.push_back(error); } -void GDScriptTokenizer::push_error(const Token &p_error) { +void GDScriptTokenizerText::push_error(const Token &p_error) { error_stack.push_back(p_error); } -GDScriptTokenizer::Token GDScriptTokenizer::make_paren_error(char32_t p_paren) { +GDScriptTokenizer::Token GDScriptTokenizerText::make_paren_error(char32_t p_paren) { if (paren_stack.is_empty()) { return make_error(vformat("Closing \"%c\" doesn't have an opening counterpart.", p_paren)); } @@ -445,7 +445,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::make_paren_error(char32_t p_paren) { return error; } -GDScriptTokenizer::Token GDScriptTokenizer::check_vcs_marker(char32_t p_test, Token::Type p_double_type) { +GDScriptTokenizer::Token GDScriptTokenizerText::check_vcs_marker(char32_t p_test, Token::Type p_double_type) { const char32_t *next = _current + 1; int chars = 2; // Two already matched. @@ -469,7 +469,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::check_vcs_marker(char32_t p_test, To } } -GDScriptTokenizer::Token GDScriptTokenizer::annotation() { +GDScriptTokenizer::Token GDScriptTokenizerText::annotation() { if (is_unicode_identifier_start(_peek())) { _advance(); // Consume start character. 
} else { @@ -550,7 +550,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::annotation() { #define MAX_KEYWORD_LENGTH 10 #ifdef DEBUG_ENABLED -void GDScriptTokenizer::make_keyword_list() { +void GDScriptTokenizerText::make_keyword_list() { #define KEYWORD_LINE(keyword, token_type) keyword, #define KEYWORD_GROUP_IGNORE(group) keyword_list = { @@ -561,7 +561,7 @@ void GDScriptTokenizer::make_keyword_list() { } #endif // DEBUG_ENABLED -GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { +GDScriptTokenizer::Token GDScriptTokenizerText::potential_identifier() { bool only_ascii = _peek(-1) < 128; // Consume all identifier characters. @@ -611,7 +611,9 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { static_assert(keyword_length <= MAX_KEYWORD_LENGTH, "There's a keyword longer than the defined maximum length"); \ static_assert(keyword_length >= MIN_KEYWORD_LENGTH, "There's a keyword shorter than the defined minimum length"); \ if (keyword_length == len && name == keyword) { \ - return make_token(token_type); \ + Token kw = make_token(token_type); \ + kw.literal = name; \ + return kw; \ } \ } @@ -646,7 +648,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { #undef MIN_KEYWORD_LENGTH #undef KEYWORDS -void GDScriptTokenizer::newline(bool p_make_token) { +void GDScriptTokenizerText::newline(bool p_make_token) { // Don't overwrite previous newline, nor create if we want a line continuation. 
if (p_make_token && !pending_newline && !line_continuation) { Token newline(Token::NEWLINE); @@ -667,7 +669,7 @@ void GDScriptTokenizer::newline(bool p_make_token) { leftmost_column = 1; } -GDScriptTokenizer::Token GDScriptTokenizer::number() { +GDScriptTokenizer::Token GDScriptTokenizerText::number() { int base = 10; bool has_decimal = false; bool has_exponent = false; @@ -868,7 +870,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::number() { } } -GDScriptTokenizer::Token GDScriptTokenizer::string() { +GDScriptTokenizer::Token GDScriptTokenizerText::string() { enum StringType { STRING_REGULAR, STRING_NAME, @@ -1154,7 +1156,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() { return make_literal(string); } -void GDScriptTokenizer::check_indent() { +void GDScriptTokenizerText::check_indent() { ERR_FAIL_COND_MSG(column != 1, "Checking tokenizer indentation in the middle of a line."); if (_is_at_end()) { @@ -1323,13 +1325,13 @@ void GDScriptTokenizer::check_indent() { } } -String GDScriptTokenizer::_get_indent_char_name(char32_t ch) { +String GDScriptTokenizerText::_get_indent_char_name(char32_t ch) { ERR_FAIL_COND_V(ch != ' ' && ch != '\t', String(&ch, 1).c_escape()); return ch == ' ' ? "space" : "tab"; } -void GDScriptTokenizer::_skip_whitespace() { +void GDScriptTokenizerText::_skip_whitespace() { if (pending_indents != 0) { // Still have some indent/dedent tokens to give. 
return; @@ -1391,7 +1393,7 @@ void GDScriptTokenizer::_skip_whitespace() { } } -GDScriptTokenizer::Token GDScriptTokenizer::scan() { +GDScriptTokenizer::Token GDScriptTokenizerText::scan() { if (has_error()) { return pop_error(); } @@ -1453,6 +1455,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() { if (_peek() != '\n') { return make_error("Expected new line after \"\\\"."); } + continuation_lines.push_back(line); _advance(); newline(false); line_continuation = true; @@ -1673,7 +1676,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() { } } -GDScriptTokenizer::GDScriptTokenizer() { +GDScriptTokenizerText::GDScriptTokenizerText() { #ifdef TOOLS_ENABLED if (EditorSettings::get_singleton()) { tab_size = EditorSettings::get_singleton()->get_setting("text_editor/behavior/indent/size"); diff --git a/modules/gdscript/gdscript_tokenizer.h b/modules/gdscript/gdscript_tokenizer.h index a64aaf6820e8..5d7637517381 100644 --- a/modules/gdscript/gdscript_tokenizer.h +++ b/modules/gdscript/gdscript_tokenizer.h @@ -181,14 +181,13 @@ public: bool can_precede_bin_op() const; bool is_identifier() const; bool is_node_name() const; - StringName get_identifier() const { return source; } + StringName get_identifier() const { return literal; } Token(Type p_type) { type = p_type; } - Token() { - } + Token() {} }; #ifdef TOOLS_ENABLED @@ -203,12 +202,26 @@ public: new_line = p_new_line; } }; - const HashMap &get_comments() const { - return comments; - } + virtual const HashMap &get_comments() const = 0; #endif // TOOLS_ENABLED -private: + static String get_token_name(Token::Type p_token_type); + + virtual int get_cursor_line() const = 0; + virtual int get_cursor_column() const = 0; + virtual void set_cursor_position(int p_line, int p_column) = 0; + virtual void set_multiline_mode(bool p_state) = 0; + virtual bool is_past_cursor() const = 0; + virtual void push_expression_indented_block() = 0; // For lambdas, or blocks inside expressions. 
+ virtual void pop_expression_indented_block() = 0; // For lambdas, or blocks inside expressions. + virtual bool is_text() = 0; + + virtual Token scan() = 0; + + virtual ~GDScriptTokenizer() {} +}; + +class GDScriptTokenizerText : public GDScriptTokenizer { String source; const char32_t *_source = nullptr; const char32_t *_current = nullptr; @@ -235,6 +248,7 @@ private: char32_t indent_char = '\0'; int position = 0; int length = 0; + Vector continuation_lines; #ifdef DEBUG_ENABLED Vector keyword_list; #endif // DEBUG_ENABLED @@ -275,20 +289,28 @@ private: Token annotation(); public: - Token scan(); - void set_source_code(const String &p_source_code); - int get_cursor_line() const; - int get_cursor_column() const; - void set_cursor_position(int p_line, int p_column); - void set_multiline_mode(bool p_state); - bool is_past_cursor() const; - static String get_token_name(Token::Type p_token_type); - void push_expression_indented_block(); // For lambdas, or blocks inside expressions. - void pop_expression_indented_block(); // For lambdas, or blocks inside expressions. + const Vector &get_continuation_lines() const { return continuation_lines; } - GDScriptTokenizer(); + virtual int get_cursor_line() const override; + virtual int get_cursor_column() const override; + virtual void set_cursor_position(int p_line, int p_column) override; + virtual void set_multiline_mode(bool p_state) override; + virtual bool is_past_cursor() const override; + virtual void push_expression_indented_block() override; // For lambdas, or blocks inside expressions. + virtual void pop_expression_indented_block() override; // For lambdas, or blocks inside expressions. 
+ virtual bool is_text() override { return true; } + +#ifdef TOOLS_ENABLED + virtual const HashMap &get_comments() const override { + return comments; + } +#endif // TOOLS_ENABLED + + virtual Token scan() override; + + GDScriptTokenizerText(); }; #endif // GDSCRIPT_TOKENIZER_H diff --git a/modules/gdscript/gdscript_tokenizer_buffer.cpp b/modules/gdscript/gdscript_tokenizer_buffer.cpp new file mode 100644 index 000000000000..5b41c411d841 --- /dev/null +++ b/modules/gdscript/gdscript_tokenizer_buffer.cpp @@ -0,0 +1,457 @@ +/**************************************************************************/ +/* gdscript_tokenizer_buffer.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "gdscript_tokenizer_buffer.h" + +#include "core/io/marshalls.h" + +#define TOKENIZER_VERSION 100 + +int GDScriptTokenizerBuffer::_token_to_binary(const Token &p_token, Vector &r_buffer, int p_start, HashMap &r_identifiers_map, HashMap &r_constants_map) { + int pos = p_start; + + int token_type = p_token.type & TOKEN_MASK; + + switch (p_token.type) { + case GDScriptTokenizer::Token::ANNOTATION: + case GDScriptTokenizer::Token::IDENTIFIER: { + // Add identifier to map. + int identifier_pos; + StringName id = p_token.get_identifier(); + if (r_identifiers_map.has(id)) { + identifier_pos = r_identifiers_map[id]; + } else { + identifier_pos = r_identifiers_map.size(); + r_identifiers_map[id] = identifier_pos; + } + token_type |= identifier_pos << TOKEN_BITS; + } break; + case GDScriptTokenizer::Token::ERROR: + case GDScriptTokenizer::Token::LITERAL: { + // Add literal to map. + int constant_pos; + if (r_constants_map.has(p_token.literal)) { + constant_pos = r_constants_map[p_token.literal]; + } else { + constant_pos = r_constants_map.size(); + r_constants_map[p_token.literal] = constant_pos; + } + token_type |= constant_pos << TOKEN_BITS; + } break; + default: + break; + } + + // Encode token. 
+ int token_len; + if (token_type & TOKEN_MASK) { + token_len = 8; + r_buffer.resize(pos + token_len); + encode_uint32(token_type | TOKEN_BYTE_MASK, &r_buffer.write[pos]); + pos += 4; + } else { + token_len = 5; + r_buffer.resize(pos + token_len); + r_buffer.write[pos] = token_type; + pos++; + } + encode_uint32(p_token.start_line, &r_buffer.write[pos]); + return token_len; +} + +GDScriptTokenizer::Token GDScriptTokenizerBuffer::_binary_to_token(const uint8_t *p_buffer) { + Token token; + const uint8_t *b = p_buffer; + + uint32_t token_type = decode_uint32(b); + token.type = (Token::Type)(token_type & TOKEN_MASK); + if (token_type & TOKEN_BYTE_MASK) { + b += 4; + } else { + b++; + } + token.start_line = decode_uint32(b); + token.end_line = token.start_line; + + token.literal = token.get_name(); + if (token.type == Token::CONST_NAN) { + token.literal = String("NAN"); // Special case since name and notation are different. + } + + switch (token.type) { + case GDScriptTokenizer::Token::ANNOTATION: + case GDScriptTokenizer::Token::IDENTIFIER: { + // Get name from map. + int identifier_pos = token_type >> TOKEN_BITS; + if (unlikely(identifier_pos >= identifiers.size())) { + Token error; + error.type = Token::ERROR; + error.literal = "Identifier index out of bounds."; + return error; + } + token.literal = identifiers[identifier_pos]; + } break; + case GDScriptTokenizer::Token::ERROR: + case GDScriptTokenizer::Token::LITERAL: { + // Get literal from map. 
+ int constant_pos = token_type >> TOKEN_BITS; + if (unlikely(constant_pos >= constants.size())) { + Token error; + error.type = Token::ERROR; + error.literal = "Constant index out of bounds."; + return error; + } + token.literal = constants[constant_pos]; + } break; + default: + break; + } + + return token; +} + +Error GDScriptTokenizerBuffer::set_code_buffer(const Vector &p_buffer) { + const uint8_t *buf = p_buffer.ptr(); + int total_len = p_buffer.size(); + ERR_FAIL_COND_V(p_buffer.size() < 24 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA); + + int version = decode_uint32(&buf[4]); + ERR_FAIL_COND_V_MSG(version > TOKENIZER_VERSION, ERR_INVALID_DATA, "Binary GDScript is too recent! Please use a newer engine version."); + + uint32_t identifier_count = decode_uint32(&buf[8]); + uint32_t constant_count = decode_uint32(&buf[12]); + uint32_t token_line_count = decode_uint32(&buf[16]); + uint32_t token_count = decode_uint32(&buf[20]); + + const uint8_t *b = &buf[24]; + total_len -= 24; + + identifiers.resize(identifier_count); + for (uint32_t i = 0; i < identifier_count; i++) { + uint32_t len = decode_uint32(b); + total_len -= 4; + ERR_FAIL_COND_V((len * 4u) > (uint32_t)total_len, ERR_INVALID_DATA); + b += 4; + Vector cs; + cs.resize(len); + for (uint32_t j = 0; j < len; j++) { + uint8_t tmp[4]; + for (uint32_t k = 0; k < 4; k++) { + tmp[k] = b[j * 4 + k] ^ 0xb6; + } + cs.write[j] = decode_uint32(tmp); + } + + String s(reinterpret_cast(cs.ptr()), len); + b += len * 4; + total_len -= len * 4; + identifiers.write[i] = s; + } + + constants.resize(constant_count); + for (uint32_t i = 0; i < constant_count; i++) { + Variant v; + int len; + Error err = decode_variant(v, b, total_len, &len, false); + if (err) { + return err; + } + b += len; + total_len -= len; + constants.write[i] = v; + } + + for (uint32_t i = 0; i < token_line_count; i++) { + ERR_FAIL_COND_V(total_len < 8, ERR_INVALID_DATA); + uint32_t token_index 
= decode_uint32(b); + b += 4; + uint32_t line = decode_uint32(b); + b += 4; + total_len -= 8; + token_lines[token_index] = line; + } + for (uint32_t i = 0; i < token_line_count; i++) { + ERR_FAIL_COND_V(total_len < 8, ERR_INVALID_DATA); + uint32_t token_index = decode_uint32(b); + b += 4; + uint32_t column = decode_uint32(b); + b += 4; + total_len -= 8; + token_columns[token_index] = column; + } + + tokens.resize(token_count); + for (uint32_t i = 0; i < token_count; i++) { + int token_len = 5; + if ((*b) & TOKEN_BYTE_MASK) { + token_len = 8; + } + ERR_FAIL_COND_V(total_len < token_len, ERR_INVALID_DATA); + Token token = _binary_to_token(b); + b += token_len; + ERR_FAIL_INDEX_V(token.type, Token::TK_MAX, ERR_INVALID_DATA); + tokens.write[i] = token; + total_len -= token_len; + } + + ERR_FAIL_COND_V(total_len > 0, ERR_INVALID_DATA); + + return OK; +} + +Vector GDScriptTokenizerBuffer::parse_code_string(const String &p_code) { + Vector buf; + + HashMap identifier_map; + HashMap constant_map; + Vector token_buffer; + HashMap token_lines; + HashMap token_columns; + + GDScriptTokenizerText tokenizer; + tokenizer.set_source_code(p_code); + tokenizer.set_multiline_mode(true); // Ignore whitespace tokens. + Token current = tokenizer.scan(); + int token_pos = 0; + int last_token_line = 0; + int token_counter = 0; + + while (current.type != Token::TK_EOF) { + int token_len = _token_to_binary(current, token_buffer, token_pos, identifier_map, constant_map); + token_pos += token_len; + if (token_counter > 0 && current.start_line > last_token_line) { + token_lines[token_counter] = current.start_line; + token_columns[token_counter] = current.start_column; + } + last_token_line = current.end_line; + + current = tokenizer.scan(); + token_counter++; + } + + // Reverse maps. 
+ Vector rev_identifier_map; + rev_identifier_map.resize(identifier_map.size()); + for (const KeyValue &E : identifier_map) { + rev_identifier_map.write[E.value] = E.key; + } + Vector rev_constant_map; + rev_constant_map.resize(constant_map.size()); + for (const KeyValue &E : constant_map) { + rev_constant_map.write[E.value] = E.key; + } + HashMap rev_token_lines; + for (const KeyValue &E : token_lines) { + rev_token_lines[E.value] = E.key; + } + + // Remove continuation lines from map. + for (int line : tokenizer.get_continuation_lines()) { + if (rev_token_lines.has(line + 1)) { + token_lines.erase(rev_token_lines[line + 1]); + token_columns.erase(rev_token_lines[line + 1]); + } + } + + // Save header. + buf.resize(24); + buf.write[0] = 'G'; + buf.write[1] = 'D'; + buf.write[2] = 'S'; + buf.write[3] = 'C'; + encode_uint32(TOKENIZER_VERSION, &buf.write[4]); + encode_uint32(identifier_map.size(), &buf.write[8]); + encode_uint32(constant_map.size(), &buf.write[12]); + encode_uint32(token_lines.size(), &buf.write[16]); + encode_uint32(token_counter, &buf.write[20]); + + int buf_pos = 24; + + // Save identifiers. + for (const StringName &id : rev_identifier_map) { + String s = id.operator String(); + int len = s.length(); + + buf.resize(buf_pos + (len + 1) * 4); + + encode_uint32(len, &buf.write[buf_pos]); + buf_pos += 4; + + for (int i = 0; i < len; i++) { + uint8_t tmp[4]; + encode_uint32(s[i], tmp); + + for (int b = 0; b < 4; b++) { + buf.write[buf_pos + b] = tmp[b] ^ 0xb6; + } + + buf_pos += 4; + } + } + + // Save constants. + for (const Variant &v : rev_constant_map) { + int len; + // Objects cannot be constant, never encode objects. + Error err = encode_variant(v, nullptr, len, false); + ERR_FAIL_COND_V_MSG(err != OK, Vector(), "Error when trying to encode Variant."); + buf.resize(buf_pos + len); + encode_variant(v, &buf.write[buf_pos], len, false); + buf_pos += len; + } + + // Save lines and columns. 
+ buf.resize(buf_pos + token_lines.size() * 16); + for (const KeyValue &e : token_lines) { + encode_uint32(e.key, &buf.write[buf_pos]); + buf_pos += 4; + encode_uint32(e.value, &buf.write[buf_pos]); + buf_pos += 4; + } + for (const KeyValue &e : token_columns) { + encode_uint32(e.key, &buf.write[buf_pos]); + buf_pos += 4; + encode_uint32(e.value, &buf.write[buf_pos]); + buf_pos += 4; + } + + // Store tokens. + buf.append_array(token_buffer); + + return buf; +} + +int GDScriptTokenizerBuffer::get_cursor_line() const { + return 0; +} + +int GDScriptTokenizerBuffer::get_cursor_column() const { + return 0; +} + +void GDScriptTokenizerBuffer::set_cursor_position(int p_line, int p_column) { +} + +void GDScriptTokenizerBuffer::set_multiline_mode(bool p_state) { + multiline_mode = p_state; +} + +bool GDScriptTokenizerBuffer::is_past_cursor() const { + return false; +} + +void GDScriptTokenizerBuffer::push_expression_indented_block() { + indent_stack_stack.push_back(indent_stack); +} + +void GDScriptTokenizerBuffer::pop_expression_indented_block() { + ERR_FAIL_COND(indent_stack_stack.size() == 0); + indent_stack = indent_stack_stack.back()->get(); + indent_stack_stack.pop_back(); +} + +GDScriptTokenizer::Token GDScriptTokenizerBuffer::scan() { + // Add final newline. + if (current >= tokens.size() && !last_token_was_newline) { + Token newline; + newline.type = Token::NEWLINE; + newline.start_line = current_line; + newline.end_line = current_line; + last_token_was_newline = true; + return newline; + } + + // Resolve pending indentation change. 
+ if (pending_indents > 0) { + pending_indents--; + Token indent; + indent.type = Token::INDENT; + indent.start_line = current_line; + indent.end_line = current_line; + return indent; + } else if (pending_indents < 0) { + pending_indents++; + Token dedent; + dedent.type = Token::DEDENT; + dedent.start_line = current_line; + dedent.end_line = current_line; + return dedent; + } + + if (current >= tokens.size()) { + if (!indent_stack.is_empty()) { + pending_indents -= indent_stack.size(); + indent_stack.clear(); + return scan(); + } + Token eof; + eof.type = Token::TK_EOF; + return eof; + }; + + if (!last_token_was_newline && token_lines.has(current)) { + current_line = token_lines[current]; + uint32_t current_column = token_columns[current]; + + // Check if there's a need to indent/dedent. + if (!multiline_mode) { + uint32_t previous_indent = 0; + if (!indent_stack.is_empty()) { + previous_indent = indent_stack.back()->get(); + } + if (current_column - 1 > previous_indent) { + pending_indents++; + indent_stack.push_back(current_column - 1); + } else { + while (current_column - 1 < previous_indent) { + pending_indents--; + indent_stack.pop_back(); + if (indent_stack.is_empty()) { + break; + } + previous_indent = indent_stack.back()->get(); + } + } + + Token newline; + newline.type = Token::NEWLINE; + newline.start_line = current_line; + newline.end_line = current_line; + last_token_was_newline = true; + + return newline; + } + } + + last_token_was_newline = false; + + Token token = tokens[current++]; + return token; +} diff --git a/modules/gdscript/gdscript_tokenizer_buffer.h b/modules/gdscript/gdscript_tokenizer_buffer.h new file mode 100644 index 000000000000..192a7b3f1599 --- /dev/null +++ b/modules/gdscript/gdscript_tokenizer_buffer.h @@ -0,0 +1,87 @@ +/**************************************************************************/ +/* gdscript_tokenizer_buffer.h */ +/**************************************************************************/ +/* This file is part of: 
*/ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#ifndef GDSCRIPT_TOKENIZER_BUFFER_H +#define GDSCRIPT_TOKENIZER_BUFFER_H + +#include "gdscript_tokenizer.h" + +class GDScriptTokenizerBuffer : public GDScriptTokenizer { + enum { + TOKEN_BYTE_MASK = 0x80, + TOKEN_BITS = 8, + TOKEN_MASK = (1 << (TOKEN_BITS - 1)) - 1, + }; + + Vector identifiers; + Vector constants; + Vector continuation_lines; + HashMap token_lines; + HashMap token_columns; + Vector tokens; + int current = 0; + uint32_t current_line = 1; + + bool multiline_mode = false; + List indent_stack; + List> indent_stack_stack; // For lambdas, which require manipulating the indentation point. + int pending_indents = 0; + bool last_token_was_newline = false; + +#ifdef TOOLS_ENABLED + HashMap dummy; +#endif // TOOLS_ENABLED + + static int _token_to_binary(const Token &p_token, Vector &r_buffer, int p_start, HashMap &r_identifiers_map, HashMap &r_constants_map); + Token _binary_to_token(const uint8_t *p_buffer); + +public: + Error set_code_buffer(const Vector &p_buffer); + static Vector parse_code_string(const String &p_code); + + virtual int get_cursor_line() const override; + virtual int get_cursor_column() const override; + virtual void set_cursor_position(int p_line, int p_column) override; + virtual void set_multiline_mode(bool p_state) override; + virtual bool is_past_cursor() const override; + virtual void push_expression_indented_block() override; // For lambdas, or blocks inside expressions. + virtual void pop_expression_indented_block() override; // For lambdas, or blocks inside expressions. 
+ virtual bool is_text() override { return false; }; + +#ifdef TOOLS_ENABLED + virtual const HashMap &get_comments() const override { + return dummy; + } +#endif // TOOLS_ENABLED + + virtual Token scan() override; +}; + +#endif // GDSCRIPT_TOKENIZER_BUFFER_H diff --git a/modules/gdscript/language_server/gdscript_extend_parser.cpp b/modules/gdscript/language_server/gdscript_extend_parser.cpp index 0f8648e9a34f..ad7af34bf113 100644 --- a/modules/gdscript/language_server/gdscript_extend_parser.cpp +++ b/modules/gdscript/language_server/gdscript_extend_parser.cpp @@ -191,7 +191,7 @@ void ExtendGDScriptParser::update_symbols() { void ExtendGDScriptParser::update_document_links(const String &p_code) { document_links.clear(); - GDScriptTokenizer scr_tokenizer; + GDScriptTokenizerText scr_tokenizer; Ref fs = FileAccess::create(FileAccess::ACCESS_RESOURCES); scr_tokenizer.set_source_code(p_code); while (true) { diff --git a/modules/gdscript/register_types.cpp b/modules/gdscript/register_types.cpp index 605e82be6e81..e835c93b7c05 100644 --- a/modules/gdscript/register_types.cpp +++ b/modules/gdscript/register_types.cpp @@ -34,6 +34,7 @@ #include "gdscript_analyzer.h" #include "gdscript_cache.h" #include "gdscript_tokenizer.h" +#include "gdscript_tokenizer_buffer.h" #include "gdscript_utility_functions.h" #ifdef TOOLS_ENABLED @@ -83,18 +84,32 @@ class EditorExportGDScript : public EditorExportPlugin { public: virtual void _export_file(const String &p_path, const String &p_type, const HashSet &p_features) override { - String script_key; + int script_mode = EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS; const Ref &preset = get_export_preset(); if (preset.is_valid()) { - script_key = preset->get_script_encryption_key().to_lower(); + script_mode = preset->get_script_export_mode(); } - if (!p_path.ends_with(".gd")) { + if (!p_path.ends_with(".gd") || script_mode == EditorExportPreset::MODE_SCRIPT_TEXT) { return; } + Vector file = FileAccess::get_file_as_bytes(p_path); + if 
(file.is_empty()) { + return; + } + + String source; + source.parse_utf8(reinterpret_cast(file.ptr()), file.size()); + file = GDScriptTokenizerBuffer::parse_code_string(source); + if (file.is_empty()) { + return; + } + + add_file(p_path.get_basename() + ".gdc", file, true); + return; } @@ -185,6 +200,10 @@ void test_tokenizer() { GDScriptTests::test(GDScriptTests::TestType::TEST_TOKENIZER); } +void test_tokenizer_buffer() { + GDScriptTests::test(GDScriptTests::TestType::TEST_TOKENIZER_BUFFER); +} + void test_parser() { GDScriptTests::test(GDScriptTests::TestType::TEST_PARSER); } @@ -198,6 +217,7 @@ void test_bytecode() { } REGISTER_TEST_COMMAND("gdscript-tokenizer", &test_tokenizer); +REGISTER_TEST_COMMAND("gdscript-tokenizer-buffer", &test_tokenizer_buffer); REGISTER_TEST_COMMAND("gdscript-parser", &test_parser); REGISTER_TEST_COMMAND("gdscript-compiler", &test_compiler); REGISTER_TEST_COMMAND("gdscript-bytecode", &test_bytecode); diff --git a/modules/gdscript/tests/gdscript_test_runner.cpp b/modules/gdscript/tests/gdscript_test_runner.cpp index 4d93a6fc1804..880289d2a80b 100644 --- a/modules/gdscript/tests/gdscript_test_runner.cpp +++ b/modules/gdscript/tests/gdscript_test_runner.cpp @@ -34,6 +34,7 @@ #include "../gdscript_analyzer.h" #include "../gdscript_compiler.h" #include "../gdscript_parser.h" +#include "../gdscript_tokenizer_buffer.h" #include "core/config/project_settings.h" #include "core/core_globals.h" @@ -131,10 +132,11 @@ void finish_language() { StringName GDScriptTestRunner::test_function_name; -GDScriptTestRunner::GDScriptTestRunner(const String &p_source_dir, bool p_init_language, bool p_print_filenames) { +GDScriptTestRunner::GDScriptTestRunner(const String &p_source_dir, bool p_init_language, bool p_print_filenames, bool p_use_binary_tokens) { test_function_name = StaticCString::create("test"); do_init_languages = p_init_language; print_filenames = p_print_filenames; + binary_tokens = p_use_binary_tokens; source_dir = p_source_dir; if 
(!source_dir.ends_with("/")) { @@ -277,6 +279,9 @@ bool GDScriptTestRunner::make_tests_for_dir(const String &p_dir) { if (next.ends_with(".notest.gd")) { next = dir->get_next(); continue; + } else if (binary_tokens && next.ends_with(".textonly.gd")) { + next = dir->get_next(); + continue; } else if (next.get_extension().to_lower() == "gd") { #ifndef DEBUG_ENABLED // On release builds, skip tests marked as debug only. @@ -299,6 +304,9 @@ bool GDScriptTestRunner::make_tests_for_dir(const String &p_dir) { ERR_FAIL_V_MSG(false, "Could not find output file for " + next); } GDScriptTest test(current_dir.path_join(next), current_dir.path_join(out_file), source_dir); + if (binary_tokens) { + test.set_tokenizer_mode(GDScriptTest::TOKENIZER_BUFFER); + } tests.push_back(test); } } @@ -321,24 +329,65 @@ bool GDScriptTestRunner::make_tests() { return make_tests_for_dir(dir->get_current_dir()); } -bool GDScriptTestRunner::generate_class_index() { - StringName gdscript_name = GDScriptLanguage::get_singleton()->get_name(); - for (int i = 0; i < tests.size(); i++) { - GDScriptTest test = tests[i]; - String base_type; +static bool generate_class_index_recursive(const String &p_dir) { + Error err = OK; + Ref dir(DirAccess::open(p_dir, &err)); - String class_name = GDScriptLanguage::get_singleton()->get_global_class_name(test.get_source_file(), &base_type); - if (class_name.is_empty()) { - continue; - } - ERR_FAIL_COND_V_MSG(ScriptServer::is_global_class(class_name), false, - "Class name '" + class_name + "' from " + test.get_source_file() + " is already used in " + ScriptServer::get_global_class_path(class_name)); - - ScriptServer::add_global_class(class_name, base_type, gdscript_name, test.get_source_file()); + if (err != OK) { + return false; } + + String current_dir = dir->get_current_dir(); + + dir->list_dir_begin(); + String next = dir->get_next(); + + StringName gdscript_name = GDScriptLanguage::get_singleton()->get_name(); + while (!next.is_empty()) { + if 
(dir->current_is_dir()) { + if (next == "." || next == ".." || next == "completion" || next == "lsp") { + next = dir->get_next(); + continue; + } + if (!generate_class_index_recursive(current_dir.path_join(next))) { + return false; + } + } else { + if (!next.ends_with(".gd")) { + next = dir->get_next(); + continue; + } + String base_type; + String source_file = current_dir.path_join(next); + String class_name = GDScriptLanguage::get_singleton()->get_global_class_name(source_file, &base_type); + if (class_name.is_empty()) { + next = dir->get_next(); + continue; + } + ERR_FAIL_COND_V_MSG(ScriptServer::is_global_class(class_name), false, + "Class name '" + class_name + "' from " + source_file + " is already used in " + ScriptServer::get_global_class_path(class_name)); + + ScriptServer::add_global_class(class_name, base_type, gdscript_name, source_file); + } + + next = dir->get_next(); + } + + dir->list_dir_end(); + return true; } +bool GDScriptTestRunner::generate_class_index() { + Error err = OK; + Ref dir(DirAccess::open(source_dir, &err)); + + ERR_FAIL_COND_V_MSG(err != OK, false, "Could not open specified test directory."); + + source_dir = dir->get_current_dir() + "/"; // Make it absolute path. 
+ return generate_class_index_recursive(dir->get_current_dir()); +} + GDScriptTest::GDScriptTest(const String &p_source_path, const String &p_output_path, const String &p_base_dir) { source_file = p_source_path; output_file = p_output_path; @@ -484,7 +533,15 @@ GDScriptTest::TestResult GDScriptTest::execute_test_code(bool p_is_generating) { Ref script; script.instantiate(); script->set_path(source_file); - err = script->load_source_code(source_file); + if (tokenizer_mode == TOKENIZER_TEXT) { + err = script->load_source_code(source_file); + } else { + String code = FileAccess::get_file_as_string(source_file, &err); + if (!err) { + Vector buffer = GDScriptTokenizerBuffer::parse_code_string(code); + script->set_binary_tokens_source(buffer); + } + } if (err != OK) { enable_stdout(); result.status = GDTEST_LOAD_ERROR; @@ -494,7 +551,11 @@ GDScriptTest::TestResult GDScriptTest::execute_test_code(bool p_is_generating) { // Test parsing. GDScriptParser parser; - err = parser.parse(script->get_source_code(), source_file, false); + if (tokenizer_mode == TOKENIZER_TEXT) { + err = parser.parse(script->get_source_code(), source_file, false); + } else { + err = parser.parse_binary(script->get_binary_tokens_source(), source_file); + } if (err != OK) { enable_stdout(); result.status = GDTEST_PARSER_ERROR; @@ -583,7 +644,14 @@ GDScriptTest::TestResult GDScriptTest::execute_test_code(bool p_is_generating) { add_print_handler(&_print_handler); add_error_handler(&_error_handler); - script->reload(); + err = script->reload(); + if (err) { + enable_stdout(); + result.status = GDTEST_LOAD_ERROR; + result.output = ""; + result.passed = false; + ERR_FAIL_V_MSG(result, "\nCould not reload script: '" + source_file + "'"); + } // Create object instance for test. 
Object *obj = ClassDB::instantiate(script->get_native()->get_name()); diff --git a/modules/gdscript/tests/gdscript_test_runner.h b/modules/gdscript/tests/gdscript_test_runner.h index b1190604ada1..57e3ac86f9d1 100644 --- a/modules/gdscript/tests/gdscript_test_runner.h +++ b/modules/gdscript/tests/gdscript_test_runner.h @@ -62,6 +62,11 @@ public: bool passed; }; + enum TokenizerMode { + TOKENIZER_TEXT, + TOKENIZER_BUFFER, + }; + private: struct ErrorHandlerData { TestResult *result = nullptr; @@ -79,6 +84,8 @@ private: PrintHandlerList _print_handler; ErrorHandlerList _error_handler; + TokenizerMode tokenizer_mode = TOKENIZER_TEXT; + void enable_stdout(); void disable_stdout(); bool check_output(const String &p_output) const; @@ -96,6 +103,9 @@ public: const String get_source_relative_filepath() const { return source_file.trim_prefix(base_dir); } const String &get_output_file() const { return output_file; } + void set_tokenizer_mode(TokenizerMode p_tokenizer_mode) { tokenizer_mode = p_tokenizer_mode; } + TokenizerMode get_tokenizer_mode() const { return tokenizer_mode; } + GDScriptTest(const String &p_source_path, const String &p_output_path, const String &p_base_dir); GDScriptTest() : GDScriptTest(String(), String(), String()) {} // Needed to use in Vector. @@ -108,6 +118,7 @@ class GDScriptTestRunner { bool is_generating = false; bool do_init_languages = false; bool print_filenames; // Whether filenames should be printed when generated/running tests + bool binary_tokens; // Test with buffer tokenizer. 
bool make_tests(); bool make_tests_for_dir(const String &p_dir); @@ -120,7 +131,7 @@ public: int run_tests(); bool generate_outputs(); - GDScriptTestRunner(const String &p_source_dir, bool p_init_language, bool p_print_filenames = false); + GDScriptTestRunner(const String &p_source_dir, bool p_init_language, bool p_print_filenames = false, bool p_use_binary_tokens = false); ~GDScriptTestRunner(); }; diff --git a/modules/gdscript/tests/gdscript_test_runner_suite.h b/modules/gdscript/tests/gdscript_test_runner_suite.h index 5fd7d942d223..5acf436e420e 100644 --- a/modules/gdscript/tests/gdscript_test_runner_suite.h +++ b/modules/gdscript/tests/gdscript_test_runner_suite.h @@ -38,12 +38,10 @@ namespace GDScriptTests { TEST_SUITE("[Modules][GDScript]") { - // GDScript 2.0 is still under heavy construction. - // Allow the tests to fail, but do not ignore errors during development. - // Update the scripts and expected output as needed. TEST_CASE("Script compilation and runtime") { bool print_filenames = OS::get_singleton()->get_cmdline_args().find("--print-filenames") != nullptr; - GDScriptTestRunner runner("modules/gdscript/tests/scripts", true, print_filenames); + bool use_binary_tokens = OS::get_singleton()->get_cmdline_args().find("--use-binary-tokens") != nullptr; + GDScriptTestRunner runner("modules/gdscript/tests/scripts", true, print_filenames, use_binary_tokens); int fail_count = runner.run_tests(); INFO("Make sure `*.out` files have expected results."); REQUIRE_MESSAGE(fail_count == 0, "All GDScript tests should pass."); diff --git a/modules/gdscript/tests/scripts/parser/errors/mixing_tabs_spaces.gd b/modules/gdscript/tests/scripts/parser/errors/mixing_tabs_spaces.textonly.gd similarity index 100% rename from modules/gdscript/tests/scripts/parser/errors/mixing_tabs_spaces.gd rename to modules/gdscript/tests/scripts/parser/errors/mixing_tabs_spaces.textonly.gd diff --git a/modules/gdscript/tests/scripts/parser/errors/mixing_tabs_spaces.out 
b/modules/gdscript/tests/scripts/parser/errors/mixing_tabs_spaces.textonly.out similarity index 100% rename from modules/gdscript/tests/scripts/parser/errors/mixing_tabs_spaces.out rename to modules/gdscript/tests/scripts/parser/errors/mixing_tabs_spaces.textonly.out diff --git a/modules/gdscript/tests/scripts/parser/features/multiline_if.gd b/modules/gdscript/tests/scripts/parser/features/multiline_if.gd index 86152f4543c5..7b82d9b1da65 100644 --- a/modules/gdscript/tests/scripts/parser/features/multiline_if.gd +++ b/modules/gdscript/tests/scripts/parser/features/multiline_if.gd @@ -9,6 +9,7 @@ func test(): # Alternatively, backslashes can be used. if 1 == 1 \ + \ and 2 == 2 and \ 3 == 3: pass diff --git a/modules/gdscript/tests/test_gdscript.cpp b/modules/gdscript/tests/test_gdscript.cpp index 467bedc4b218..e4fab68e06c2 100644 --- a/modules/gdscript/tests/test_gdscript.cpp +++ b/modules/gdscript/tests/test_gdscript.cpp @@ -34,6 +34,7 @@ #include "../gdscript_compiler.h" #include "../gdscript_parser.h" #include "../gdscript_tokenizer.h" +#include "../gdscript_tokenizer_buffer.h" #include "core/config/project_settings.h" #include "core/io/file_access.h" @@ -50,7 +51,7 @@ namespace GDScriptTests { static void test_tokenizer(const String &p_code, const Vector &p_lines) { - GDScriptTokenizer tokenizer; + GDScriptTokenizerText tokenizer; tokenizer.set_source_code(p_code); int tab_size = 4; @@ -107,6 +108,53 @@ static void test_tokenizer(const String &p_code, const Vector &p_lines) print_line(current.get_name()); // Should be EOF } +static void test_tokenizer_buffer(const Vector &p_buffer, const Vector &p_lines); + +static void test_tokenizer_buffer(const String &p_code, const Vector &p_lines) { + Vector binary = GDScriptTokenizerBuffer::parse_code_string(p_code); + test_tokenizer_buffer(binary, p_lines); +} + +static void test_tokenizer_buffer(const Vector &p_buffer, const Vector &p_lines) { + GDScriptTokenizerBuffer tokenizer; + tokenizer.set_code_buffer(p_buffer); + 
+ int tab_size = 4; +#ifdef TOOLS_ENABLED + if (EditorSettings::get_singleton()) { + tab_size = EditorSettings::get_singleton()->get_setting("text_editor/behavior/indent/size"); + } +#endif // TOOLS_ENABLED + String tab = String(" ").repeat(tab_size); + + GDScriptTokenizer::Token current = tokenizer.scan(); + while (current.type != GDScriptTokenizer::Token::TK_EOF) { + StringBuilder token; + token += " --> "; // Padding for line number. + + for (int l = current.start_line; l <= current.end_line && l <= p_lines.size(); l++) { + print_line(vformat("%04d %s", l, p_lines[l - 1]).replace("\t", tab)); + } + + token += current.get_name(); + + if (current.type == GDScriptTokenizer::Token::ERROR || current.type == GDScriptTokenizer::Token::LITERAL || current.type == GDScriptTokenizer::Token::IDENTIFIER || current.type == GDScriptTokenizer::Token::ANNOTATION) { + token += "("; + token += Variant::get_type_name(current.literal.get_type()); + token += ") "; + token += current.literal; + } + + print_line(token.as_string()); + + print_line("-------------------------------------------------------"); + + current = tokenizer.scan(); + } + + print_line(current.get_name()); // Should be EOF +} + static void test_parser(const String &p_code, const String &p_script_path, const Vector &p_lines) { GDScriptParser parser; Error err = parser.parse(p_code, p_script_path, false); @@ -119,7 +167,7 @@ static void test_parser(const String &p_code, const String &p_script_path, const } GDScriptAnalyzer analyzer(&parser); - analyzer.analyze(); + err = analyzer.analyze(); if (err != OK) { const List &errors = parser.get_errors(); @@ -212,7 +260,7 @@ void test(TestType p_type) { } String test = cmdlargs.back()->get(); - if (!test.ends_with(".gd")) { + if (!test.ends_with(".gd") && !test.ends_with(".gdc")) { print_line("This test expects a path to a GDScript file as its last parameter. 
Got: " + test); return; } @@ -255,6 +303,13 @@ void test(TestType p_type) { case TEST_TOKENIZER: test_tokenizer(code, lines); break; + case TEST_TOKENIZER_BUFFER: + if (test.ends_with(".gdc")) { + test_tokenizer_buffer(buf, lines); + } else { + test_tokenizer_buffer(code, lines); + } + break; case TEST_PARSER: test_parser(code, test, lines); break; diff --git a/modules/gdscript/tests/test_gdscript.h b/modules/gdscript/tests/test_gdscript.h index b39dfe2b5ab8..32f278d5ce34 100644 --- a/modules/gdscript/tests/test_gdscript.h +++ b/modules/gdscript/tests/test_gdscript.h @@ -39,6 +39,7 @@ namespace GDScriptTests { enum TestType { TEST_TOKENIZER, + TEST_TOKENIZER_BUFFER, TEST_PARSER, TEST_COMPILER, TEST_BYTECODE,