// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. #include "vm/symbols.h" #include "platform/unicode.h" #include "vm/canonical_tables.h" #include "vm/handles.h" #include "vm/hash_table.h" #include "vm/heap/safepoint.h" #include "vm/isolate.h" #include "vm/object.h" #include "vm/object_store.h" #include "vm/raw_object.h" #include "vm/reusable_handles.h" #include "vm/visitor.h" namespace dart { StringPtr Symbols::predefined_[Symbols::kNumberOfOneCharCodeSymbols]; String* Symbols::symbol_handles_[Symbols::kMaxPredefinedId]; static const char* const names[] = { // clang-format off NULL, #define DEFINE_SYMBOL_LITERAL(symbol, literal) literal, PREDEFINED_SYMBOLS_LIST(DEFINE_SYMBOL_LITERAL) #undef DEFINE_SYMBOL_LITERAL "", // matches kTokenTableStart. #define DEFINE_TOKEN_SYMBOL_INDEX(t, s, p, a) s, DART_TOKEN_LIST(DEFINE_TOKEN_SYMBOL_INDEX) DART_KEYWORD_LIST(DEFINE_TOKEN_SYMBOL_INDEX) #undef DEFINE_TOKEN_SYMBOL_INDEX // clang-format on }; StringPtr StringFrom(const uint8_t* data, intptr_t len, Heap::Space space) { return String::FromLatin1(data, len, space); } StringPtr StringFrom(const uint16_t* data, intptr_t len, Heap::Space space) { return String::FromUTF16(data, len, space); } StringPtr StringSlice::ToSymbol() const { if (is_all() && str_.IsOld()) { str_.SetCanonical(); return str_.ptr(); } else { String& result = String::Handle(String::SubString(str_, begin_index_, len_, Heap::kOld)); result.SetCanonical(); result.SetHash(hash_); return result.ptr(); } } StringPtr ConcatString::ToSymbol() const { String& result = String::Handle(String::Concat(str1_, str2_, Heap::kOld)); result.SetCanonical(); result.SetHash(hash_); return result.ptr(); } const char* Symbols::Name(SymbolId symbol) { ASSERT((symbol > kIllegal) && (symbol < kNullCharId)); return names[symbol]; } const String& Symbols::Token(Token::Kind token) { const int tok_index = token; ASSERT((0 <= tok_index) && (tok_index < Token::kNumTokens)); // First keyword symbol is in symbol_handles_[kTokenTableStart + 1]. const intptr_t token_id = Symbols::kTokenTableStart + 1 + tok_index; ASSERT(symbol_handles_[token_id] != NULL); return *symbol_handles_[token_id]; } void Symbols::Init(IsolateGroup* vm_isolate_group) { // Should only be run by the vm isolate. ASSERT(IsolateGroup::Current() == Dart::vm_isolate_group()); ASSERT(vm_isolate_group == Dart::vm_isolate_group()); Zone* zone = Thread::Current()->zone(); // Create and setup a symbol table in the vm isolate. SetupSymbolTable(vm_isolate_group); // Create all predefined symbols. ASSERT((sizeof(names) / sizeof(const char*)) == Symbols::kNullCharId); CanonicalStringSet table(zone, vm_isolate_group->object_store()->symbol_table()); // First set up all the predefined string symbols. // Create symbols for language keywords. Some keywords are equal to // symbols we already created, so use New() instead of Add() to ensure // that the symbols are canonicalized. for (intptr_t i = 1; i < Symbols::kNullCharId; i++) { String* str = String::ReadOnlyHandle(); *str = OneByteString::New(names[i], Heap::kOld); str->Hash(); *str ^= table.InsertOrGet(*str); str->SetCanonical(); // Make canonical once entered. symbol_handles_[i] = str; } // Add Latin1 characters as Symbols, so that Symbols::FromCharCode is fast. for (intptr_t c = 0; c < kNumberOfOneCharCodeSymbols; c++) { intptr_t idx = (kNullCharId + c); ASSERT(idx < kMaxPredefinedId); ASSERT(Utf::IsLatin1(c)); uint8_t ch = static_cast(c); String* str = String::ReadOnlyHandle(); *str = OneByteString::New(&ch, 1, Heap::kOld); str->Hash(); *str ^= table.InsertOrGet(*str); ASSERT(predefined_[c] == nullptr); str->SetCanonical(); // Make canonical once entered. predefined_[c] = str->ptr(); symbol_handles_[idx] = str; } vm_isolate_group->object_store()->set_symbol_table(table.Release()); } void Symbols::InitFromSnapshot(IsolateGroup* vm_isolate_group) { // Should only be run by the vm isolate. ASSERT(IsolateGroup::Current() == Dart::vm_isolate_group()); ASSERT(vm_isolate_group == Dart::vm_isolate_group()); Zone* zone = Thread::Current()->zone(); CanonicalStringSet table(zone, vm_isolate_group->object_store()->symbol_table()); // Lookup all the predefined string symbols and language keyword symbols // and cache them in the read only handles for fast access. for (intptr_t i = 1; i < Symbols::kNullCharId; i++) { String* str = String::ReadOnlyHandle(); const unsigned char* name = reinterpret_cast(names[i]); *str ^= table.GetOrNull(Latin1Array(name, strlen(names[i]))); ASSERT(!str->IsNull()); ASSERT(str->HasHash()); ASSERT(str->IsCanonical()); symbol_handles_[i] = str; } // Lookup Latin1 character Symbols and cache them in read only handles, // so that Symbols::FromCharCode is fast. for (intptr_t c = 0; c < kNumberOfOneCharCodeSymbols; c++) { intptr_t idx = (kNullCharId + c); ASSERT(idx < kMaxPredefinedId); ASSERT(Utf::IsLatin1(c)); uint8_t ch = static_cast(c); String* str = String::ReadOnlyHandle(); *str ^= table.GetOrNull(Latin1Array(&ch, 1)); ASSERT(!str->IsNull()); ASSERT(str->HasHash()); ASSERT(str->IsCanonical()); predefined_[c] = str->ptr(); symbol_handles_[idx] = str; } vm_isolate_group->object_store()->set_symbol_table(table.Release()); } void Symbols::SetupSymbolTable(IsolateGroup* isolate_group) { ASSERT(isolate_group != nullptr); // Setup the symbol table used within the String class. const intptr_t initial_size = (isolate_group == Dart::vm_isolate_group()) ? kInitialVMIsolateSymtabSize : kInitialSymtabSize; Array& array = Array::Handle( HashTables::New(initial_size, Heap::kOld)); isolate_group->object_store()->set_symbol_table(array); } void Symbols::GetStats(IsolateGroup* isolate_group, intptr_t* size, intptr_t* capacity) { ASSERT(isolate_group != nullptr); CanonicalStringSet table(isolate_group->object_store()->symbol_table()); *size = table.NumOccupied(); *capacity = table.NumEntries(); table.Release(); } StringPtr Symbols::New(Thread* thread, const char* cstr, intptr_t len) { ASSERT((cstr != NULL) && (len >= 0)); const uint8_t* utf8_array = reinterpret_cast(cstr); return Symbols::FromUTF8(thread, utf8_array, len); } StringPtr Symbols::FromUTF8(Thread* thread, const uint8_t* utf8_array, intptr_t array_len) { if (array_len == 0 || utf8_array == NULL) { return FromLatin1(thread, reinterpret_cast(NULL), 0); } Utf8::Type type; intptr_t len = Utf8::CodeUnitCount(utf8_array, array_len, &type); ASSERT(len != 0); Zone* zone = thread->zone(); if (type == Utf8::kLatin1) { uint8_t* characters = zone->Alloc(len); if (!Utf8::DecodeToLatin1(utf8_array, array_len, characters, len)) { Utf8::ReportInvalidByte(utf8_array, array_len, len); return String::null(); } return FromLatin1(thread, characters, len); } ASSERT((type == Utf8::kBMP) || (type == Utf8::kSupplementary)); uint16_t* characters = zone->Alloc(len); if (!Utf8::DecodeToUTF16(utf8_array, array_len, characters, len)) { Utf8::ReportInvalidByte(utf8_array, array_len, len); return String::null(); } return FromUTF16(thread, characters, len); } StringPtr Symbols::FromLatin1(Thread* thread, const uint8_t* latin1_array, intptr_t len) { return NewSymbol(thread, Latin1Array(latin1_array, len)); } StringPtr Symbols::FromUTF16(Thread* thread, const uint16_t* utf16_array, intptr_t len) { return NewSymbol(thread, UTF16Array(utf16_array, len)); } StringPtr Symbols::FromConcat(Thread* thread, const String& str1, const String& str2) { if (str1.Length() == 0) { return New(thread, str2); } else if (str2.Length() == 0) { return New(thread, str1); } else { return NewSymbol(thread, ConcatString(str1, str2)); } } StringPtr Symbols::FromGet(Thread* thread, const String& str) { return FromConcat(thread, GetterPrefix(), str); } StringPtr Symbols::FromSet(Thread* thread, const String& str) { return FromConcat(thread, SetterPrefix(), str); } StringPtr Symbols::FromDot(Thread* thread, const String& str) { return FromConcat(thread, str, Dot()); } // TODO(srdjan): If this becomes performance critical code, consider looking // up symbol from hash of pieces instead of concatenating them first into // a string. StringPtr Symbols::FromConcatAll( Thread* thread, const GrowableHandlePtrArray& strs) { const intptr_t strs_length = strs.length(); GrowableArray lengths(strs_length); intptr_t len_sum = 0; const intptr_t kOneByteChar = 1; intptr_t char_size = kOneByteChar; for (intptr_t i = 0; i < strs_length; i++) { const String& str = strs[i]; const intptr_t str_len = str.Length(); if ((String::kMaxElements - len_sum) < str_len) { Exceptions::ThrowOOM(); UNREACHABLE(); } len_sum += str_len; lengths.Add(str_len); char_size = Utils::Maximum(char_size, str.CharSize()); } const bool is_one_byte_string = char_size == kOneByteChar; Zone* zone = thread->zone(); if (is_one_byte_string) { uint8_t* buffer = zone->Alloc(len_sum); const uint8_t* const orig_buffer = buffer; for (intptr_t i = 0; i < strs_length; i++) { NoSafepointScope no_safepoint; intptr_t str_len = lengths[i]; if (str_len > 0) { const String& str = strs[i]; ASSERT(str.IsOneByteString() || str.IsExternalOneByteString()); const uint8_t* src_p = str.IsOneByteString() ? OneByteString::DataStart(str) : ExternalOneByteString::DataStart(str); memmove(buffer, src_p, str_len); buffer += str_len; } } ASSERT(len_sum == buffer - orig_buffer); return Symbols::FromLatin1(thread, orig_buffer, len_sum); } else { uint16_t* buffer = zone->Alloc(len_sum); const uint16_t* const orig_buffer = buffer; for (intptr_t i = 0; i < strs_length; i++) { NoSafepointScope no_safepoint; intptr_t str_len = lengths[i]; if (str_len > 0) { const String& str = strs[i]; if (str.IsTwoByteString()) { memmove(buffer, TwoByteString::DataStart(str), str_len * 2); } else if (str.IsExternalTwoByteString()) { memmove(buffer, ExternalTwoByteString::DataStart(str), str_len * 2); } else { // One-byte to two-byte string copy. ASSERT(str.IsOneByteString() || str.IsExternalOneByteString()); const uint8_t* src_p = str.IsOneByteString() ? OneByteString::DataStart(str) : ExternalOneByteString::DataStart(str); for (int n = 0; n < str_len; n++) { buffer[n] = src_p[n]; } } buffer += str_len; } } ASSERT(len_sum == buffer - orig_buffer); return Symbols::FromUTF16(thread, orig_buffer, len_sum); } } // StringType can be StringSlice, ConcatString, or {Latin1,UTF16}Array. template StringPtr Symbols::NewSymbol(Thread* thread, const StringType& str) { REUSABLE_OBJECT_HANDLESCOPE(thread); REUSABLE_SMI_HANDLESCOPE(thread); REUSABLE_ARRAY_HANDLESCOPE(thread); String& symbol = String::Handle(thread->zone()); dart::Object& key = thread->ObjectHandle(); Smi& value = thread->SmiHandle(); Array& data = thread->ArrayHandle(); { auto vm_isolate_group = Dart::vm_isolate_group(); data = vm_isolate_group->object_store()->symbol_table(); CanonicalStringSet table(&key, &value, &data); symbol ^= table.GetOrNull(str); table.Release(); } if (symbol.IsNull()) { IsolateGroup* group = thread->isolate_group(); ObjectStore* object_store = group->object_store(); RELEASE_ASSERT(!thread->IsAtSafepoint()); // Most common case: The symbol is already in the table. { // We do allow lock-free concurrent read access to the symbol table. // Both, the array in the ObjectStore as well as elements in the array // are accessed via store-release/load-acquire barriers. data = object_store->symbol_table(); CanonicalStringSet table(&key, &value, &data); symbol ^= table.GetOrNull(str); table.Release(); } // Otherwise we'll have to get exclusive access and get-or-insert it. if (symbol.IsNull()) { SafepointMutexLocker ml(group->symbols_mutex()); data = object_store->symbol_table(); CanonicalStringSet table(&key, &value, &data); symbol ^= table.InsertNewOrGet(str); object_store->set_symbol_table(table.Release()); } } ASSERT(symbol.IsSymbol()); ASSERT(symbol.HasHash()); return symbol.ptr(); } template StringPtr Symbols::Lookup(Thread* thread, const StringType& str) { REUSABLE_OBJECT_HANDLESCOPE(thread); REUSABLE_SMI_HANDLESCOPE(thread); REUSABLE_ARRAY_HANDLESCOPE(thread); String& symbol = String::Handle(thread->zone()); dart::Object& key = thread->ObjectHandle(); Smi& value = thread->SmiHandle(); Array& data = thread->ArrayHandle(); { auto vm_isolate_group = Dart::vm_isolate_group(); data = vm_isolate_group->object_store()->symbol_table(); CanonicalStringSet table(&key, &value, &data); symbol ^= table.GetOrNull(str); table.Release(); } if (symbol.IsNull()) { IsolateGroup* group = thread->isolate_group(); ObjectStore* object_store = group->object_store(); // See `Symbols::NewSymbol` for more information why we separate the two // cases. if (thread->IsAtSafepoint()) { RELEASE_ASSERT(group->safepoint_handler()->IsOwnedByTheThread(thread)); data = object_store->symbol_table(); CanonicalStringSet table(&key, &value, &data); symbol ^= table.GetOrNull(str); table.Release(); } else { data = object_store->symbol_table(); CanonicalStringSet table(&key, &value, &data); symbol ^= table.GetOrNull(str); table.Release(); } } ASSERT(symbol.IsNull() || symbol.IsSymbol()); ASSERT(symbol.IsNull() || symbol.HasHash()); return symbol.ptr(); } StringPtr Symbols::LookupFromConcat(Thread* thread, const String& str1, const String& str2) { if (str1.Length() == 0) { return Lookup(thread, str2); } else if (str2.Length() == 0) { return Lookup(thread, str1); } else { return Lookup(thread, ConcatString(str1, str2)); } } StringPtr Symbols::LookupFromGet(Thread* thread, const String& str) { return LookupFromConcat(thread, GetterPrefix(), str); } StringPtr Symbols::LookupFromSet(Thread* thread, const String& str) { return LookupFromConcat(thread, SetterPrefix(), str); } StringPtr Symbols::LookupFromDot(Thread* thread, const String& str) { return LookupFromConcat(thread, str, Dot()); } StringPtr Symbols::New(Thread* thread, const String& str) { if (str.IsSymbol()) { return str.ptr(); } return New(thread, str, 0, str.Length()); } StringPtr Symbols::New(Thread* thread, const String& str, intptr_t begin_index, intptr_t len) { return NewSymbol(thread, StringSlice(str, begin_index, len)); } StringPtr Symbols::NewFormatted(Thread* thread, const char* format, ...) { va_list args; va_start(args, format); StringPtr result = NewFormattedV(thread, format, args); NoSafepointScope no_safepoint; va_end(args); return result; } StringPtr Symbols::NewFormattedV(Thread* thread, const char* format, va_list args) { va_list args_copy; va_copy(args_copy, args); intptr_t len = Utils::VSNPrint(NULL, 0, format, args_copy); va_end(args_copy); Zone* zone = Thread::Current()->zone(); char* buffer = zone->Alloc(len + 1); Utils::VSNPrint(buffer, (len + 1), format, args); return Symbols::New(thread, buffer); } StringPtr Symbols::FromCharCode(Thread* thread, uint16_t char_code) { if (char_code > kMaxOneCharCodeSymbol) { return FromUTF16(thread, &char_code, 1); } return predefined_[char_code]; } void Symbols::DumpStats(IsolateGroup* isolate_group) { intptr_t size = -1; intptr_t capacity = -1; // First dump VM symbol table stats. GetStats(Dart::vm_isolate_group(), &size, &capacity); OS::PrintErr("VM Isolate: Number of symbols : %" Pd "\n", size); OS::PrintErr("VM Isolate: Symbol table capacity : %" Pd "\n", capacity); // Now dump regular isolate symbol table stats. GetStats(isolate_group, &size, &capacity); OS::PrintErr("Isolate: Number of symbols : %" Pd "\n", size); OS::PrintErr("Isolate: Symbol table capacity : %" Pd "\n", capacity); // TODO(koda): Consider recording growth and collision stats in HashTable, // in DEBUG mode. } void Symbols::DumpTable(IsolateGroup* isolate_group) { OS::PrintErr("symbols:\n"); CanonicalStringSet table(isolate_group->object_store()->symbol_table()); table.Dump(); table.Release(); } } // namespace dart