LibUnicode+LibWeb: Let callers choose how title-casing treats trailing code points

What this patch does:
  * CharacterTypes.h declares `enum class TrailingCodePointTransformation : u8`
    with the values Lowercase and PreserveExisting, and Forward.h forward-declares it.
  * to_unicode_titlecase_full() gains a third parameter of that type; it defaults to
    Lowercase, so existing two-argument callers compile unchanged.
  * Detail::build_titlecase_string() gains the same parameter (no default). After the
    first cased code point of a word has been handled, the rest of the word is either
    passed through build_lowercase_string() (Lowercase) or appended to the builder
    unchanged (PreserveExisting).
  * String::to_titlecase() passes Lowercase explicitly.
  * LibWeb's apply_text_transform() handles CSS::TextTransform::Capitalize by calling
    to_unicode_titlecase_full() with an empty locale and PreserveExisting.

Review notes:
  * The new parameter in the build_titlecase_string() definition is marked
    [[maybe_unused]] like the three parameters before it. Its only visible use sits
    after `#if ENABLE_UNICODE_DATA`; presumably the other preprocessor branch does not
    touch it (that branch is not part of this patch - confirm), in which case an
    unannotated parameter would trip -Wunused-parameter.
  * NOTE(review): the post-image blob hash on the UnicodeUtils.cpp `index` line was not
    recomputed after that one-line change.
  * NOTE(review): this text was recovered from a whitespace-collapsed copy. Blank
    context lines were re-derived from the hunk line counts, so their exact position
    inside a hunk is an assumption. Template arguments and #include targets that were
    missing in that copy (e.g. after `ErrorOr`, `Optional`, `#include`) are left as found.
---
diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
index 89182efeb6..4f55760243 100644
--- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
@@ -53,10 +53,10 @@ ErrorOr to_unicode_uppercase_full(StringView string, Optional<
     return builder.to_deprecated_string();
 }
 
-ErrorOr to_unicode_titlecase_full(StringView string, Optional const& locale)
+ErrorOr to_unicode_titlecase_full(StringView string, Optional const& locale, TrailingCodePointTransformation trailing_code_point_transformation)
 {
     StringBuilder builder;
-    TRY(Detail::build_titlecase_string(Utf8View { string }, builder, locale));
+    TRY(Detail::build_titlecase_string(Utf8View { string }, builder, locale, trailing_code_point_transformation));
     return builder.to_string();
 }
 
diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.h b/Userland/Libraries/LibUnicode/CharacterTypes.h
index 8c4c205b62..29a61b0f61 100644
--- a/Userland/Libraries/LibUnicode/CharacterTypes.h
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.h
@@ -34,6 +34,13 @@ struct BlockName {
     StringView display_name;
 };
 
+enum class TrailingCodePointTransformation : u8 {
+    // Default behaviour; Puts the first typographic letter unit of each word, if lowercase, in titlecase; the other characters in lowercase.
+    Lowercase,
+    // Puts the first typographic letter unit of each word, if lowercase, in titlecase; other characters are unaffected. (https://drafts.csswg.org/css-text/#valdef-text-transform-capitalize)
+    PreserveExisting,
+};
+
 Optional code_point_display_name(u32 code_point);
 Optional code_point_block_display_name(u32 code_point);
 Optional code_point_abbreviation(u32 code_point);
@@ -50,7 +57,7 @@ u32 to_unicode_titlecase(u32 code_point);
 
 ErrorOr to_unicode_lowercase_full(StringView, Optional const& locale = {});
 ErrorOr to_unicode_uppercase_full(StringView, Optional const& locale = {});
-ErrorOr to_unicode_titlecase_full(StringView, Optional const& locale = {});
+ErrorOr to_unicode_titlecase_full(StringView, Optional const& locale = {}, TrailingCodePointTransformation trailing_code_point_transformation = TrailingCodePointTransformation::Lowercase);
 ErrorOr to_unicode_casefold_full(StringView);
 
 Optional general_category_from_string(StringView);
diff --git a/Userland/Libraries/LibUnicode/Forward.h b/Userland/Libraries/LibUnicode/Forward.h
index c6c6dfd9da..e61e1c076d 100644
--- a/Userland/Libraries/LibUnicode/Forward.h
+++ b/Userland/Libraries/LibUnicode/Forward.h
@@ -19,6 +19,7 @@ enum class Property : u8;
 enum class Script : u8;
 enum class SentenceBreakProperty : u8;
 enum class WordBreakProperty : u8;
+enum class TrailingCodePointTransformation : u8;
 
 struct CodePointDecomposition;
 struct CurrencyCode;
diff --git a/Userland/Libraries/LibUnicode/String.cpp b/Userland/Libraries/LibUnicode/String.cpp
index f6636a1575..9bbd3fb7fc 100644
--- a/Userland/Libraries/LibUnicode/String.cpp
+++ b/Userland/Libraries/LibUnicode/String.cpp
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include
 #include
 
 // This file contains definitions of AK::String methods which require UCD data.
@@ -31,7 +32,7 @@ ErrorOr String::to_uppercase(Optional const& locale) const
 ErrorOr String::to_titlecase(Optional const& locale) const
 {
     StringBuilder builder;
-    TRY(Unicode::Detail::build_titlecase_string(code_points(), builder, locale));
+    TRY(Unicode::Detail::build_titlecase_string(code_points(), builder, locale, Unicode::TrailingCodePointTransformation::Lowercase));
     return builder.to_string();
 }
 
diff --git a/Userland/Libraries/LibUnicode/UnicodeUtils.cpp b/Userland/Libraries/LibUnicode/UnicodeUtils.cpp
index 7fe764252d..fc50cd0edd 100644
--- a/Userland/Libraries/LibUnicode/UnicodeUtils.cpp
+++ b/Userland/Libraries/LibUnicode/UnicodeUtils.cpp
@@ -266,7 +266,7 @@ ErrorOr build_uppercase_string([[maybe_unused]] Utf8View code_points, [[ma
 }
 
 // https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
-ErrorOr build_titlecase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional const& locale)
+ErrorOr build_titlecase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional const& locale, [[maybe_unused]] TrailingCodePointTransformation trailing_code_point_transformation)
 {
 #if ENABLE_UNICODE_DATA
     // toTitlecase(X): Find the word boundaries in X according to Unicode Standard Annex #29,
@@ -317,8 +317,15 @@ ErrorOr build_titlecase_string([[maybe_unused]] Utf8View code_points, [[ma
             boundary = code_point_offset + code_point_length;
         }
 
-        auto substring_to_lowercase = code_points.substring_view(boundary, *next_boundary - boundary);
-        TRY(build_lowercase_string(substring_to_lowercase, builder, locale));
+        auto remaining_code_points = code_points.substring_view(boundary, *next_boundary - boundary);
+        switch (trailing_code_point_transformation) {
+        case TrailingCodePointTransformation::Lowercase:
+            TRY(build_lowercase_string(remaining_code_points, builder, locale));
+            break;
+        case TrailingCodePointTransformation::PreserveExisting:
+            TRY(builder.try_append(remaining_code_points.as_string()));
+            break;
+        }
 
         boundary = *next_boundary;
     }
diff --git a/Userland/Libraries/LibUnicode/UnicodeUtils.h b/Userland/Libraries/LibUnicode/UnicodeUtils.h
index a3f3b0cc82..320025112a 100644
--- a/Userland/Libraries/LibUnicode/UnicodeUtils.h
+++ b/Userland/Libraries/LibUnicode/UnicodeUtils.h
@@ -17,7 +17,7 @@ namespace Unicode::Detail {
 
 ErrorOr build_lowercase_string(Utf8View code_points, StringBuilder& builder, Optional const& locale);
 ErrorOr build_uppercase_string(Utf8View code_points, StringBuilder& builder, Optional const& locale);
-ErrorOr build_titlecase_string(Utf8View code_points, StringBuilder& builder, Optional const& locale);
+ErrorOr build_titlecase_string(Utf8View code_points, StringBuilder& builder, Optional const& locale, TrailingCodePointTransformation trailing_code_point_transformation);
 ErrorOr build_casefold_string(Utf8View code_points, StringBuilder& builder);
 Utf32View casefold_code_point(u32 const& code_point);
 
diff --git a/Userland/Libraries/LibWeb/Layout/TextNode.cpp b/Userland/Libraries/LibWeb/Layout/TextNode.cpp
index dee468d5b3..4347d2f651 100644
--- a/Userland/Libraries/LibWeb/Layout/TextNode.cpp
+++ b/Userland/Libraries/LibWeb/Layout/TextNode.cpp
@@ -287,7 +287,9 @@ static ErrorOr apply_text_transform(DeprecatedString const& st
         return string;
     case CSS::TextTransform::MathAuto:
         return apply_math_auto_text_transform(string);
-    case CSS::TextTransform::Capitalize:
+    case CSS::TextTransform::Capitalize: {
+        return TRY(Unicode::to_unicode_titlecase_full(string, {}, Unicode::TrailingCodePointTransformation::PreserveExisting)).to_deprecated_string();
+    }
     case CSS::TextTransform::FullSizeKana:
     case CSS::TextTransform::FullWidth:
         // FIXME: Implement these!