LibWeb: Implement text-transform: capitalize

This commit is contained in:
Cr4xy 2023-09-29 12:32:15 +02:00 committed by Tim Flynn
parent 9f7cfb1394
commit bbfe0d3a82
7 changed files with 27 additions and 9 deletions

View file

@ -53,10 +53,10 @@ ErrorOr<DeprecatedString> to_unicode_uppercase_full(StringView string, Optional<
return builder.to_deprecated_string();
}
ErrorOr<String> to_unicode_titlecase_full(StringView string, Optional<StringView> const& locale)
ErrorOr<String> to_unicode_titlecase_full(StringView string, Optional<StringView> const& locale, TrailingCodePointTransformation trailing_code_point_transformation)
{
StringBuilder builder;
TRY(Detail::build_titlecase_string(Utf8View { string }, builder, locale));
TRY(Detail::build_titlecase_string(Utf8View { string }, builder, locale, trailing_code_point_transformation));
return builder.to_string();
}

View file

@ -34,6 +34,13 @@ struct BlockName {
StringView display_name;
};
// Selects what titlecasing does with the code points of each word that follow its titlecased leading letter.
// to_unicode_titlecase_full() takes this as its last argument and defaults it to Lowercase.
enum class TrailingCodePointTransformation : u8 {
// Default behaviour: puts the first typographic letter unit of each word, if lowercase, in titlecase; the remaining characters are converted to lowercase.
Lowercase,
// Puts the first typographic letter unit of each word, if lowercase, in titlecase; the remaining characters are appended unchanged.
// This is the behaviour described for CSS `text-transform: capitalize` (https://drafts.csswg.org/css-text/#valdef-text-transform-capitalize).
PreserveExisting,
};
Optional<DeprecatedString> code_point_display_name(u32 code_point);
Optional<StringView> code_point_block_display_name(u32 code_point);
Optional<StringView> code_point_abbreviation(u32 code_point);
@ -50,7 +57,7 @@ u32 to_unicode_titlecase(u32 code_point);
ErrorOr<DeprecatedString> to_unicode_lowercase_full(StringView, Optional<StringView> const& locale = {});
ErrorOr<DeprecatedString> to_unicode_uppercase_full(StringView, Optional<StringView> const& locale = {});
ErrorOr<String> to_unicode_titlecase_full(StringView, Optional<StringView> const& locale = {});
ErrorOr<String> to_unicode_titlecase_full(StringView, Optional<StringView> const& locale = {}, TrailingCodePointTransformation trailing_code_point_transformation = TrailingCodePointTransformation::Lowercase);
ErrorOr<String> to_unicode_casefold_full(StringView);
Optional<GeneralCategory> general_category_from_string(StringView);

View file

@ -19,6 +19,7 @@ enum class Property : u8;
enum class Script : u8;
enum class SentenceBreakProperty : u8;
enum class WordBreakProperty : u8;
enum class TrailingCodePointTransformation : u8;
struct CodePointDecomposition;
struct CurrencyCode;

View file

@ -8,6 +8,7 @@
#include <AK/StringBuilder.h>
#include <AK/Utf32View.h>
#include <AK/Utf8View.h>
#include <LibUnicode/CharacterTypes.h>
#include <LibUnicode/UnicodeUtils.h>
// This file contains definitions of AK::String methods which require UCD data.
@ -31,7 +32,7 @@ ErrorOr<String> String::to_uppercase(Optional<StringView> const& locale) const
ErrorOr<String> String::to_titlecase(Optional<StringView> const& locale) const
{
StringBuilder builder;
TRY(Unicode::Detail::build_titlecase_string(code_points(), builder, locale));
TRY(Unicode::Detail::build_titlecase_string(code_points(), builder, locale, Unicode::TrailingCodePointTransformation::Lowercase));
return builder.to_string();
}

View file

@ -266,7 +266,7 @@ ErrorOr<void> build_uppercase_string([[maybe_unused]] Utf8View code_points, [[ma
}
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
ErrorOr<void> build_titlecase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional<StringView> const& locale)
ErrorOr<void> build_titlecase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional<StringView> const& locale, TrailingCodePointTransformation trailing_code_point_transformation)
{
#if ENABLE_UNICODE_DATA
// toTitlecase(X): Find the word boundaries in X according to Unicode Standard Annex #29,
@ -317,8 +317,15 @@ ErrorOr<void> build_titlecase_string([[maybe_unused]] Utf8View code_points, [[ma
boundary = code_point_offset + code_point_length;
}
auto substring_to_lowercase = code_points.substring_view(boundary, *next_boundary - boundary);
TRY(build_lowercase_string(substring_to_lowercase, builder, locale));
auto remaining_code_points = code_points.substring_view(boundary, *next_boundary - boundary);
switch (trailing_code_point_transformation) {
case TrailingCodePointTransformation::Lowercase:
TRY(build_lowercase_string(remaining_code_points, builder, locale));
break;
case TrailingCodePointTransformation::PreserveExisting:
TRY(builder.try_append(remaining_code_points.as_string()));
break;
}
boundary = *next_boundary;
}

View file

@ -17,7 +17,7 @@ namespace Unicode::Detail {
ErrorOr<void> build_lowercase_string(Utf8View code_points, StringBuilder& builder, Optional<StringView> const& locale);
ErrorOr<void> build_uppercase_string(Utf8View code_points, StringBuilder& builder, Optional<StringView> const& locale);
ErrorOr<void> build_titlecase_string(Utf8View code_points, StringBuilder& builder, Optional<StringView> const& locale);
ErrorOr<void> build_titlecase_string(Utf8View code_points, StringBuilder& builder, Optional<StringView> const& locale, TrailingCodePointTransformation trailing_code_point_transformation);
ErrorOr<void> build_casefold_string(Utf8View code_points, StringBuilder& builder);
Utf32View casefold_code_point(u32 const& code_point);

View file

@ -287,7 +287,9 @@ static ErrorOr<DeprecatedString> apply_text_transform(DeprecatedString const& st
return string;
case CSS::TextTransform::MathAuto:
return apply_math_auto_text_transform(string);
case CSS::TextTransform::Capitalize:
case CSS::TextTransform::Capitalize: {
return TRY(Unicode::to_unicode_titlecase_full(string, {}, Unicode::TrailingCodePointTransformation::PreserveExisting)).to_deprecated_string();
}
case CSS::TextTransform::FullSizeKana:
case CSS::TextTransform::FullWidth:
// FIXME: Implement these!