From 347d741afba39947d40f6efb567fa02b95d20e7f Mon Sep 17 00:00:00 2001 From: AnotherTest Date: Mon, 22 Feb 2021 02:37:24 +0330 Subject: [PATCH] AK+Userland: Extend the compiletime format string check to other functions Thanks to @trflynn89 for the neat implicit consteval ctor trick! This allows us to basically slap `CheckedFormatString` on any formatting function, and have its format argument checked at compiletime. Note that there is a validator bug where it doesn't parse inner replaced fields like `{:~>{}}` correctly (what should be 'left align with next argument as size' is parsed as `{:~>{` following a literal closing brace), so the compiletime checks are disabled on these temporarily by forcing them to be StringViews. This commit also removes the now unused `AK::StringLiteral` type (which was introduced for use with NTTP strings). --- AK/CheckedFormatString.h | 242 ++++++++++++++++++++++++++++++++++++ AK/Format.h | 212 ++++--------------------------- AK/StdLibExtras.h | 35 +----- AK/TestSuite.h | 3 +- Userland/Shell/Shell.cpp | 8 +- Userland/Utilities/expr.cpp | 6 +- 6 files changed, 277 insertions(+), 229 deletions(-) create mode 100644 AK/CheckedFormatString.h diff --git a/AK/CheckedFormatString.h b/AK/CheckedFormatString.h new file mode 100644 index 0000000000..891716728a --- /dev/null +++ b/AK/CheckedFormatString.h @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2021, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include +#include +#include +#include + +#ifndef DBGLN_NO_COMPILETIME_FORMAT_CHECK +// FIXME: Seems like clang doesn't like calling 'consteval' functions inside 'consteval' functions quite the same way as GCC does, +// it seems to entirely forget that it accepted that parameters to a 'consteval' function to begin with. +# ifdef __clang__ +# define DBGLN_NO_COMPILETIME_FORMAT_CHECK +# endif +#endif + +#ifndef DBGLN_NO_COMPILETIME_FORMAT_CHECK +namespace AK::Format::Detail { + +// We have to define a local "purely constexpr" Array that doesn't lead back to us (via e.g. ASSERT) +template +struct Array { + constexpr static size_t size() { return Size; } + constexpr const T& operator[](size_t index) const { return __data[index]; } + constexpr T& operator[](size_t index) { return __data[index]; } + using ConstIterator = SimpleIterator; + using Iterator = SimpleIterator; + + constexpr ConstIterator begin() const { return ConstIterator::begin(*this); } + constexpr Iterator begin() { return Iterator::begin(*this); } + + constexpr ConstIterator end() const { return ConstIterator::end(*this); } + constexpr Iterator end() { return Iterator::end(*this); } + + T __data[Size]; +}; + +template +void compiletime_fail(Args...); + +template +consteval auto extract_used_argument_index(const char (&fmt)[N], size_t specifier_start_index, size_t specifier_end_index, size_t& next_implicit_argument_index) +{ + struct { + size_t index_value { 0 }; + bool saw_explicit_index { false }; + } state; + for (size_t i = specifier_start_index; i < specifier_end_index; ++i) { + auto c = fmt[i]; + if (c > '9' || c < '0') + break; + + state.index_value *= 10; + state.index_value += c - '0'; + state.saw_explicit_index = true; + } + + if (!state.saw_explicit_index) + return next_implicit_argument_index++; + + return state.index_value; +} + +// FIXME: We should rather parse these format strings at compile-time if possible. +template +consteval auto count_fmt_params(const char (&fmt)[N]) +{ + struct { + // FIXME: Switch to variable-sized storage whenever we can come up with one :) + Array used_arguments { 0 }; + size_t total_used_argument_count { 0 }; + size_t next_implicit_argument_index { 0 }; + bool has_explicit_argument_references { false }; + + size_t unclosed_braces { 0 }; + size_t extra_closed_braces { 0 }; + + Array last_format_specifier_start { 0 }; + size_t total_used_last_format_specifier_start_count { 0 }; + } result; + + for (size_t i = 0; i < N; ++i) { + auto ch = fmt[i]; + switch (ch) { + case '{': + if (i + 1 < N && fmt[i + 1] == '{') { + ++i; + continue; + } + + // Note: There's no compile-time throw, so we have to abuse a compile-time string to store errors. + if (result.total_used_last_format_specifier_start_count >= result.last_format_specifier_start.size() - 1) + compiletime_fail("Format-String Checker internal error: Format specifier nested too deep"); + + result.last_format_specifier_start[result.total_used_last_format_specifier_start_count++] = i + 1; + + ++result.unclosed_braces; + break; + case '}': + if (i + 1 < N && fmt[i + 1] == '}') { + ++i; + continue; + } + if (result.unclosed_braces) { + --result.unclosed_braces; + + if (result.total_used_last_format_specifier_start_count == 0) + compiletime_fail("Format-String Checker internal error: Expected location information"); + + const auto specifier_start_index = result.last_format_specifier_start[--result.total_used_last_format_specifier_start_count]; + + if (result.total_used_argument_count >= result.used_arguments.size()) + compiletime_fail("Format-String Checker internal error: Too many format arguments in format string"); + + auto used_argument_index = extract_used_argument_index(fmt, specifier_start_index, i, result.next_implicit_argument_index); + if (used_argument_index + 1 != result.next_implicit_argument_index) + result.has_explicit_argument_references = true; + result.used_arguments[result.total_used_argument_count++] = used_argument_index; + + } else { + ++result.extra_closed_braces; + } + break; + default: + continue; + } + } + return result; +} +} + +#endif + +namespace AK::Format::Detail { +template +struct CheckedFormatString { + template + consteval CheckedFormatString(const char (&fmt)[N]) + : m_string { fmt } + { +#ifndef DBGLN_NO_COMPILETIME_FORMAT_CHECK + check_format_parameter_consistency(fmt); +#endif + } + + template + CheckedFormatString(const T& unchecked_fmt) requires(requires(T t) { StringView { t }; }) + : m_string(unchecked_fmt) + { + } + + auto view() const { return m_string; } + +private: +#ifndef DBGLN_NO_COMPILETIME_FORMAT_CHECK + template + consteval static bool check_format_parameter_consistency(const char (&fmt)[N]) + { + auto check = count_fmt_params(fmt); + if (check.unclosed_braces != 0) + compiletime_fail("Extra unclosed braces in format string"); + if (check.extra_closed_braces != 0) + compiletime_fail("Extra closing braces in format string"); + + { + auto begin = check.used_arguments.begin(); + auto end = check.used_arguments.begin() + check.total_used_argument_count; + auto has_all_referenced_arguments = !AK::any_of(begin, end, [](auto& entry) { return entry >= param_count; }); + if (!has_all_referenced_arguments) + compiletime_fail("Format string references nonexistent parameter"); + } + + if (!check.has_explicit_argument_references && check.total_used_argument_count != param_count) + compiletime_fail("Format string does not reference all passed parameters"); + + // Ensure that no passed parameter is ignored or otherwise not referenced in the format + // As this check is generally pretty expensive, try to avoid it where it cannot fail. + // We will only do this check if the format string has explicit argument refs + // otherwise, the check above covers this check too, as implicit refs + // monotonically increase, and cannot have 'gaps'. + if (check.has_explicit_argument_references) { + auto all_parameters = iota_array(0); + constexpr auto contains = [](auto begin, auto end, auto entry) { + for (; begin != end; begin++) { + if (*begin == entry) + return true; + } + + return false; + }; + auto references_all_arguments = AK::all_of( + all_parameters.begin(), + all_parameters.end(), + [&](auto& entry) { + return contains( + check.used_arguments.begin(), + check.used_arguments.begin() + check.total_used_argument_count, + entry); + }); + if (!references_all_arguments) + compiletime_fail("Format string does not reference all passed parameters"); + } + + return true; + } +#endif + + StringView m_string; +}; +} + +namespace AK { + +template +using CheckedFormatString = Format::Detail::CheckedFormatString::Type...>; + +} diff --git a/AK/Format.h b/AK/Format.h index ffa692cbf2..64ad1cf97e 100644 --- a/AK/Format.h +++ b/AK/Format.h @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -37,161 +39,6 @@ # include #endif -#ifndef DBGLN_NO_COMPILETIME_FORMAT_CHECK -// Note: Clang 12 adds support for CTAD, but still fails to build the dbgln() checks, so they're disabled altogether for now. -// See https://oss-fuzz-build-logs.storage.googleapis.com/log-79750138-f41e-4f39-8812-7c536f1d2e35.txt, for example. -# if defined(__clang__) -# define DBGLN_NO_COMPILETIME_FORMAT_CHECK -# endif -#endif - -#ifndef DBGLN_NO_COMPILETIME_FORMAT_CHECK -namespace { - -template -consteval auto extract_used_argument_index(const char (&fmt)[N], size_t specifier_start_index, size_t specifier_end_index, size_t& next_implicit_argument_index) -{ - struct { - size_t index_value { 0 }; - bool saw_explicit_index { false }; - } state; - for (size_t i = specifier_start_index; i < specifier_end_index; ++i) { - auto c = fmt[i]; - if (c > '9' || c < '0') - break; - - state.index_value *= 10; - state.index_value += c - '0'; - state.saw_explicit_index = true; - } - - if (!state.saw_explicit_index) - return next_implicit_argument_index++; - - return state.index_value; -} - -// FIXME: We should rather parse these format strings at compile-time if possible. -template -consteval auto count_fmt_params(const char (&fmt)[N]) -{ - struct { - // FIXME: Switch to variable-sized storage whenever we can come up with one :) - Array used_arguments { 0 }; - size_t total_used_argument_count { 0 }; - size_t next_implicit_argument_index { 0 }; - bool has_explicit_argument_references { false }; - - size_t unclosed_braces { 0 }; - size_t extra_closed_braces { 0 }; - - Array last_format_specifier_start { 0 }; - size_t total_used_last_format_specifier_start_count { 0 }; - - StringLiteral<128> internal_error { { 0 } }; - } result; - - for (size_t i = 0; i < N; ++i) { - auto ch = fmt[i]; - switch (ch) { - case '{': - if (i + 1 < N && fmt[i + 1] == '{') { - ++i; - continue; - } - - // Note: There's no compile-time throw, so we have to abuse a compile-time string to store errors. - if (result.total_used_last_format_specifier_start_count >= result.last_format_specifier_start.size() - 1) - result.internal_error = "Format-String Checker internal error: Format specifier nested too deep"; - - result.last_format_specifier_start[result.total_used_last_format_specifier_start_count++] = i + 1; - - ++result.unclosed_braces; - break; - case '}': - if (i + 1 < N && fmt[i + 1] == '}') { - ++i; - continue; - } - if (result.unclosed_braces) { - --result.unclosed_braces; - - if (result.total_used_last_format_specifier_start_count == 0) - result.internal_error = "Format-String Checker internal error: Expected location information"; - - const auto specifier_start_index = result.last_format_specifier_start[--result.total_used_last_format_specifier_start_count]; - - if (result.total_used_argument_count >= result.used_arguments.size()) - result.internal_error = "Format-String Checker internal error: Too many format arguments in format string"; - - auto used_argument_index = extract_used_argument_index(fmt, specifier_start_index, i, result.next_implicit_argument_index); - if (used_argument_index + 1 != result.next_implicit_argument_index) - result.has_explicit_argument_references = true; - result.used_arguments[result.total_used_argument_count++] = used_argument_index; - - } else { - ++result.extra_closed_braces; - } - break; - default: - continue; - } - } - return result; -} -} - -template fmt, size_t param_count, auto check = count_fmt_params(fmt.data)> -constexpr bool check_format_parameter_consistency() -{ - static_assert(check.internal_error.data[0] == 0, "Some internal error occured, try looking at the check function type for the error"); - static_assert(check.unclosed_braces == 0, "Extra unclosed braces in format string"); - static_assert(check.extra_closed_braces == 0, "Extra closing braces in format string"); - - { - constexpr auto begin = check.used_arguments.begin(); - constexpr auto end = check.used_arguments.begin() + check.total_used_argument_count; - constexpr auto has_all_referenced_arguments = !AK::any_of(begin, end, [](auto& entry) { return entry >= param_count; }); - static_assert(has_all_referenced_arguments, "Format string references nonexistent parameter"); - } - - if constexpr (!check.has_explicit_argument_references) - static_assert(check.total_used_argument_count == param_count, "Format string does not reference all passed parameters"); - - // Ensure that no passed parameter is ignored or otherwise not referenced in the format - // As this check is generally pretty expensive, try to avoid it where it cannot fail. - // We will only do this check if the format string has explicit argument refs - // otherwise, the check above covers this check too, as implicit refs - // monotonically increase, and cannot have 'gaps'. - if constexpr (check.has_explicit_argument_references) { - constexpr auto all_parameters = iota_array(0); - auto contains = [](auto begin, auto end, auto entry) { - for (; begin != end; begin++) { - if (*begin == entry) - return true; - } - - return false; - }; - constexpr auto references_all_arguments = AK::all_of( - all_parameters.begin(), - all_parameters.end(), - [&](auto& entry) { - return contains( - check.used_arguments.begin(), - check.used_arguments.begin() + check.total_used_argument_count, - entry); - }); - static_assert(references_all_arguments, "Format string does not reference all passed parameters"); - } - - return true; -} - -template -concept ConsistentFormatParameters = check_format_parameter_consistency(); -#endif - namespace AK { class TypeErasedFormatParams; @@ -523,48 +370,38 @@ void vformat(const LogStream& stream, StringView fmtstr, TypeErasedFormatParams) void vout(FILE*, StringView fmtstr, TypeErasedFormatParams, bool newline = false); template -void out(FILE* file, StringView fmtstr, const Parameters&... parameters) { vout(file, fmtstr, VariadicFormatParams { parameters... }); } +void out(FILE* file, CheckedFormatString&& fmtstr, const Parameters&... parameters) { vout(file, fmtstr.view(), VariadicFormatParams { parameters... }); } + template -void outln(FILE* file, StringView fmtstr, const Parameters&... parameters) { vout(file, fmtstr, VariadicFormatParams { parameters... }, true); } -template -void outln(FILE* file, const char* fmtstr, const Parameters&... parameters) { vout(file, fmtstr, VariadicFormatParams { parameters... }, true); } +void outln(FILE* file, CheckedFormatString&& fmtstr, const Parameters&... parameters) { vout(file, fmtstr.view(), VariadicFormatParams { parameters... }, true); } + inline void outln(FILE* file) { fputc('\n', file); } template -void out(StringView fmtstr, const Parameters&... parameters) { out(stdout, fmtstr, parameters...); } +void out(CheckedFormatString&& fmtstr, const Parameters&... parameters) { out(stdout, move(fmtstr), parameters...); } + template -void outln(StringView fmtstr, const Parameters&... parameters) { outln(stdout, fmtstr, parameters...); } -template -void outln(const char* fmtstr, const Parameters&... parameters) { outln(stdout, fmtstr, parameters...); } +void outln(CheckedFormatString&& fmtstr, const Parameters&... parameters) { outln(stdout, move(fmtstr), parameters...); } + inline void outln() { outln(stdout); } template -void warn(StringView fmtstr, const Parameters&... parameters) { out(stderr, fmtstr, parameters...); } +void warn(CheckedFormatString&& fmtstr, const Parameters&... parameters) { out(stderr, move(fmtstr), parameters...); } + template -void warnln(StringView fmtstr, const Parameters&... parameters) { outln(stderr, fmtstr, parameters...); } -template -void warnln(const char* fmtstr, const Parameters&... parameters) { outln(stderr, fmtstr, parameters...); } +void warnln(CheckedFormatString&& fmtstr, const Parameters&... parameters) { outln(stderr, move(fmtstr), parameters...); } + inline void warnln() { outln(stderr); } #endif void vdbgln(StringView fmtstr, TypeErasedFormatParams); -#ifndef DBGLN_NO_COMPILETIME_FORMAT_CHECK -template -void dbgln(const Parameters&... parameters) requires ConsistentFormatParameters -{ - dbgln(StringView { fmt.data }, parameters...); -} -#endif - template -void dbgln(StringView fmtstr, const Parameters&... parameters) +void dbgln(CheckedFormatString&& fmtstr, const Parameters&... parameters) { if constexpr (enabled) - vdbgln(fmtstr, VariadicFormatParams { parameters... }); + vdbgln(fmtstr.view(), VariadicFormatParams { parameters... }); } -template -void dbgln(const char* fmtstr, const Parameters&... parameters) { dbgln(StringView { fmtstr }, parameters...); } template void dbgln() { dbgln(""); } @@ -575,10 +412,10 @@ void set_debug_enabled(bool); void vdmesgln(StringView fmtstr, TypeErasedFormatParams); template -void dmesgln(StringView fmtstr, const Parameters&... parameters) { vdmesgln(fmtstr, VariadicFormatParams { parameters... }); } -template -void dmesgln(const char* fmtstr, const Parameters&... parameters) { vdmesgln(fmtstr, VariadicFormatParams { parameters... }); } -inline void dmesgln() { dmesgln(""); } +void dmesgln(CheckedFormatString&& fmt, const Parameters&... parameters) +{ + vdmesgln(fmt.view(), VariadicFormatParams { parameters... }); +} #endif template @@ -647,13 +484,8 @@ using AK::warnln; using AK::dbgln; +using AK::CheckedFormatString; using AK::FormatIfSupported; using AK::FormatString; -#ifdef DBGLN_NO_COMPILETIME_FORMAT_CHECK -# define dbgln(fmt, ...) dbgln(fmt, ##__VA_ARGS__) -# define dbgln_if(flag, fmt, ...) dbgln(fmt, ##__VA_ARGS__) -#else -# define dbgln(fmt, ...) dbgln(__VA_ARGS__) -# define dbgln_if(flag, fmt, ...) dbgln(__VA_ARGS__) -#endif +#define dbgln_if(flag, fmt, ...) dbgln(fmt, ##__VA_ARGS__) diff --git a/AK/StdLibExtras.h b/AK/StdLibExtras.h index 20c9b664da..b266700672 100644 --- a/AK/StdLibExtras.h +++ b/AK/StdLibExtras.h @@ -565,36 +565,9 @@ using MakeIntegerSequence = decltype(make_integer_sequence_impl()); template using MakeIndexSequence = MakeIntegerSequence; -template -struct StringLiteral { - constexpr StringLiteral(const char (&in)[N]) - : data {} - , size { N } - { - for (unsigned long i = 0; i < N; ++i) - data[i] = in[i]; - } - - template - constexpr StringLiteral& operator=(const StringLiteral& other) - { - static_assert(Nx <= N, "Storing a string literal in a smaller one"); - for (unsigned long i = 0; i < Nx; ++i) - data[i] = other[i]; - return *this; - } - - template - constexpr StringLiteral& operator=(const char (&other)[Nx]) - { - static_assert(Nx <= N, "Storing a string literal in a smaller one"); - for (unsigned long i = 0; i < Nx; ++i) - data[i] = other[i]; - return *this; - } - - char data[N]; - unsigned long size; +template +struct IdentityType { + using Type = T; }; } @@ -608,6 +581,7 @@ using AK::declval; using AK::DependentFalse; using AK::exchange; using AK::forward; +using AK::IdentityType; using AK::IndexSequence; using AK::IntegerSequence; using AK::is_trivial; @@ -630,6 +604,5 @@ using AK::max; using AK::min; using AK::move; using AK::RemoveConst; -using AK::StringLiteral; using AK::swap; using AK::Void; diff --git a/AK/TestSuite.h b/AK/TestSuite.h index 0eee0be61a..9c5e7ed9e5 100644 --- a/AK/TestSuite.h +++ b/AK/TestSuite.h @@ -27,11 +27,12 @@ #pragma once #include +#include namespace AK { template -void warnln(const char* fmtstr, const Parameters&...); +void warnln(CheckedFormatString&& fmtstr, const Parameters&...); } diff --git a/Userland/Shell/Shell.cpp b/Userland/Shell/Shell.cpp index 5d82bca448..c13d9922e1 100644 --- a/Userland/Shell/Shell.cpp +++ b/Userland/Shell/Shell.cpp @@ -1894,16 +1894,16 @@ void Shell::possibly_print_error() const warn("\x1b[31m"); size_t length_written_so_far = 0; if (line == (i64)source_position.position->start_line.line_number) { - warn("{:~>{}}", "", 5 + source_position.position->start_line.line_column); + warn(StringView { "{:~>{}}" }, "", 5 + source_position.position->start_line.line_column); length_written_so_far += source_position.position->start_line.line_column; } else { - warn("{:~>{}}", "", 5); + warn(StringView { "{:~>{}}" }, "", 5); } if (line == (i64)source_position.position->end_line.line_number) { - warn("{:^>{}}", "", source_position.position->end_line.line_column - length_written_so_far); + warn(StringView { "{:^>{}}" }, "", source_position.position->end_line.line_column - length_written_so_far); length_written_so_far += source_position.position->start_line.line_column; } else { - warn("{:^>{}}", "", current_line.length() - length_written_so_far); + warn(StringView { "{:^>{}}" }, "", current_line.length() - length_written_so_far); } warnln("\x1b[0m"); } diff --git a/Userland/Utilities/expr.cpp b/Userland/Utilities/expr.cpp index 896409a6d9..cfcc8f7ea3 100644 --- a/Userland/Utilities/expr.cpp +++ b/Userland/Utilities/expr.cpp @@ -45,11 +45,11 @@ Print the value of EXPRESSION to standard output.)"); exit(0); } -template -[[noreturn]] void fail(Args&&... args) +template +[[noreturn]] void fail(Fmt&& fmt, Args&&... args) { warn("ERROR: \e[31m"); - warnln(args...); + warnln(StringView { fmt }, args...); warn("\e[0m"); exit(1); }