AK: Handle LEB128 encoded values that are too large for the result type

Previously, we would go crazy and shift things way out of bounds. Add tests to verify that the decoding algorithm is safe around the limits of the result type.
2024-07-21 10:05:32 +00:00 · 2021-05-30 16:12:11 -06:00 · 2021-05-30 16:12:11 -06:00 · 0af192ff8d
parent c59cf0da07
commit 0af192ff8d
2 changed files with 130 additions and 7 deletions
--- a/AK/LEB128.h
+++ b/AK/LEB128.h
@ -6,6 +6,7 @@

 #pragma once

+#include <AK/NumericLimits.h>
 #include <AK/Stream.h>
 #include <AK/Types.h>

@ -29,13 +30,18 @@ struct LEB128 {
                input_stream.set_fatal_error();
                return false;
            }
-
            u8 byte = 0;
            input_stream >> byte;
            if (input_stream.has_any_error())
                return false;

-            result = (result) | (static_cast<ValueType>(byte & ~(1 << 7)) << (num_bytes * 7));
+            ValueType masked_byte = byte & ~(1 << 7);
+            const bool shift_too_large_for_result = (num_bytes * 7 > sizeof(ValueType) * 8) && (masked_byte != 0);
+            const bool shift_too_large_for_byte = ((masked_byte << (num_bytes * 7)) >> (num_bytes * 7)) != masked_byte;
+            if (shift_too_large_for_result || shift_too_large_for_byte)
+                return false;
+
+            result = (result) | (masked_byte << (num_bytes * 7));
            if (!(byte & (1 << 7)))
                break;
            ++num_bytes;
@ -47,15 +53,18 @@ struct LEB128 {
    template<typename StreamT, typename ValueType = ssize_t>
    static bool read_signed(StreamT& stream, ValueType& result)
    {
-        using UValueType = MakeUnsigned<ValueType>;
+        // Note: We read into a u64 to simplify the parsing logic;
+        //    result is range checked into ValueType after parsing.
+        static_assert(sizeof(ValueType) <= sizeof(u64), "Error checking logic assumes 64 bits or less!");
        [[maybe_unused]] size_t backup_offset = 0;
        if constexpr (requires { stream.offset(); })
            backup_offset = stream.offset();
        InputStream& input_stream { stream };

-        result = 0;
+        i64 temp = 0;
        size_t num_bytes = 0;
        u8 byte = 0;
+        result = 0;

        do {
            if (input_stream.unreliable_eof()) {
@ -68,15 +77,32 @@ struct LEB128 {
            input_stream >> byte;
            if (input_stream.has_any_error())
                return false;
-            result = (result) | (static_cast<UValueType>(byte & ~(1 << 7)) << (num_bytes * 7));
+
+            // note: 64 bit assumptions!
+            u64 masked_byte = byte & ~(1 << 7);
+            const bool shift_too_large_for_result = (num_bytes * 7 >= 64) && (masked_byte != ((temp < 0) ? 0x7Fu : 0u));
+            const bool shift_too_large_for_byte = (num_bytes * 7) == 63 && masked_byte != 0x00 && masked_byte != 0x7Fu;
+
+            if (shift_too_large_for_result || shift_too_large_for_byte)
+                return false;
+
+            temp = (temp) | (masked_byte << (num_bytes * 7));
            ++num_bytes;
        } while (byte & (1 << 7));

-        if (num_bytes * 7 < sizeof(UValueType) * 4 && (byte & 0x40)) {
+        if ((num_bytes * 7) < 64 && (byte & 0x40)) {
            // sign extend
-            result |= ((UValueType)(-1) << (num_bytes * 7));
+            temp |= ((u64)(-1) << (num_bytes * 7));
        }

+        // Now that we've accumulated into an i64, make sure it fits into result
+        if constexpr (sizeof(ValueType) < sizeof(u64)) {
+            if (temp > NumericLimits<ValueType>::max() || temp < NumericLimits<ValueType>::min())
+                return false;
+        }
+
+        result = static_cast<ValueType>(temp);
+
        return true;
    }
 };
--- a/Tests/AK/TestLEB128.cpp
+++ b/Tests/AK/TestLEB128.cpp
@ -6,6 +6,7 @@

 #include <AK/LEB128.h>
 #include <AK/MemoryStream.h>
+#include <AK/NumericLimits.h>
 #include <LibTest/TestCase.h>

 TEST_CASE(single_byte)
@ -113,3 +114,99 @@ TEST_CASE(two_bytes)
        }
    }
 }
+
+TEST_CASE(overflow_sizeof_output_unsigned)
+{
+    u8 u32_max_plus_one[] = { 0x80, 0x80, 0x80, 0x80, 0x10 };
+    {
+        u32 out = 0;
+        InputMemoryStream stream({ u32_max_plus_one, sizeof(u32_max_plus_one) });
+        EXPECT(!LEB128::read_unsigned(stream, out));
+        EXPECT_EQ(out, 0u);
+        EXPECT(!stream.handle_any_error());
+
+        u64 out64 = 0;
+        stream.seek(0);
+        EXPECT(LEB128::read_unsigned(stream, out64));
+        EXPECT_EQ(out64, static_cast<u64>(NumericLimits<u32>::max()) + 1);
+        EXPECT(!stream.handle_any_error());
+    }
+
+    u8 u32_max[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0x0F };
+    {
+        u32 out = 0;
+        InputMemoryStream stream({ u32_max, sizeof(u32_max) });
+        EXPECT(LEB128::read_unsigned(stream, out));
+        EXPECT_EQ(out, NumericLimits<u32>::max());
+        EXPECT(!stream.handle_any_error());
+
+        u64 out64 = 0;
+        stream.seek(0);
+        EXPECT(LEB128::read_unsigned(stream, out64));
+        EXPECT_EQ(out64, NumericLimits<u32>::max());
+        EXPECT(!stream.handle_any_error());
+    }
+}
+
+TEST_CASE(overflow_sizeof_output_signed)
+{
+    u8 i32_max_plus_one[] = { 0x80, 0x80, 0x80, 0x80, 0x08 };
+    {
+        i32 out = 0;
+        InputMemoryStream stream({ i32_max_plus_one, sizeof(i32_max_plus_one) });
+        EXPECT(!LEB128::read_signed(stream, out));
+        EXPECT_EQ(out, 0);
+        EXPECT(!stream.handle_any_error());
+
+        i64 out64 = 0;
+        stream.seek(0);
+        EXPECT(LEB128::read_signed(stream, out64));
+        EXPECT_EQ(out64, static_cast<i64>(NumericLimits<i32>::max()) + 1);
+        EXPECT(!stream.handle_any_error());
+    }
+
+    u8 i32_max[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0x07 };
+    {
+        i32 out = 0;
+        InputMemoryStream stream({ i32_max, sizeof(i32_max) });
+        EXPECT(LEB128::read_signed(stream, out));
+        EXPECT_EQ(out, NumericLimits<i32>::max());
+        EXPECT(!stream.handle_any_error());
+
+        i64 out64 = 0;
+        stream.seek(0);
+        EXPECT(LEB128::read_signed(stream, out64));
+        EXPECT_EQ(out64, NumericLimits<i32>::max());
+        EXPECT(!stream.handle_any_error());
+    }
+
+    u8 i32_min_minus_one[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0x77 };
+    {
+        i32 out = 0;
+        InputMemoryStream stream({ i32_min_minus_one, sizeof(i32_min_minus_one) });
+        EXPECT(!LEB128::read_signed(stream, out));
+        EXPECT_EQ(out, 0);
+        EXPECT(!stream.handle_any_error());
+
+        i64 out64 = 0;
+        stream.seek(0);
+        EXPECT(LEB128::read_signed(stream, out64));
+        EXPECT_EQ(out64, static_cast<i64>(NumericLimits<i32>::min()) - 1);
+        EXPECT(!stream.handle_any_error());
+    }
+
+    u8 i32_min[] = { 0x80, 0x80, 0x80, 0x80, 0x78 };
+    {
+        i32 out = 0;
+        InputMemoryStream stream({ i32_min, sizeof(i32_min) });
+        EXPECT(LEB128::read_signed(stream, out));
+        EXPECT_EQ(out, NumericLimits<i32>::min());
+        EXPECT(!stream.handle_any_error());
+
+        i64 out64 = 0;
+        stream.seek(0);
+        EXPECT(LEB128::read_signed(stream, out64));
+        EXPECT_EQ(out64, NumericLimits<i32>::min());
+        EXPECT(!stream.handle_any_error());
+    }
+}