LibWasm: Implement most of iNxM SIMD operations

With this we pass an additional ~2100 tests. We are left with 7106 WASM fails :). There are still some test cases in the iNxM tests that fail with this PR, but they are somewhat weird. (cherry picked from commit b4acd4fb0b7f4105c7ef673ccc00904114c3c468) Co-authored-by: Diego Frias <styx5242@gmail.com>
2024-09-30 21:24:44 +00:00 · 2024-06-13 22:59:55 +02:00 · 2024-06-13 22:59:55 +02:00 · 34b3015c16
parent 1b93c75c86
commit 34b3015c16
2 changed files with 412 additions and 82 deletions
--- a/Userland/Libraries/LibWasm/AbstractMachine/BytecodeInterpreter.cpp
+++ b/Userland/Libraries/LibWasm/AbstractMachine/BytecodeInterpreter.cpp
@ -1414,6 +1414,36 @@ void BytecodeInterpreter::interpret(Configuration& configuration, InstructionPoi
        return binary_numeric_operation<u128, u128, Operators::VectorCmpOp<16, Operators::GreaterThanOrEquals, MakeSigned>>(configuration);
    case Instructions::i8x16_ge_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorCmpOp<16, Operators::GreaterThanOrEquals, MakeUnsigned>>(configuration);
    case Instructions::i8x16_abs.value():
        return unary_operation<u128, u128, Operators::VectorIntegerUnaryOp<16, Operators::Absolute>>(configuration);
    case Instructions::i8x16_neg.value():
        return unary_operation<u128, u128, Operators::VectorIntegerUnaryOp<16, Operators::Negate>>(configuration);
    case Instructions::i8x16_all_true.value():
        return unary_operation<u128, i32, Operators::VectorAllTrue<16>>(configuration);
    case Instructions::i8x16_popcnt.value():
        return unary_operation<u128, u128, Operators::VectorIntegerUnaryOp<16, Operators::PopCount>>(configuration);
    case Instructions::i8x16_add.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<16, Operators::Add>>(configuration);
    case Instructions::i8x16_sub.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<16, Operators::Subtract>>(configuration);
    case Instructions::i8x16_avgr_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<16, Operators::Average, MakeUnsigned>>(configuration);
    case Instructions::i8x16_add_sat_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<16, Operators::SaturatingOp<i8, Operators::Add>, MakeSigned>>(configuration);
    case Instructions::i8x16_add_sat_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<16, Operators::SaturatingOp<u8, Operators::Add>, MakeUnsigned>>(configuration);
    case Instructions::i8x16_sub_sat_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<16, Operators::SaturatingOp<i8, Operators::Subtract>, MakeSigned>>(configuration);
    case Instructions::i8x16_sub_sat_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<16, Operators::SaturatingOp<u8, Operators::Subtract>, MakeUnsigned>>(configuration);
    case Instructions::i8x16_min_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<16, Operators::Minimum, MakeSigned>>(configuration);
    case Instructions::i8x16_min_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<16, Operators::Minimum, MakeUnsigned>>(configuration);
    case Instructions::i8x16_max_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<16, Operators::Maximum, MakeSigned>>(configuration);
    case Instructions::i8x16_max_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<16, Operators::Maximum, MakeUnsigned>>(configuration);
    case Instructions::i16x8_eq.value():
        return binary_numeric_operation<u128, u128, Operators::VectorCmpOp<8, Operators::Equals>>(configuration);
    case Instructions::i16x8_ne.value():
@ -1434,6 +1464,56 @@ void BytecodeInterpreter::interpret(Configuration& configuration, InstructionPoi
        return binary_numeric_operation<u128, u128, Operators::VectorCmpOp<8, Operators::GreaterThanOrEquals, MakeSigned>>(configuration);
    case Instructions::i16x8_ge_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorCmpOp<8, Operators::GreaterThanOrEquals, MakeUnsigned>>(configuration);
    case Instructions::i16x8_abs.value():
        return unary_operation<u128, u128, Operators::VectorIntegerUnaryOp<8, Operators::Absolute>>(configuration);
    case Instructions::i16x8_neg.value():
        return unary_operation<u128, u128, Operators::VectorIntegerUnaryOp<8, Operators::Negate>>(configuration);
    case Instructions::i16x8_all_true.value():
        return unary_operation<u128, i32, Operators::VectorAllTrue<8>>(configuration);
    case Instructions::i16x8_add.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<8, Operators::Add>>(configuration);
    case Instructions::i16x8_sub.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<8, Operators::Subtract>>(configuration);
    case Instructions::i16x8_mul.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<8, Operators::Multiply>>(configuration);
    case Instructions::i16x8_avgr_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<8, Operators::Average, MakeUnsigned>>(configuration);
    case Instructions::i16x8_add_sat_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<8, Operators::SaturatingOp<i16, Operators::Add>, MakeSigned>>(configuration);
    case Instructions::i16x8_add_sat_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<8, Operators::SaturatingOp<u16, Operators::Add>, MakeUnsigned>>(configuration);
    case Instructions::i16x8_sub_sat_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<8, Operators::SaturatingOp<i16, Operators::Subtract>, MakeSigned>>(configuration);
    case Instructions::i16x8_sub_sat_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<8, Operators::SaturatingOp<u16, Operators::Subtract>, MakeUnsigned>>(configuration);
    case Instructions::i16x8_min_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<8, Operators::Minimum, MakeSigned>>(configuration);
    case Instructions::i16x8_min_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<8, Operators::Minimum, MakeUnsigned>>(configuration);
    case Instructions::i16x8_max_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<8, Operators::Maximum, MakeSigned>>(configuration);
    case Instructions::i16x8_max_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<8, Operators::Maximum, MakeUnsigned>>(configuration);
    case Instructions::i16x8_extend_low_i8x16_s.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExt<8, Operators::VectorExt::Low, MakeSigned>>(configuration);
    case Instructions::i16x8_extend_high_i8x16_s.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExt<8, Operators::VectorExt::High, MakeSigned>>(configuration);
    case Instructions::i16x8_extend_low_i8x16_u.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExt<8, Operators::VectorExt::Low, MakeUnsigned>>(configuration);
    case Instructions::i16x8_extend_high_i8x16_u.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExt<8, Operators::VectorExt::High, MakeUnsigned>>(configuration);
    case Instructions::i16x8_extadd_pairwise_i8x16_s.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExtOpPairwise<8, Operators::Add, MakeSigned>>(configuration);
    case Instructions::i16x8_extadd_pairwise_i8x16_u.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExtOpPairwise<8, Operators::Add, MakeUnsigned>>(configuration);
    case Instructions::i16x8_extmul_low_i8x16_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerExtOp<8, Operators::Multiply, Operators::VectorExt::Low, MakeSigned>>(configuration);
    case Instructions::i16x8_extmul_high_i8x16_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerExtOp<8, Operators::Multiply, Operators::VectorExt::High, MakeSigned>>(configuration);
    case Instructions::i16x8_extmul_low_i8x16_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerExtOp<8, Operators::Multiply, Operators::VectorExt::Low, MakeUnsigned>>(configuration);
    case Instructions::i16x8_extmul_high_i8x16_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerExtOp<8, Operators::Multiply, Operators::VectorExt::High, MakeUnsigned>>(configuration);
    case Instructions::i32x4_eq.value():
        return binary_numeric_operation<u128, u128, Operators::VectorCmpOp<4, Operators::Equals>>(configuration);
    case Instructions::i32x4_ne.value():
@ -1454,6 +1534,86 @@ void BytecodeInterpreter::interpret(Configuration& configuration, InstructionPoi
        return binary_numeric_operation<u128, u128, Operators::VectorCmpOp<4, Operators::GreaterThanOrEquals, MakeSigned>>(configuration);
    case Instructions::i32x4_ge_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorCmpOp<4, Operators::GreaterThanOrEquals, MakeUnsigned>>(configuration);
    case Instructions::i32x4_abs.value():
        return unary_operation<u128, u128, Operators::VectorIntegerUnaryOp<4, Operators::Absolute>>(configuration);
    case Instructions::i32x4_neg.value():
        return unary_operation<u128, u128, Operators::VectorIntegerUnaryOp<4, Operators::Negate, MakeUnsigned>>(configuration);
    case Instructions::i32x4_all_true.value():
        return unary_operation<u128, i32, Operators::VectorAllTrue<4>>(configuration);
    case Instructions::i32x4_add.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<4, Operators::Add, MakeUnsigned>>(configuration);
    case Instructions::i32x4_sub.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<4, Operators::Subtract, MakeUnsigned>>(configuration);
    case Instructions::i32x4_mul.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<4, Operators::Multiply, MakeUnsigned>>(configuration);
    case Instructions::i32x4_min_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<4, Operators::Minimum, MakeSigned>>(configuration);
    case Instructions::i32x4_min_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<4, Operators::Minimum, MakeUnsigned>>(configuration);
    case Instructions::i32x4_max_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<4, Operators::Maximum, MakeSigned>>(configuration);
    case Instructions::i32x4_max_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<4, Operators::Maximum, MakeUnsigned>>(configuration);
    case Instructions::i32x4_extend_low_i16x8_s.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExt<4, Operators::VectorExt::Low, MakeSigned>>(configuration);
    case Instructions::i32x4_extend_high_i16x8_s.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExt<4, Operators::VectorExt::High, MakeSigned>>(configuration);
    case Instructions::i32x4_extend_low_i16x8_u.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExt<4, Operators::VectorExt::Low, MakeUnsigned>>(configuration);
    case Instructions::i32x4_extend_high_i16x8_u.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExt<4, Operators::VectorExt::High, MakeUnsigned>>(configuration);
    case Instructions::i32x4_extadd_pairwise_i16x8_s.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExtOpPairwise<4, Operators::Add, MakeSigned>>(configuration);
    case Instructions::i32x4_extadd_pairwise_i16x8_u.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExtOpPairwise<4, Operators::Add, MakeUnsigned>>(configuration);
    case Instructions::i32x4_extmul_low_i16x8_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerExtOp<4, Operators::Multiply, Operators::VectorExt::Low, MakeSigned>>(configuration);
    case Instructions::i32x4_extmul_high_i16x8_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerExtOp<4, Operators::Multiply, Operators::VectorExt::High, MakeSigned>>(configuration);
    case Instructions::i32x4_extmul_low_i16x8_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerExtOp<4, Operators::Multiply, Operators::VectorExt::Low, MakeUnsigned>>(configuration);
    case Instructions::i32x4_extmul_high_i16x8_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerExtOp<4, Operators::Multiply, Operators::VectorExt::High, MakeUnsigned>>(configuration);
    case Instructions::i64x2_eq.value():
        return binary_numeric_operation<u128, u128, Operators::VectorCmpOp<2, Operators::Equals>>(configuration);
    case Instructions::i64x2_ne.value():
        return binary_numeric_operation<u128, u128, Operators::VectorCmpOp<2, Operators::NotEquals>>(configuration);
    case Instructions::i64x2_lt_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorCmpOp<2, Operators::LessThan, MakeSigned>>(configuration);
    case Instructions::i64x2_gt_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorCmpOp<2, Operators::GreaterThan, MakeSigned>>(configuration);
    case Instructions::i64x2_le_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorCmpOp<2, Operators::LessThanOrEquals, MakeSigned>>(configuration);
    case Instructions::i64x2_ge_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorCmpOp<2, Operators::GreaterThanOrEquals, MakeSigned>>(configuration);
    case Instructions::i64x2_abs.value():
        return unary_operation<u128, u128, Operators::VectorIntegerUnaryOp<2, Operators::Absolute>>(configuration);
    case Instructions::i64x2_neg.value():
        return unary_operation<u128, u128, Operators::VectorIntegerUnaryOp<2, Operators::Negate, MakeUnsigned>>(configuration);
    case Instructions::i64x2_all_true.value():
        return unary_operation<u128, i32, Operators::VectorAllTrue<2>>(configuration);
    case Instructions::i64x2_add.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<2, Operators::Add, MakeUnsigned>>(configuration);
    case Instructions::i64x2_sub.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<2, Operators::Subtract, MakeUnsigned>>(configuration);
    case Instructions::i64x2_mul.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerBinaryOp<2, Operators::Multiply, MakeUnsigned>>(configuration);
    case Instructions::i64x2_extend_low_i32x4_s.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExt<2, Operators::VectorExt::Low, MakeSigned>>(configuration);
    case Instructions::i64x2_extend_high_i32x4_s.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExt<2, Operators::VectorExt::High, MakeSigned>>(configuration);
    case Instructions::i64x2_extend_low_i32x4_u.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExt<2, Operators::VectorExt::Low, MakeUnsigned>>(configuration);
    case Instructions::i64x2_extend_high_i32x4_u.value():
        return unary_operation<u128, u128, Operators::VectorIntegerExt<2, Operators::VectorExt::High, MakeUnsigned>>(configuration);
    case Instructions::i64x2_extmul_low_i32x4_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerExtOp<2, Operators::Multiply, Operators::VectorExt::Low, MakeSigned>>(configuration);
    case Instructions::i64x2_extmul_high_i32x4_s.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerExtOp<2, Operators::Multiply, Operators::VectorExt::High, MakeSigned>>(configuration);
    case Instructions::i64x2_extmul_low_i32x4_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerExtOp<2, Operators::Multiply, Operators::VectorExt::Low, MakeUnsigned>>(configuration);
    case Instructions::i64x2_extmul_high_i32x4_u.value():
        return binary_numeric_operation<u128, u128, Operators::VectorIntegerExtOp<2, Operators::Multiply, Operators::VectorExt::High, MakeUnsigned>>(configuration);
    case Instructions::f32x4_eq.value():
        return binary_numeric_operation<u128, u128, Operators::VectorFloatCmpOp<4, Operators::Equals>>(configuration);
    case Instructions::f32x4_ne.value():
@ -1569,95 +1729,16 @@ void BytecodeInterpreter::interpret(Configuration& configuration, InstructionPoi
    case Instructions::v128_load64_zero.value():
    case Instructions::f32x4_demote_f64x2_zero.value():
    case Instructions::f64x2_promote_low_f32x4.value():
    case Instructions::i8x16_abs.value():
    case Instructions::i8x16_neg.value():
    case Instructions::i8x16_popcnt.value():
    case Instructions::i8x16_all_true.value():
    case Instructions::i8x16_bitmask.value():
    case Instructions::i8x16_narrow_i16x8_s.value():
    case Instructions::i8x16_narrow_i16x8_u.value():
    case Instructions::i8x16_add.value():
    case Instructions::i8x16_add_sat_s.value():
    case Instructions::i8x16_add_sat_u.value():
    case Instructions::i8x16_sub.value():
    case Instructions::i8x16_sub_sat_s.value():
    case Instructions::i8x16_sub_sat_u.value():
    case Instructions::i8x16_min_s.value():
    case Instructions::i8x16_min_u.value():
    case Instructions::i8x16_max_s.value():
    case Instructions::i8x16_max_u.value():
    case Instructions::i8x16_avgr_u.value():
    case Instructions::i16x8_extadd_pairwise_i8x16_s.value():
    case Instructions::i16x8_extadd_pairwise_i8x16_u.value():
    case Instructions::i32x4_extadd_pairwise_i16x8_s.value():
    case Instructions::i32x4_extadd_pairwise_i16x8_u.value():
    case Instructions::i16x8_abs.value():
    case Instructions::i16x8_neg.value():
    case Instructions::i16x8_q15mulr_sat_s.value():
    case Instructions::i16x8_all_true.value():
    case Instructions::i16x8_bitmask.value():
    case Instructions::i16x8_narrow_i32x4_s.value():
    case Instructions::i16x8_narrow_i32x4_u.value():
    case Instructions::i16x8_extend_low_i8x16_s.value():
    case Instructions::i16x8_extend_high_i8x16_s.value():
    case Instructions::i16x8_extend_low_i8x16_u.value():
    case Instructions::i16x8_extend_high_i8x16_u.value():
    case Instructions::i16x8_add.value():
    case Instructions::i16x8_add_sat_s.value():
    case Instructions::i16x8_add_sat_u.value():
    case Instructions::i16x8_sub.value():
    case Instructions::i16x8_sub_sat_s.value():
    case Instructions::i16x8_sub_sat_u.value():
    case Instructions::i16x8_mul.value():
    case Instructions::i16x8_min_s.value():
    case Instructions::i16x8_min_u.value():
    case Instructions::i16x8_max_s.value():
    case Instructions::i16x8_max_u.value():
    case Instructions::i16x8_avgr_u.value():
    case Instructions::i16x8_extmul_low_i8x16_s.value():
    case Instructions::i16x8_extmul_high_i8x16_s.value():
    case Instructions::i16x8_extmul_low_i8x16_u.value():
    case Instructions::i16x8_extmul_high_i8x16_u.value():
    case Instructions::i32x4_abs.value():
    case Instructions::i32x4_neg.value():
    case Instructions::i32x4_all_true.value():
    case Instructions::i32x4_bitmask.value():
    case Instructions::i32x4_extend_low_i16x8_s.value():
    case Instructions::i32x4_extend_high_i16x8_s.value():
    case Instructions::i32x4_extend_low_i16x8_u.value():
    case Instructions::i32x4_extend_high_i16x8_u.value():
    case Instructions::i32x4_add.value():
    case Instructions::i32x4_sub.value():
    case Instructions::i32x4_mul.value():
    case Instructions::i32x4_min_s.value():
    case Instructions::i32x4_min_u.value():
    case Instructions::i32x4_max_s.value():
    case Instructions::i32x4_max_u.value():
    case Instructions::i32x4_dot_i16x8_s.value():
    case Instructions::i32x4_extmul_low_i16x8_s.value():
    case Instructions::i32x4_extmul_high_i16x8_s.value():
    case Instructions::i32x4_extmul_low_i16x8_u.value():
    case Instructions::i32x4_extmul_high_i16x8_u.value():
    case Instructions::i64x2_abs.value():
    case Instructions::i64x2_neg.value():
    case Instructions::i64x2_all_true.value():
    case Instructions::i64x2_bitmask.value():
    case Instructions::i64x2_extend_low_i32x4_s.value():
    case Instructions::i64x2_extend_high_i32x4_s.value():
    case Instructions::i64x2_extend_low_i32x4_u.value():
    case Instructions::i64x2_extend_high_i32x4_u.value():
    case Instructions::i64x2_sub.value():
    case Instructions::i64x2_mul.value():
    case Instructions::i64x2_eq.value():
    case Instructions::i64x2_ne.value():
    case Instructions::i64x2_lt_s.value():
    case Instructions::i64x2_gt_s.value():
    case Instructions::i64x2_le_s.value():
    case Instructions::i64x2_ge_s.value():
    case Instructions::i64x2_extmul_low_i32x4_s.value():
    case Instructions::i64x2_extmul_high_i32x4_s.value():
    case Instructions::i64x2_extmul_low_i32x4_u.value():
    case Instructions::i64x2_extmul_high_i32x4_u.value():
    case Instructions::i32x4_trunc_sat_f32x4_s.value():
    case Instructions::i32x4_trunc_sat_f32x4_u.value():
    case Instructions::f32x4_convert_i32x4_s.value():
--- a/Userland/Libraries/LibWasm/AbstractMachine/Operators.h
+++ b/Userland/Libraries/LibWasm/AbstractMachine/Operators.h
@ -84,6 +84,16 @@ struct Modulo {
    static StringView name() { return "%"sv; }
 };
 // Binary operator computing the average of two integers, biased by one before halving.
 // For non-negative operands this rounds a fractional average up to the next integer.
 struct Average {
    template<typename Lhs, typename Rhs>
    auto operator()(Lhs lhs, Rhs rhs) const
    {
        // The sum is formed in the promoted arithmetic type of the operands and only
        // converted back to Lhs after the division.
        auto const biased_sum = lhs + rhs + 1;
        auto const halved = biased_sum / 2;
        return static_cast<Lhs>(halved);
    }

    static StringView name() { return "avgr"sv; }
 };
 struct BitShiftLeft {
    template<typename Lhs, typename Rhs>
    auto operator()(Lhs lhs, Rhs rhs) const { return lhs << (rhs % (sizeof(lhs) * 8)); }
@ -140,6 +150,33 @@ struct BitRotateRight {
    static StringView name() { return "rotate_right"sv; }
 };
 // Unary operator that tests whether all VectorSize lanes of a 128-bit value are non-zero.
 // The result is a boolean: true when no lane compares equal to zero.
 template<size_t VectorSize, template<typename> typename SetSign = MakeSigned>
 struct VectorAllTrue {
    auto operator()(u128 c) const
    {
        using Lane = NativeIntegralType<128 / VectorSize>;
        using Lanes = Native128ByteVectorOf<Lane, SetSign>;

        // Lane-wise comparison against zero (presumably yielding a per-lane mask, as with
        // compiler vector types); the mask is all-zero only if no lane was zero.
        auto const zero_lanes = bit_cast<Lanes>(c) == 0;
        auto const mask_bits = bit_cast<u128>(zero_lanes);
        return mask_bits == 0;
    }

    static StringView name()
    {
        if constexpr (VectorSize == 16)
            return "vec(8x16).all_true"sv;
        else if constexpr (VectorSize == 8)
            return "vec(16x8).all_true"sv;
        else if constexpr (VectorSize == 4)
            return "vec(32x4).all_true"sv;
        else if constexpr (VectorSize == 2)
            return "vec(64x2).all_true"sv;
        else
            VERIFY_NOT_REACHED();
    }
 };
 template<size_t VectorSize>
 struct VectorShiftLeft {
    auto operator()(u128 lhs, i32 rhs) const
@ -459,7 +496,7 @@ struct PopCount {
    template<typename Lhs>
    auto operator()(Lhs lhs) const
    {
-        if constexpr (sizeof(Lhs) == 4 || sizeof(Lhs) == 8)
+        if constexpr (sizeof(Lhs) == 1 || sizeof(Lhs) == 2 || sizeof(Lhs) == 4 || sizeof(Lhs) == 8)
            return popcount(MakeUnsigned<Lhs>(lhs));
        else
            VERIFY_NOT_REACHED();
@ -497,6 +534,194 @@ struct Ceil {
    static StringView name() { return "ceil"sv; }
 };
 // Unary vector operator that combines adjacent pairs of narrow input lanes into one
 // wide output lane.
 //
 // The 128-bit input is viewed as VectorSize * 2 lanes; output lane i is
 // Op(input[2 * i], input[2 * i + 1]), stored in a lane twice as wide as the input lanes.
 // SetSign selects whether lanes are interpreted as signed or unsigned.
 template<size_t VectorSize, typename Op, template<typename> typename SetSign = MakeSigned>
 struct VectorIntegerExtOpPairwise {
    auto operator()(u128 c) const
    {
        using VectorResult = NativeVectorType<128 / VectorSize, VectorSize, SetSign>;
        using VectorInput = NativeVectorType<128 / (VectorSize * 2), VectorSize * 2, SetSign>;
        using ResultType = SetSign<NativeIntegralType<128 / VectorSize>>;

        auto vector = bit_cast<VectorInput>(c);
        VectorResult result;
        Op op;

        // FIXME: Find a way to not loop here
        for (size_t i = 0; i < VectorSize; ++i) {
            // Widen both operands to the result lane type *before* applying Op, so the
            // operation cannot overflow in the narrow type. This no longer relies on
            // implicit integer promotion (which only covers lanes narrower than int) and
            // matches what VectorIntegerExtOp does.
            ResultType a = vector[i * 2];
            ResultType b = vector[(i * 2) + 1];
            result[i] = op(a, b);
        }

        return bit_cast<u128>(result);
    }

    static StringView name()
    {
        switch (VectorSize) {
        case 8:
            return "vec(16x8).ext_op_pairwise(8x16)"sv;
        case 4:
            return "vec(32x4).ext_op_pairwise(16x8)"sv;
        case 2:
            return "vec(64x2).ext_op_pairwise(32x4)"sv;
        default:
            VERIFY_NOT_REACHED();
        }
    }
 };
 // Selects which half of the narrow input lanes an extending vector operator reads.
 // With VectorSize output lanes, the input holds VectorSize * 2 narrow lanes.
 enum class VectorExt {
    High, // Read input lanes [VectorSize, VectorSize * 2).
    Low, // Read input lanes [0, VectorSize).
 };
 // Unary vector operator that widens one half of the input lanes to lanes twice as wide.
 //
 // The 128-bit input is viewed as VectorSize * 2 narrow lanes; Mode picks the half that
 // is copied into the VectorSize wide output lanes, and SetSign picks signed or unsigned
 // lane types (and therefore how the widening conversion treats the value).
 template<size_t VectorSize, VectorExt Mode, template<typename> typename SetSign = MakeSigned>
 struct VectorIntegerExt {
    auto operator()(u128 c) const
    {
        using WideLanes = NativeVectorType<128 / VectorSize, VectorSize, SetSign>;
        using NarrowLanes = NativeVectorType<128 / (VectorSize * 2), VectorSize * 2, SetSign>;

        if constexpr (Mode != VectorExt::High && Mode != VectorExt::Low)
            VERIFY_NOT_REACHED();

        // Index of the first narrow lane to read: the upper half starts at VectorSize.
        constexpr size_t first_source_lane = (Mode == VectorExt::High) ? VectorSize : 0;

        auto const narrow = bit_cast<NarrowLanes>(c);
        WideLanes widened;

        // FIXME: Find a way to not loop here
        for (size_t lane = 0; lane < VectorSize; ++lane)
            widened[lane] = narrow[first_source_lane + lane];

        return bit_cast<u128>(widened);
    }

    static StringView name()
    {
        if constexpr (VectorSize == 8)
            return "vec(16x8).ext(8x16)"sv;
        else if constexpr (VectorSize == 4)
            return "vec(32x4).ext(16x8)"sv;
        else if constexpr (VectorSize == 2)
            return "vec(64x2).ext(32x4)"sv;
        else
            VERIFY_NOT_REACHED();
    }
 };
 // Binary vector operator that widens one half of each input's lanes and then applies Op
 // lane by lane.
 //
 // Both 128-bit inputs are viewed as VectorSize * 2 narrow lanes; Mode picks the half that
 // is read. Each selected pair of lanes is converted to the wide result lane type before
 // Op is applied, and the result is stored in the matching wide output lane.
 template<size_t VectorSize, typename Op, VectorExt Mode, template<typename> typename SetSign = MakeSigned>
 struct VectorIntegerExtOp {
    auto operator()(u128 lhs, u128 rhs) const
    {
        using WideLanes = NativeVectorType<128 / VectorSize, VectorSize, SetSign>;
        using NarrowLanes = NativeVectorType<128 / (VectorSize * 2), VectorSize * 2, SetSign>;
        using WideLane = SetSign<NativeIntegralType<128 / VectorSize>>;

        if constexpr (Mode != VectorExt::High && Mode != VectorExt::Low)
            VERIFY_NOT_REACHED();

        // Index of the first narrow lane to read: the upper half starts at VectorSize.
        constexpr size_t first_source_lane = (Mode == VectorExt::High) ? VectorSize : 0;

        auto const left = bit_cast<NarrowLanes>(lhs);
        auto const right = bit_cast<NarrowLanes>(rhs);
        WideLanes combined;
        Op op;

        // FIXME: Find a way to not loop here
        for (size_t lane = 0; lane < VectorSize; ++lane) {
            // Widen first so that Op works on the full-width lane type.
            WideLane a = left[first_source_lane + lane];
            WideLane b = right[first_source_lane + lane];
            combined[lane] = op(a, b);
        }

        return bit_cast<u128>(combined);
    }

    static StringView name()
    {
        if constexpr (VectorSize == 8)
            return "vec(16x8).ext_op(8x16)"sv;
        else if constexpr (VectorSize == 4)
            return "vec(32x4).ext_op(16x8)"sv;
        else if constexpr (VectorSize == 2)
            return "vec(64x2).ext_op(32x4)"sv;
        else
            VERIFY_NOT_REACHED();
    }
 };
 // Binary vector operator that applies the scalar operator Op to each pair of
 // corresponding lanes of two 128-bit values.
 //
 // VectorSize is the number of lanes, and SetSign selects whether the lanes are handed to
 // Op as signed or unsigned integers.
 template<size_t VectorSize, typename Op, template<typename> typename SetSign = MakeSigned>
 struct VectorIntegerBinaryOp {
    auto operator()(u128 lhs, u128 rhs) const
    {
        using Lanes = NativeVectorType<128 / VectorSize, VectorSize, SetSign>;

        auto const left = bit_cast<Lanes>(lhs);
        auto const right = bit_cast<Lanes>(rhs);
        Lanes combined;
        Op op;

        // FIXME: Find a way to not loop here
        for (size_t lane = 0; lane < VectorSize; ++lane)
            combined[lane] = op(left[lane], right[lane]);

        return bit_cast<u128>(combined);
    }

    static StringView name()
    {
        if constexpr (VectorSize == 16)
            return "vec(8x16).binary_op"sv;
        else if constexpr (VectorSize == 8)
            return "vec(16x8).binary_op"sv;
        else if constexpr (VectorSize == 4)
            return "vec(32x4).binary_op"sv;
        else if constexpr (VectorSize == 2)
            return "vec(64x2).binary_op"sv;
        else
            VERIFY_NOT_REACHED();
    }
 };
 // Unary vector operator that applies the scalar operator Op to every lane of a
 // 128-bit value.
 //
 // VectorSize is the number of lanes, and SetSign selects whether the lanes are handed to
 // Op as signed or unsigned integers.
 template<size_t VectorSize, typename Op, template<typename> typename SetSign = MakeSigned>
 struct VectorIntegerUnaryOp {
    auto operator()(u128 lhs) const
    {
        using Lanes = NativeVectorType<128 / VectorSize, VectorSize, SetSign>;

        auto const input = bit_cast<Lanes>(lhs);
        Lanes output;
        Op op;

        // FIXME: Find a way to not loop here
        for (size_t lane = 0; lane < VectorSize; ++lane)
            output[lane] = op(input[lane]);

        return bit_cast<u128>(output);
    }

    static StringView name()
    {
        if constexpr (VectorSize == 16)
            return "vec(8x16).unary_op"sv;
        else if constexpr (VectorSize == 8)
            return "vec(16x8).unary_op"sv;
        else if constexpr (VectorSize == 4)
            return "vec(32x4).unary_op"sv;
        else if constexpr (VectorSize == 2)
            return "vec(64x2).unary_op"sv;
        else
            VERIFY_NOT_REACHED();
    }
 };
 template<size_t VectorSize, typename Op>
 struct VectorFloatBinaryOp {
    auto operator()(u128 lhs, u128 rhs) const
@ -530,11 +755,11 @@ struct VectorFloatUnaryOp {
    auto operator()(u128 lhs) const
    {
        using VectorType = NativeFloatingVectorType<128, VectorSize, NativeFloatingType<128 / VectorSize>>;
-        auto first = bit_cast<VectorType>(lhs);
+        auto value = bit_cast<VectorType>(lhs);
        VectorType result;
        Op op;
        for (size_t i = 0; i < VectorSize; ++i) {
-            result[i] = op(first[i]);
+            result[i] = op(value[i]);
        }
        return bit_cast<u128>(result);
    }
@ -763,4 +988,28 @@ struct SaturatingTruncate {
    static StringView name() { return "truncate.saturating"sv; }
 };
 // Wraps a binary integer operator so that its result is clamped to the range of ResultT
 // instead of wrapping around.
 //
 // ResultT is the (signed or unsigned) integer type whose limits bound the result; Op is
 // the scalar operator being wrapped. The call operator returns Op(lhs, rhs) when it lies
 // within [min(ResultT), max(ResultT)], and the nearest limit otherwise.
 //
 // NOTE: Operands are widened to a 64-bit integer before Op runs, so the clamp is exact
 // only for operand and result types narrower than 64 bits.
 template<typename ResultT, typename Op>
 struct SaturatingOp {
    template<typename Lhs, typename Rhs>
    ResultT operator()(Lhs lhs, Rhs rhs) const
    {
        using Wide = long long;

        // Apply Op in a wider integer type so the exact, unwrapped result is available
        // for clamping. Evaluating Op in the operand type would already have wrapped for
        // operands that are not promoted to a wider type, and a detour through double is
        // unnecessary for integers.
        Op op;
        Wide const result = op(static_cast<Wide>(lhs), static_cast<Wide>(rhs));

        auto const lowest = static_cast<Wide>(NumericLimits<ResultT>::min());
        auto const highest = static_cast<Wide>(NumericLimits<ResultT>::max());

        if (result <= lowest)
            return NumericLimits<ResultT>::min();
        if (result >= highest)
            return NumericLimits<ResultT>::max();

        return static_cast<ResultT>(result);
    }

    static StringView name() { return "saturating_op"sv; }
 };
 }