Rollup merge of #126417 - beetrees:f16-f128-inline-asm-x86, r=Amanieu

Add `f16` and `f128` inline ASM support for `x86` and `x86-64`

This PR adds `f16` and `f128` input and output support to inline ASM on `x86` and `x86-64`. `f16` vector sizes are taken from [here](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html).

Relevant issue: #125398
Tracking issue: #116909

``@rustbot`` label +F-f16_and_f128
This commit is contained in:
Matthias Krüger 2024-06-15 14:40:48 +02:00 committed by GitHub
commit 0f2cc21547
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 350 additions and 42 deletions

View file

@ -959,6 +959,43 @@ fn llvm_fixup_input<'ll, 'tcx>(
InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
Abi::Vector { .. },
) if layout.size.bytes() == 64 => bx.bitcast(value, bx.cx.type_vector(bx.cx.type_f64(), 8)),
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Scalar(s),
) if bx.sess().asm_arch == Some(InlineAsmArch::X86)
&& s.primitive() == Primitive::Float(Float::F128) =>
{
bx.bitcast(value, bx.type_vector(bx.type_i32(), 4))
}
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Scalar(s),
) if s.primitive() == Primitive::Float(Float::F16) => {
let value = bx.insert_element(
bx.const_undef(bx.type_vector(bx.type_f16(), 8)),
value,
bx.const_usize(0),
);
bx.bitcast(value, bx.type_vector(bx.type_i16(), 8))
}
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Vector { element, count: count @ (8 | 16) },
) if element.primitive() == Primitive::Float(Float::F16) => {
bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
}
(
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
Abi::Scalar(s),
@ -1036,6 +1073,39 @@ fn llvm_fixup_output<'ll, 'tcx>(
InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
Abi::Vector { .. },
) if layout.size.bytes() == 64 => bx.bitcast(value, layout.llvm_type(bx.cx)),
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Scalar(s),
) if bx.sess().asm_arch == Some(InlineAsmArch::X86)
&& s.primitive() == Primitive::Float(Float::F128) =>
{
bx.bitcast(value, bx.type_f128())
}
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Scalar(s),
) if s.primitive() == Primitive::Float(Float::F16) => {
let value = bx.bitcast(value, bx.type_vector(bx.type_f16(), 8));
bx.extract_element(value, bx.const_usize(0))
}
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Vector { element, count: count @ (8 | 16) },
) if element.primitive() == Primitive::Float(Float::F16) => {
bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
}
(
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
Abi::Scalar(s),
@ -1109,6 +1179,36 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
Abi::Vector { .. },
) if layout.size.bytes() == 64 => cx.type_vector(cx.type_f64(), 8),
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Scalar(s),
) if cx.sess().asm_arch == Some(InlineAsmArch::X86)
&& s.primitive() == Primitive::Float(Float::F128) =>
{
cx.type_vector(cx.type_i32(), 4)
}
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Scalar(s),
) if s.primitive() == Primitive::Float(Float::F16) => cx.type_vector(cx.type_i16(), 8),
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Vector { element, count: count @ (8 | 16) },
) if element.primitive() == Primitive::Float(Float::F16) => {
cx.type_vector(cx.type_i16(), count)
}
(
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
Abi::Scalar(s),

View file

@ -62,8 +62,10 @@ fn get_asm_ty(&self, ty: Ty<'tcx>) -> Option<InlineAsmType> {
ty::Int(IntTy::I64) | ty::Uint(UintTy::U64) => Some(InlineAsmType::I64),
ty::Int(IntTy::I128) | ty::Uint(UintTy::U128) => Some(InlineAsmType::I128),
ty::Int(IntTy::Isize) | ty::Uint(UintTy::Usize) => Some(asm_ty_isize),
ty::Float(FloatTy::F16) => Some(InlineAsmType::F16),
ty::Float(FloatTy::F32) => Some(InlineAsmType::F32),
ty::Float(FloatTy::F64) => Some(InlineAsmType::F64),
ty::Float(FloatTy::F128) => Some(InlineAsmType::F128),
ty::FnPtr(_) => Some(asm_ty_isize),
ty::RawPtr(ty, _) if self.is_thin_ptr_ty(ty) => Some(asm_ty_isize),
ty::Adt(adt, args) if adt.repr().simd() => {
@ -105,8 +107,10 @@ fn get_asm_ty(&self, ty: Ty<'tcx>) -> Option<InlineAsmType> {
width => bug!("unsupported pointer width: {width}"),
})
}
ty::Float(FloatTy::F16) => Some(InlineAsmType::VecF16(size)),
ty::Float(FloatTy::F32) => Some(InlineAsmType::VecF32(size)),
ty::Float(FloatTy::F64) => Some(InlineAsmType::VecF64(size)),
ty::Float(FloatTy::F128) => Some(InlineAsmType::VecF128(size)),
_ => None,
}
}

View file

@ -707,15 +707,19 @@ pub enum InlineAsmType {
I32,
I64,
I128,
F16,
F32,
F64,
F128,
VecI8(u64),
VecI16(u64),
VecI32(u64),
VecI64(u64),
VecI128(u64),
VecF16(u64),
VecF32(u64),
VecF64(u64),
VecF128(u64),
}
impl InlineAsmType {
@ -730,15 +734,19 @@ pub fn size(self) -> Size {
Self::I32 => 4,
Self::I64 => 8,
Self::I128 => 16,
Self::F16 => 2,
Self::F32 => 4,
Self::F64 => 8,
Self::F128 => 16,
Self::VecI8(n) => n * 1,
Self::VecI16(n) => n * 2,
Self::VecI32(n) => n * 4,
Self::VecI64(n) => n * 8,
Self::VecI128(n) => n * 16,
Self::VecF16(n) => n * 2,
Self::VecF32(n) => n * 4,
Self::VecF64(n) => n * 8,
Self::VecF128(n) => n * 16,
})
}
}
@ -751,15 +759,19 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
Self::I32 => f.write_str("i32"),
Self::I64 => f.write_str("i64"),
Self::I128 => f.write_str("i128"),
Self::F16 => f.write_str("f16"),
Self::F32 => f.write_str("f32"),
Self::F64 => f.write_str("f64"),
Self::F128 => f.write_str("f128"),
Self::VecI8(n) => write!(f, "i8x{n}"),
Self::VecI16(n) => write!(f, "i16x{n}"),
Self::VecI32(n) => write!(f, "i32x{n}"),
Self::VecI64(n) => write!(f, "i64x{n}"),
Self::VecI128(n) => write!(f, "i128x{n}"),
Self::VecF16(n) => write!(f, "f16x{n}"),
Self::VecF32(n) => write!(f, "f32x{n}"),
Self::VecF64(n) => write!(f, "f64x{n}"),
Self::VecF128(n) => write!(f, "f128x{n}"),
}
}
}

View file

@ -107,26 +107,26 @@ pub fn supported_types(
match self {
Self::reg | Self::reg_abcd => {
if arch == InlineAsmArch::X86_64 {
types! { _: I16, I32, I64, F32, F64; }
types! { _: I16, I32, I64, F16, F32, F64; }
} else {
types! { _: I16, I32, F32; }
types! { _: I16, I32, F16, F32; }
}
}
Self::reg_byte => types! { _: I8; },
Self::xmm_reg => types! {
sse: I32, I64, F32, F64,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2);
sse: I32, I64, F16, F32, F64, F128,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2);
},
Self::ymm_reg => types! {
avx: I32, I64, F32, F64,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2),
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF32(8), VecF64(4);
avx: I32, I64, F16, F32, F64, F128,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2),
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF16(16), VecF32(8), VecF64(4);
},
Self::zmm_reg => types! {
avx512f: I32, I64, F32, F64,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2),
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF32(8), VecF64(4),
VecI8(64), VecI16(32), VecI32(16), VecI64(8), VecF32(16), VecF64(8);
avx512f: I32, I64, F16, F32, F64, F128,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2),
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF16(16), VecF32(8), VecF64(4),
VecI8(64), VecI16(32), VecI32(16), VecI64(8), VecF16(32), VecF32(16), VecF64(8);
},
Self::kreg => types! {
avx512f: I8, I16;

View file

@ -7,7 +7,7 @@
//@ compile-flags: -C llvm-args=--x86-asm-syntax=intel
//@ compile-flags: -C target-feature=+avx512bw
#![feature(no_core, lang_items, rustc_attrs, repr_simd)]
#![feature(no_core, lang_items, rustc_attrs, repr_simd, f16, f128)]
#![crate_type = "rlib"]
#![no_core]
#![allow(asm_sub_register, non_camel_case_types)]
@ -41,6 +41,8 @@ trait Copy {}
#[repr(simd)]
pub struct i64x2(i64, i64);
#[repr(simd)]
pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16);
#[repr(simd)]
pub struct f32x4(f32, f32, f32, f32);
#[repr(simd)]
pub struct f64x2(f64, f64);
@ -87,6 +89,8 @@ pub struct i8x32(
#[repr(simd)]
pub struct i64x4(i64, i64, i64, i64);
#[repr(simd)]
pub struct f16x16(f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16);
#[repr(simd)]
pub struct f32x8(f32, f32, f32, f32, f32, f32, f32, f32);
#[repr(simd)]
pub struct f64x4(f64, f64, f64, f64);
@ -198,35 +202,59 @@ pub struct i16x32(
#[repr(simd)]
pub struct i64x8(i64, i64, i64, i64, i64, i64, i64, i64);
#[repr(simd)]
pub struct f16x32(
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
);
#[repr(simd)]
pub struct f32x16(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32);
#[repr(simd)]
pub struct f64x8(f64, f64, f64, f64, f64, f64, f64, f64);
impl Copy for i8 {}
impl Copy for i16 {}
impl Copy for i32 {}
impl Copy for f32 {}
impl Copy for i64 {}
impl Copy for f64 {}
impl Copy for ptr {}
impl Copy for i8x16 {}
impl Copy for i16x8 {}
impl Copy for i32x4 {}
impl Copy for i64x2 {}
impl Copy for f32x4 {}
impl Copy for f64x2 {}
impl Copy for i8x32 {}
impl Copy for i16x16 {}
impl Copy for i32x8 {}
impl Copy for i64x4 {}
impl Copy for f32x8 {}
impl Copy for f64x4 {}
impl Copy for i8x64 {}
impl Copy for i16x32 {}
impl Copy for i32x16 {}
impl Copy for i64x8 {}
impl Copy for f32x16 {}
impl Copy for f64x8 {}
macro_rules! impl_copy {
($($ty:ident)*) => {
$(
impl Copy for $ty {}
)*
};
}
impl_copy!(
i8 i16 f16 i32 f32 i64 f64 f128 ptr
i8x16 i16x8 i32x4 i64x2 f16x8 f32x4 f64x2
i8x32 i16x16 i32x8 i64x4 f16x16 f32x8 f64x4
i8x64 i16x32 i32x16 i64x8 f16x32 f32x16 f64x8
);
extern "C" {
fn extern_func();
@ -292,6 +320,13 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check!(reg_i16 i16 reg "mov");
// CHECK-LABEL: reg_f16:
// CHECK: #APP
// x86_64: mov r{{[a-z0-9]+}}, r{{[a-z0-9]+}}
// i686: mov e{{[a-z0-9]+}}, e{{[a-z0-9]+}}
// CHECK: #NO_APP
check!(reg_f16 f16 reg "mov");
// CHECK-LABEL: reg_i32:
// CHECK: #APP
// x86_64: mov r{{[a-z0-9]+}}, r{{[a-z0-9]+}}
@ -334,6 +369,13 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check!(reg_abcd_i16 i16 reg_abcd "mov");
// CHECK-LABEL: reg_abcd_f16:
// CHECK: #APP
// x86_64: mov r{{[a-z0-9]+}}, r{{[a-z0-9]+}}
// i686: mov e{{[a-z0-9]+}}, e{{[a-z0-9]+}}
// CHECK: #NO_APP
check!(reg_abcd_f16 f16 reg_abcd "mov");
// CHECK-LABEL: reg_abcd_i32:
// CHECK: #APP
// x86_64: mov r{{[a-z0-9]+}}, r{{[a-z0-9]+}}
@ -375,6 +417,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check!(reg_byte i8 reg_byte "mov");
// CHECK-LABEL: xmm_reg_f16:
// CHECK: #APP
// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}}
// CHECK: #NO_APP
check!(xmm_reg_f16 f16 xmm_reg "movaps");
// CHECK-LABEL: xmm_reg_i32:
// CHECK: #APP
// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}}
@ -399,6 +447,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check!(xmm_reg_f64 f64 xmm_reg "movaps");
// CHECK-LABEL: xmm_reg_f128:
// CHECK: #APP
// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}}
// CHECK: #NO_APP
check!(xmm_reg_f128 f128 xmm_reg "movaps");
// CHECK-LABEL: xmm_reg_ptr:
// CHECK: #APP
// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}}
@ -429,6 +483,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check!(xmm_reg_i64x2 i64x2 xmm_reg "movaps");
// CHECK-LABEL: xmm_reg_f16x8:
// CHECK: #APP
// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}}
// CHECK: #NO_APP
check!(xmm_reg_f16x8 f16x8 xmm_reg "movaps");
// CHECK-LABEL: xmm_reg_f32x4:
// CHECK: #APP
// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}}
@ -441,6 +501,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check!(xmm_reg_f64x2 f64x2 xmm_reg "movaps");
// CHECK-LABEL: ymm_reg_f16:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
// CHECK: #NO_APP
check!(ymm_reg_f16 f16 ymm_reg "vmovaps");
// CHECK-LABEL: ymm_reg_i32:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
@ -465,6 +531,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check!(ymm_reg_f64 f64 ymm_reg "vmovaps");
// CHECK-LABEL: ymm_reg_f128:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
// CHECK: #NO_APP
check!(ymm_reg_f128 f128 ymm_reg "vmovaps");
// CHECK-LABEL: ymm_reg_ptr:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
@ -495,6 +567,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check!(ymm_reg_i64x2 i64x2 ymm_reg "vmovaps");
// CHECK-LABEL: ymm_reg_f16x8:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
// CHECK: #NO_APP
check!(ymm_reg_f16x8 f16x8 ymm_reg "vmovaps");
// CHECK-LABEL: ymm_reg_f32x4:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
@ -531,6 +609,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check!(ymm_reg_i64x4 i64x4 ymm_reg "vmovaps");
// CHECK-LABEL: ymm_reg_f16x16:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
// CHECK: #NO_APP
check!(ymm_reg_f16x16 f16x16 ymm_reg "vmovaps");
// CHECK-LABEL: ymm_reg_f32x8:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
@ -543,6 +627,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check!(ymm_reg_f64x4 f64x4 ymm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f16:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
// CHECK: #NO_APP
check!(zmm_reg_f16 f16 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_i32:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
@ -567,6 +657,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check!(zmm_reg_f64 f64 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f128:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
// CHECK: #NO_APP
check!(zmm_reg_f128 f128 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_ptr:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
@ -597,6 +693,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check!(zmm_reg_i64x2 i64x2 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f16x8:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
// CHECK: #NO_APP
check!(zmm_reg_f16x8 f16x8 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f32x4:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
@ -633,6 +735,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check!(zmm_reg_i64x4 i64x4 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f16x16:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
// CHECK: #NO_APP
check!(zmm_reg_f16x16 f16x16 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f32x8:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
@ -669,6 +777,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check!(zmm_reg_i64x8 i64x8 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f16x32:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
// CHECK: #NO_APP
check!(zmm_reg_f16x32 f16x32 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f32x16:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
@ -717,6 +831,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check_reg!(eax_i16 i16 "eax" "mov");
// CHECK-LABEL: eax_f16:
// CHECK: #APP
// CHECK: mov eax, eax
// CHECK: #NO_APP
check_reg!(eax_f16 f16 "eax" "mov");
// CHECK-LABEL: eax_i32:
// CHECK: #APP
// CHECK: mov eax, eax
@ -756,6 +876,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
#[cfg(i686)]
check_reg!(ah_byte i8 "ah" "mov");
// CHECK-LABEL: xmm0_f16:
// CHECK: #APP
// CHECK: movaps xmm0, xmm0
// CHECK: #NO_APP
check_reg!(xmm0_f16 f16 "xmm0" "movaps");
// CHECK-LABEL: xmm0_i32:
// CHECK: #APP
// CHECK: movaps xmm0, xmm0
@ -780,6 +906,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check_reg!(xmm0_f64 f64 "xmm0" "movaps");
// CHECK-LABEL: xmm0_f128:
// CHECK: #APP
// CHECK: movaps xmm0, xmm0
// CHECK: #NO_APP
check_reg!(xmm0_f128 f128 "xmm0" "movaps");
// CHECK-LABEL: xmm0_ptr:
// CHECK: #APP
// CHECK: movaps xmm0, xmm0
@ -810,6 +942,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check_reg!(xmm0_i64x2 i64x2 "xmm0" "movaps");
// CHECK-LABEL: xmm0_f16x8:
// CHECK: #APP
// CHECK: movaps xmm0, xmm0
// CHECK: #NO_APP
check_reg!(xmm0_f16x8 f16x8 "xmm0" "movaps");
// CHECK-LABEL: xmm0_f32x4:
// CHECK: #APP
// CHECK: movaps xmm0, xmm0
@ -822,6 +960,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check_reg!(xmm0_f64x2 f64x2 "xmm0" "movaps");
// CHECK-LABEL: ymm0_f16:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
// CHECK: #NO_APP
check_reg!(ymm0_f16 f16 "ymm0" "vmovaps");
// CHECK-LABEL: ymm0_i32:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
@ -846,6 +990,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check_reg!(ymm0_f64 f64 "ymm0" "vmovaps");
// CHECK-LABEL: ymm0_f128:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
// CHECK: #NO_APP
check_reg!(ymm0_f128 f128 "ymm0" "vmovaps");
// CHECK-LABEL: ymm0_ptr:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
@ -876,6 +1026,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check_reg!(ymm0_i64x2 i64x2 "ymm0" "vmovaps");
// CHECK-LABEL: ymm0_f16x8:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
// CHECK: #NO_APP
check_reg!(ymm0_f16x8 f16x8 "ymm0" "vmovaps");
// CHECK-LABEL: ymm0_f32x4:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
@ -912,6 +1068,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check_reg!(ymm0_i64x4 i64x4 "ymm0" "vmovaps");
// CHECK-LABEL: ymm0_f16x16:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
// CHECK: #NO_APP
check_reg!(ymm0_f16x16 f16x16 "ymm0" "vmovaps");
// CHECK-LABEL: ymm0_f32x8:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
@ -924,6 +1086,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check_reg!(ymm0_f64x4 f64x4 "ymm0" "vmovaps");
// CHECK-LABEL: zmm0_f16:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
// CHECK: #NO_APP
check_reg!(zmm0_f16 f16 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_i32:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
@ -948,6 +1116,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check_reg!(zmm0_f64 f64 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_f128:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
// CHECK: #NO_APP
check_reg!(zmm0_f128 f128 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_ptr:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
@ -978,6 +1152,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check_reg!(zmm0_i64x2 i64x2 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_f16x8:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
// CHECK: #NO_APP
check_reg!(zmm0_f16x8 f16x8 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_f32x4:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
@ -1014,6 +1194,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check_reg!(zmm0_i64x4 i64x4 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_f16x16:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
// CHECK: #NO_APP
check_reg!(zmm0_f16x16 f16x16 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_f32x8:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
@ -1050,6 +1236,12 @@ pub unsafe fn $func(x: $ty) -> $ty {
// CHECK: #NO_APP
check_reg!(zmm0_i64x8 i64x8 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_f16x32:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
// CHECK: #NO_APP
check_reg!(zmm0_f16x32 f16x32 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_f32x16:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0

View file

@ -4,7 +4,7 @@ error: type `i128` cannot be used with this register class
LL | asm!("{}", in(reg) 0i128);
| ^^^^^
|
= note: register class `reg` supports these types: i16, i32, i64, f32, f64
= note: register class `reg` supports these types: i16, i32, i64, f16, f32, f64
error: type `__m128` cannot be used with this register class
--> $DIR/type-check-3.rs:16:28
@ -12,7 +12,7 @@ error: type `__m128` cannot be used with this register class
LL | asm!("{}", in(reg) _mm_setzero_ps());
| ^^^^^^^^^^^^^^^^
|
= note: register class `reg` supports these types: i16, i32, i64, f32, f64
= note: register class `reg` supports these types: i16, i32, i64, f16, f32, f64
error: type `__m256` cannot be used with this register class
--> $DIR/type-check-3.rs:18:28
@ -20,7 +20,7 @@ error: type `__m256` cannot be used with this register class
LL | asm!("{}", in(reg) _mm256_setzero_ps());
| ^^^^^^^^^^^^^^^^^^^
|
= note: register class `reg` supports these types: i16, i32, i64, f32, f64
= note: register class `reg` supports these types: i16, i32, i64, f16, f32, f64
error: type `u8` cannot be used with this register class
--> $DIR/type-check-3.rs:20:32
@ -28,7 +28,7 @@ error: type `u8` cannot be used with this register class
LL | asm!("{}", in(xmm_reg) 0u8);
| ^^^
|
= note: register class `xmm_reg` supports these types: i32, i64, f32, f64, i8x16, i16x8, i32x4, i64x2, f32x4, f64x2
= note: register class `xmm_reg` supports these types: i32, i64, f16, f32, f64, f128, i8x16, i16x8, i32x4, i64x2, f16x8, f32x4, f64x2
error: `avx512bw` target feature is not enabled
--> $DIR/type-check-3.rs:29:29
@ -81,7 +81,7 @@ error: type `i8` cannot be used with this register class
LL | asm!("{}", in(reg) 0i8);
| ^^^
|
= note: register class `reg` supports these types: i16, i32, i64, f32, f64
= note: register class `reg` supports these types: i16, i32, i64, f16, f32, f64
= help: consider using the `reg_byte` register class instead
error: incompatible types for asm inout argument