Skip to content

Commit 07e8b3a

Browse files
Rollup merge of #126555 - beetrees:f16-inline-asm-arm, r=Amanieu
Add `f16` inline ASM support for 32-bit ARM

Adds `f16` inline ASM support for 32-bit ARM. SIMD vector types are taken from [here](https://developer.arm.com/architectures/instruction-sets/intrinsics/#f:@navigationhierarchiesreturnbasetype=[float]&f:@navigationhierarchieselementbitsize=[16]&f:@navigationhierarchiesarchitectures=[A32]).

Relevant issue: #125398
Tracking issue: #116909

`@rustbot` label +F-f16_and_f128
2 parents f1b0d54 + 753fb07 commit 07e8b3a

File tree

3 files changed

+365
-183
lines changed

3 files changed

+365
-183
lines changed

compiler/rustc_codegen_llvm/src/asm.rs

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1037,6 +1037,19 @@ fn llvm_fixup_input<'ll, 'tcx>(
10371037
value
10381038
}
10391039
}
1040+
(
1041+
InlineAsmRegClass::Arm(
1042+
ArmInlineAsmRegClass::dreg
1043+
| ArmInlineAsmRegClass::dreg_low8
1044+
| ArmInlineAsmRegClass::dreg_low16
1045+
| ArmInlineAsmRegClass::qreg
1046+
| ArmInlineAsmRegClass::qreg_low4
1047+
| ArmInlineAsmRegClass::qreg_low8,
1048+
),
1049+
Abi::Vector { element, count: count @ (4 | 8) },
1050+
) if element.primitive() == Primitive::Float(Float::F16) => {
1051+
bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
1052+
}
10401053
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
10411054
match s.primitive() {
10421055
// MIPS only supports register-length arithmetics.
@@ -1158,6 +1171,19 @@ fn llvm_fixup_output<'ll, 'tcx>(
11581171
value
11591172
}
11601173
}
1174+
(
1175+
InlineAsmRegClass::Arm(
1176+
ArmInlineAsmRegClass::dreg
1177+
| ArmInlineAsmRegClass::dreg_low8
1178+
| ArmInlineAsmRegClass::dreg_low16
1179+
| ArmInlineAsmRegClass::qreg
1180+
| ArmInlineAsmRegClass::qreg_low4
1181+
| ArmInlineAsmRegClass::qreg_low8,
1182+
),
1183+
Abi::Vector { element, count: count @ (4 | 8) },
1184+
) if element.primitive() == Primitive::Float(Float::F16) => {
1185+
bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
1186+
}
11611187
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
11621188
match s.primitive() {
11631189
// MIPS only supports register-length arithmetics.
@@ -1270,6 +1296,19 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
12701296
layout.llvm_type(cx)
12711297
}
12721298
}
1299+
(
1300+
InlineAsmRegClass::Arm(
1301+
ArmInlineAsmRegClass::dreg
1302+
| ArmInlineAsmRegClass::dreg_low8
1303+
| ArmInlineAsmRegClass::dreg_low16
1304+
| ArmInlineAsmRegClass::qreg
1305+
| ArmInlineAsmRegClass::qreg_low4
1306+
| ArmInlineAsmRegClass::qreg_low8,
1307+
),
1308+
Abi::Vector { element, count: count @ (4 | 8) },
1309+
) if element.primitive() == Primitive::Float(Float::F16) => {
1310+
cx.type_vector(cx.type_i16(), count)
1311+
}
12731312
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
12741313
match s.primitive() {
12751314
// MIPS only supports register-length arithmetics.

compiler/rustc_target/src/asm/arm.rs

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -47,16 +47,18 @@ impl ArmInlineAsmRegClass {
4747
_arch: InlineAsmArch,
4848
) -> &'static [(InlineAsmType, Option<Symbol>)] {
4949
match self {
50-
Self::reg => types! { _: I8, I16, I32, F32; },
51-
Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F32; },
50+
Self::reg => types! { _: I8, I16, I32, F16, F32; },
51+
Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F16, F32; },
5252
Self::dreg_low16 | Self::dreg_low8 => types! {
53-
vfp2: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2);
53+
vfp2: I64, F64;
54+
neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
5455
},
5556
Self::dreg => types! {
56-
d32: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2);
57+
d32: I64, F64;
58+
neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
5759
},
5860
Self::qreg | Self::qreg_low8 | Self::qreg_low4 => types! {
59-
neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4);
61+
neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4);
6062
},
6163
}
6264
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy