Skip to content

Commit 753fb07

Browse files
committed
Add f16 inline ASM support for 32-bit ARM
1 parent 12b33d3 commit 753fb07

File tree

3 files changed

+365
-183
lines changed

3 files changed

+365
-183
lines changed

compiler/rustc_codegen_llvm/src/asm.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,19 @@ fn llvm_fixup_input<'ll, 'tcx>(
10201020
value
10211021
}
10221022
}
1023+
(
1024+
InlineAsmRegClass::Arm(
1025+
ArmInlineAsmRegClass::dreg
1026+
| ArmInlineAsmRegClass::dreg_low8
1027+
| ArmInlineAsmRegClass::dreg_low16
1028+
| ArmInlineAsmRegClass::qreg
1029+
| ArmInlineAsmRegClass::qreg_low4
1030+
| ArmInlineAsmRegClass::qreg_low8,
1031+
),
1032+
Abi::Vector { element, count: count @ (4 | 8) },
1033+
) if element.primitive() == Primitive::Float(Float::F16) => {
1034+
bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
1035+
}
10231036
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
10241037
match s.primitive() {
10251038
// MIPS only supports register-length arithmetics.
@@ -1130,6 +1143,19 @@ fn llvm_fixup_output<'ll, 'tcx>(
11301143
value
11311144
}
11321145
}
1146+
(
1147+
InlineAsmRegClass::Arm(
1148+
ArmInlineAsmRegClass::dreg
1149+
| ArmInlineAsmRegClass::dreg_low8
1150+
| ArmInlineAsmRegClass::dreg_low16
1151+
| ArmInlineAsmRegClass::qreg
1152+
| ArmInlineAsmRegClass::qreg_low4
1153+
| ArmInlineAsmRegClass::qreg_low8,
1154+
),
1155+
Abi::Vector { element, count: count @ (4 | 8) },
1156+
) if element.primitive() == Primitive::Float(Float::F16) => {
1157+
bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
1158+
}
11331159
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
11341160
match s.primitive() {
11351161
// MIPS only supports register-length arithmetics.
@@ -1233,6 +1259,19 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
12331259
layout.llvm_type(cx)
12341260
}
12351261
}
1262+
(
1263+
InlineAsmRegClass::Arm(
1264+
ArmInlineAsmRegClass::dreg
1265+
| ArmInlineAsmRegClass::dreg_low8
1266+
| ArmInlineAsmRegClass::dreg_low16
1267+
| ArmInlineAsmRegClass::qreg
1268+
| ArmInlineAsmRegClass::qreg_low4
1269+
| ArmInlineAsmRegClass::qreg_low8,
1270+
),
1271+
Abi::Vector { element, count: count @ (4 | 8) },
1272+
) if element.primitive() == Primitive::Float(Float::F16) => {
1273+
cx.type_vector(cx.type_i16(), count)
1274+
}
12361275
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
12371276
match s.primitive() {
12381277
// MIPS only supports register-length arithmetics.

compiler/rustc_target/src/asm/arm.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,18 @@ impl ArmInlineAsmRegClass {
4747
_arch: InlineAsmArch,
4848
) -> &'static [(InlineAsmType, Option<Symbol>)] {
4949
match self {
50-
Self::reg => types! { _: I8, I16, I32, F32; },
51-
Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F32; },
50+
Self::reg => types! { _: I8, I16, I32, F16, F32; },
51+
Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F16, F32; },
5252
Self::dreg_low16 | Self::dreg_low8 => types! {
53-
vfp2: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2);
53+
vfp2: I64, F64;
54+
neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
5455
},
5556
Self::dreg => types! {
56-
d32: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2);
57+
d32: I64, F64;
58+
neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
5759
},
5860
Self::qreg | Self::qreg_low8 | Self::qreg_low4 => types! {
59-
neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4);
61+
neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4);
6062
},
6163
}
6264
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad – The Proxy. © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy