Skip to content

Commit e7956cd

Browse files
Rollup merge of #126530 - beetrees:f16-inline-asm-riscv, r=Amanieu
Add `f16` inline ASM support for RISC-V. This PR adds `f16` inline ASM support for RISC-V. A `FIXME` is left for `f128` support, as LLVM does not yet support the required `Q` (Quad-Precision Floating-Point) extension. Relevant issue: #125398. Tracking issue: #116909. `@rustbot` label +F-f16_and_f128
2 parents fcae626 + 771e44e commit e7956cd

File tree

4 files changed

+108
-11
lines changed

4 files changed

+108
-11
lines changed

compiler/rustc_codegen_llvm/src/asm.rs

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use rustc_codegen_ssa::traits::*;
1313
use rustc_data_structures::fx::FxHashMap;
1414
use rustc_middle::ty::layout::TyAndLayout;
1515
use rustc_middle::{bug, span_bug, ty::Instance};
16-
use rustc_span::{Pos, Span};
16+
use rustc_span::{sym, Pos, Span, Symbol};
1717
use rustc_target::abi::*;
1818
use rustc_target::asm::*;
1919
use tracing::debug;
@@ -64,7 +64,7 @@ impl<'ll, 'tcx> AsmBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
6464
let mut layout = None;
6565
let ty = if let Some(ref place) = place {
6666
layout = Some(&place.layout);
67-
llvm_fixup_output_type(self.cx, reg.reg_class(), &place.layout)
67+
llvm_fixup_output_type(self.cx, reg.reg_class(), &place.layout, instance)
6868
} else if matches!(
6969
reg.reg_class(),
7070
InlineAsmRegClass::X86(
@@ -112,7 +112,7 @@ impl<'ll, 'tcx> AsmBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
112112
// so we just use the type of the input.
113113
&in_value.layout
114114
};
115-
let ty = llvm_fixup_output_type(self.cx, reg.reg_class(), layout);
115+
let ty = llvm_fixup_output_type(self.cx, reg.reg_class(), layout, instance);
116116
output_types.push(ty);
117117
op_idx.insert(idx, constraints.len());
118118
let prefix = if late { "=" } else { "=&" };
@@ -127,8 +127,13 @@ impl<'ll, 'tcx> AsmBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
127127
for (idx, op) in operands.iter().enumerate() {
128128
match *op {
129129
InlineAsmOperandRef::In { reg, value } => {
130-
let llval =
131-
llvm_fixup_input(self, value.immediate(), reg.reg_class(), &value.layout);
130+
let llval = llvm_fixup_input(
131+
self,
132+
value.immediate(),
133+
reg.reg_class(),
134+
&value.layout,
135+
instance,
136+
);
132137
inputs.push(llval);
133138
op_idx.insert(idx, constraints.len());
134139
constraints.push(reg_to_llvm(reg, Some(&value.layout)));
@@ -139,6 +144,7 @@ impl<'ll, 'tcx> AsmBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
139144
in_value.immediate(),
140145
reg.reg_class(),
141146
&in_value.layout,
147+
instance,
142148
);
143149
inputs.push(value);
144150

@@ -341,7 +347,8 @@ impl<'ll, 'tcx> AsmBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
341347
} else {
342348
self.extract_value(result, op_idx[&idx] as u64)
343349
};
344-
let value = llvm_fixup_output(self, value, reg.reg_class(), &place.layout);
350+
let value =
351+
llvm_fixup_output(self, value, reg.reg_class(), &place.layout, instance);
345352
OperandValue::Immediate(value).store(self, place);
346353
}
347354
}
@@ -913,12 +920,22 @@ fn llvm_asm_scalar_type<'ll>(cx: &CodegenCx<'ll, '_>, scalar: Scalar) -> &'ll Ty
913920
}
914921
}
915922

923+
fn any_target_feature_enabled(
924+
cx: &CodegenCx<'_, '_>,
925+
instance: Instance<'_>,
926+
features: &[Symbol],
927+
) -> bool {
928+
let enabled = cx.tcx.asm_target_features(instance.def_id());
929+
features.iter().any(|feat| enabled.contains(feat))
930+
}
931+
916932
/// Fix up an input value to work around LLVM bugs.
917933
fn llvm_fixup_input<'ll, 'tcx>(
918934
bx: &mut Builder<'_, 'll, 'tcx>,
919935
mut value: &'ll Value,
920936
reg: InlineAsmRegClass,
921937
layout: &TyAndLayout<'tcx>,
938+
instance: Instance<'_>,
922939
) -> &'ll Value {
923940
let dl = &bx.tcx.data_layout;
924941
match (reg, layout.abi) {
@@ -1029,6 +1046,16 @@ fn llvm_fixup_input<'ll, 'tcx>(
10291046
_ => value,
10301047
}
10311048
}
1049+
(InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg), Abi::Scalar(s))
1050+
if s.primitive() == Primitive::Float(Float::F16)
1051+
&& !any_target_feature_enabled(bx, instance, &[sym::zfhmin, sym::zfh]) =>
1052+
{
1053+
// Smaller floats are always "NaN-boxed" inside larger floats on RISC-V.
1054+
let value = bx.bitcast(value, bx.type_i16());
1055+
let value = bx.zext(value, bx.type_i32());
1056+
let value = bx.or(value, bx.const_u32(0xFFFF_0000));
1057+
bx.bitcast(value, bx.type_f32())
1058+
}
10321059
_ => value,
10331060
}
10341061
}
@@ -1039,6 +1066,7 @@ fn llvm_fixup_output<'ll, 'tcx>(
10391066
mut value: &'ll Value,
10401067
reg: InlineAsmRegClass,
10411068
layout: &TyAndLayout<'tcx>,
1069+
instance: Instance<'_>,
10421070
) -> &'ll Value {
10431071
match (reg, layout.abi) {
10441072
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg), Abi::Scalar(s)) => {
@@ -1140,6 +1168,14 @@ fn llvm_fixup_output<'ll, 'tcx>(
11401168
_ => value,
11411169
}
11421170
}
1171+
(InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg), Abi::Scalar(s))
1172+
if s.primitive() == Primitive::Float(Float::F16)
1173+
&& !any_target_feature_enabled(bx, instance, &[sym::zfhmin, sym::zfh]) =>
1174+
{
1175+
let value = bx.bitcast(value, bx.type_i32());
1176+
let value = bx.trunc(value, bx.type_i16());
1177+
bx.bitcast(value, bx.type_f16())
1178+
}
11431179
_ => value,
11441180
}
11451181
}
@@ -1149,6 +1185,7 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
11491185
cx: &CodegenCx<'ll, 'tcx>,
11501186
reg: InlineAsmRegClass,
11511187
layout: &TyAndLayout<'tcx>,
1188+
instance: Instance<'_>,
11521189
) -> &'ll Type {
11531190
match (reg, layout.abi) {
11541191
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg), Abi::Scalar(s)) => {
@@ -1242,6 +1279,12 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
12421279
_ => layout.llvm_type(cx),
12431280
}
12441281
}
1282+
(InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg), Abi::Scalar(s))
1283+
if s.primitive() == Primitive::Float(Float::F16)
1284+
&& !any_target_feature_enabled(cx, instance, &[sym::zfhmin, sym::zfh]) =>
1285+
{
1286+
cx.type_f32()
1287+
}
12451288
_ => layout.llvm_type(cx),
12461289
}
12471290
}

compiler/rustc_span/src/symbol.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2054,6 +2054,8 @@ symbols! {
20542054
yes,
20552055
yield_expr,
20562056
ymm_reg,
2057+
zfh,
2058+
zfhmin,
20572059
zmm_reg,
20582060
}
20592061
}

compiler/rustc_target/src/asm/riscv.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,13 @@ impl RiscVInlineAsmRegClass {
4040
match self {
4141
Self::reg => {
4242
if arch == InlineAsmArch::RiscV64 {
43-
types! { _: I8, I16, I32, I64, F32, F64; }
43+
types! { _: I8, I16, I32, I64, F16, F32, F64; }
4444
} else {
45-
types! { _: I8, I16, I32, F32; }
45+
types! { _: I8, I16, I32, F16, F32; }
4646
}
4747
}
48-
Self::freg => types! { f: F32; d: F64; },
48+
// FIXME(f16_f128): Add `q: F128;` once LLVM supports the `Q` extension.
49+
Self::freg => types! { f: F16, F32; d: F64; },
4950
Self::vreg => &[],
5051
}
5152
}

tests/assembly/asm/riscv-types.rs

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,34 @@
1-
//@ revisions: riscv64 riscv32
1+
//@ revisions: riscv64 riscv32 riscv64-zfhmin riscv32-zfhmin riscv64-zfh riscv32-zfh
22
//@ assembly-output: emit-asm
3+
34
//@[riscv64] compile-flags: --target riscv64imac-unknown-none-elf
45
//@[riscv64] needs-llvm-components: riscv
6+
57
//@[riscv32] compile-flags: --target riscv32imac-unknown-none-elf
68
//@[riscv32] needs-llvm-components: riscv
9+
10+
//@[riscv64-zfhmin] compile-flags: --target riscv64imac-unknown-none-elf --cfg riscv64
11+
//@[riscv64-zfhmin] needs-llvm-components: riscv
12+
//@[riscv64-zfhmin] compile-flags: -C target-feature=+zfhmin
13+
//@[riscv64-zfhmin] filecheck-flags: --check-prefix riscv64
14+
15+
//@[riscv32-zfhmin] compile-flags: --target riscv32imac-unknown-none-elf
16+
//@[riscv32-zfhmin] needs-llvm-components: riscv
17+
//@[riscv32-zfhmin] compile-flags: -C target-feature=+zfhmin
18+
19+
//@[riscv64-zfh] compile-flags: --target riscv64imac-unknown-none-elf --cfg riscv64
20+
//@[riscv64-zfh] needs-llvm-components: riscv
21+
//@[riscv64-zfh] compile-flags: -C target-feature=+zfh
22+
//@[riscv64-zfh] filecheck-flags: --check-prefix riscv64 --check-prefix zfhmin
23+
24+
//@[riscv32-zfh] compile-flags: --target riscv32imac-unknown-none-elf
25+
//@[riscv32-zfh] needs-llvm-components: riscv
26+
//@[riscv32-zfh] compile-flags: -C target-feature=+zfh
27+
//@[riscv32-zfh] filecheck-flags: --check-prefix zfhmin
28+
729
//@ compile-flags: -C target-feature=+d
830

9-
#![feature(no_core, lang_items, rustc_attrs)]
31+
#![feature(no_core, lang_items, rustc_attrs, f16)]
1032
#![crate_type = "rlib"]
1133
#![no_core]
1234
#![allow(asm_sub_register)]
@@ -33,6 +55,7 @@ type ptr = *mut u8;
3355

3456
impl Copy for i8 {}
3557
impl Copy for i16 {}
58+
impl Copy for f16 {}
3659
impl Copy for i32 {}
3760
impl Copy for f32 {}
3861
impl Copy for i64 {}
@@ -103,6 +126,12 @@ macro_rules! check_reg {
103126
// CHECK: #NO_APP
104127
check!(reg_i8 i8 reg "mv");
105128

129+
// CHECK-LABEL: reg_f16:
130+
// CHECK: #APP
131+
// CHECK: mv {{[a-z0-9]+}}, {{[a-z0-9]+}}
132+
// CHECK: #NO_APP
133+
check!(reg_f16 f16 reg "mv");
134+
106135
// CHECK-LABEL: reg_i16:
107136
// CHECK: #APP
108137
// CHECK: mv {{[a-z0-9]+}}, {{[a-z0-9]+}}
@@ -141,6 +170,14 @@ check!(reg_f64 f64 reg "mv");
141170
// CHECK: #NO_APP
142171
check!(reg_ptr ptr reg "mv");
143172

173+
// CHECK-LABEL: freg_f16:
174+
// zfhmin-NOT: or
175+
// CHECK: #APP
176+
// CHECK: fmv.s f{{[a-z0-9]+}}, f{{[a-z0-9]+}}
177+
// CHECK: #NO_APP
178+
// zfhmin-NOT: or
179+
check!(freg_f16 f16 freg "fmv.s");
180+
144181
// CHECK-LABEL: freg_f32:
145182
// CHECK: #APP
146183
// CHECK: fmv.s f{{[a-z0-9]+}}, f{{[a-z0-9]+}}
@@ -165,6 +202,12 @@ check_reg!(a0_i8 i8 "a0" "mv");
165202
// CHECK: #NO_APP
166203
check_reg!(a0_i16 i16 "a0" "mv");
167204

205+
// CHECK-LABEL: a0_f16:
206+
// CHECK: #APP
207+
// CHECK: mv a0, a0
208+
// CHECK: #NO_APP
209+
check_reg!(a0_f16 f16 "a0" "mv");
210+
168211
// CHECK-LABEL: a0_i32:
169212
// CHECK: #APP
170213
// CHECK: mv a0, a0
@@ -197,6 +240,14 @@ check_reg!(a0_f64 f64 "a0" "mv");
197240
// CHECK: #NO_APP
198241
check_reg!(a0_ptr ptr "a0" "mv");
199242

243+
// CHECK-LABEL: fa0_f16:
244+
// zfhmin-NOT: or
245+
// CHECK: #APP
246+
// CHECK: fmv.s fa0, fa0
247+
// CHECK: #NO_APP
248+
// zfhmin-NOT: or
249+
check_reg!(fa0_f16 f16 "fa0" "fmv.s");
250+
200251
// CHECK-LABEL: fa0_f32:
201252
// CHECK: #APP
202253
// CHECK: fmv.s fa0, fa0

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy