Skip to content

Commit 0d9816d

Browse files
committed
Lower neon_vrshl_v and neon_vrshlq_v
1 parent bae7bd9 commit 0d9816d

File tree

2 files changed

+218
-132
lines changed

2 files changed

+218
-132
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2587,6 +2587,12 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr(
25872587
vTy, true /* extended */, true /* signed */));
25882588
break;
25892589
}
2590+
case NEON::BI__builtin_neon_vrshl_v:
2591+
case NEON::BI__builtin_neon_vrshlq_v: {
2592+
intrincsName = (intrinicId != altLLVMIntrinsic) ? "aarch64.neon.urshl"
2593+
: "aarch64.neon.srshl";
2594+
break;
2595+
}
25902596
}
25912597

25922598
if (intrincsName.empty())

clang/test/CIR/CodeGen/AArch64/neon.c

Lines changed: 212 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -3727,153 +3727,233 @@ uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
37273727
// return vqshlq_u64(a, b);
37283728
// }
37293729

3730-
// NYI-LABEL: @test_vrshl_s8(
3731-
// NYI: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3732-
// NYI: ret <8 x i8> [[VRSHL_V_I]]
3733-
// int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
3734-
// return vrshl_s8(a, b);
3735-
// }
3730+
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
3731+
return vrshl_s8(a, b);
37363732

3737-
// NYI-LABEL: @test_vrshl_s16(
3738-
// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3739-
// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3740-
// NYI: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3741-
// NYI: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
3742-
// NYI: ret <4 x i16> [[VRSHL_V2_I]]
3743-
// int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
3744-
// return vrshl_s16(a, b);
3745-
// }
3733+
// CIR-LABEL: vrshl_s8
3734+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3735+
// CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
37463736

3747-
// NYI-LABEL: @test_vrshl_s32(
3748-
// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3749-
// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3750-
// NYI: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3751-
// NYI: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
3752-
// NYI: ret <2 x i32> [[VRSHL_V2_I]]
3753-
// int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
3754-
// return vrshl_s32(a, b);
3755-
// }
3737+
// LLVM: {{.*}}test_vrshl_s8(<8 x i8>{{.*}}[[a:%.*]], <8 x i8>{{.*}}[[b:%.*]])
3738+
// LLVM: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[a]], <8 x i8> [[b]])
3739+
// LLVM: ret <8 x i8> [[VRSHL_V_I]]
3740+
}
37563741

3757-
// NYI-LABEL: @test_vrshl_s64(
3758-
// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3759-
// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3760-
// NYI: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3761-
// NYI: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
3762-
// NYI: ret <1 x i64> [[VRSHL_V2_I]]
3763-
// int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
3764-
// return vrshl_s64(a, b);
3765-
// }
3742+
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
3743+
return vrshl_s16(a, b);
37663744

3767-
// NYI-LABEL: @test_vrshl_u8(
3768-
// NYI: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3769-
// NYI: ret <8 x i8> [[VRSHL_V_I]]
3770-
// uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
3771-
// return vrshl_u8(a, b);
3772-
// }
3745+
// CIR-LABEL: vrshl_s16
3746+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3747+
// CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
37733748

3774-
// NYI-LABEL: @test_vrshl_u16(
3775-
// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3776-
// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3777-
// NYI: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3778-
// NYI: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
3779-
// NYI: ret <4 x i16> [[VRSHL_V2_I]]
3780-
// uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
3781-
// return vrshl_u16(a, b);
3782-
// }
3749+
// LLVM: {{.*}}test_vrshl_s16(<4 x i16>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]])
3750+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[a]] to <8 x i8>
3751+
// LLVM: [[TMP1:%.*]] = bitcast <4 x i16> [[b]] to <8 x i8>
3752+
// LLVM: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[a]], <4 x i16> [[b]])
3753+
// LLVM: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
3754+
// LLVM: ret <4 x i16> [[VRSHL_V2_I]]
3755+
}
37833756

3784-
// NYI-LABEL: @test_vrshl_u32(
3785-
// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3786-
// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3787-
// NYI: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3788-
// NYI: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
3789-
// NYI: ret <2 x i32> [[VRSHL_V2_I]]
3790-
// uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
3791-
// return vrshl_u32(a, b);
3792-
// }
3757+
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
3758+
return vrshl_s32(a, b);
37933759

3794-
// NYI-LABEL: @test_vrshl_u64(
3795-
// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3796-
// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3797-
// NYI: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3798-
// NYI: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
3799-
// NYI: ret <1 x i64> [[VRSHL_V2_I]]
3800-
// uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
3801-
// return vrshl_u64(a, b);
3802-
// }
3760+
// CIR-LABEL: vrshl_s32
3761+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3762+
// CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
38033763

3804-
// NYI-LABEL: @test_vrshlq_s8(
3805-
// NYI: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3806-
// NYI: ret <16 x i8> [[VRSHLQ_V_I]]
3807-
// int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
3808-
// return vrshlq_s8(a, b);
3809-
// }
3764+
// LLVM: {{.*}}test_vrshl_s32(<2 x i32>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]])
3765+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[a]] to <8 x i8>
3766+
// LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[b]] to <8 x i8>
3767+
// LLVM: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[a]], <2 x i32> [[b]])
3768+
// LLVM: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
3769+
// LLVM: ret <2 x i32> [[VRSHL_V2_I]]
3770+
}
38103771

3811-
// NYI-LABEL: @test_vrshlq_s16(
3812-
// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3813-
// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3814-
// NYI: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3815-
// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
3816-
// NYI: ret <8 x i16> [[VRSHLQ_V2_I]]
3817-
// int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
3818-
// return vrshlq_s16(a, b);
3819-
// }
3772+
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
3773+
return vrshl_s64(a, b);
38203774

3821-
// NYI-LABEL: @test_vrshlq_s32(
3822-
// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3823-
// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3824-
// NYI: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3825-
// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
3826-
// NYI: ret <4 x i32> [[VRSHLQ_V2_I]]
3827-
// int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
3828-
// return vrshlq_s32(a, b);
3829-
// }
3775+
// CIR-LABEL: vrshl_s64
3776+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3777+
// CIR-SAME: (!cir.vector<!s64i x 1>, !cir.vector<!s64i x 1>) -> !cir.vector<!s64i x 1>
38303778

3831-
// NYI-LABEL: @test_vrshlq_s64(
3832-
// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3833-
// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3834-
// NYI: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3835-
// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
3836-
// NYI: ret <2 x i64> [[VRSHLQ_V2_I]]
3837-
// int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
3838-
// return vrshlq_s64(a, b);
3839-
// }
3779+
// LLVM: {{.*}}test_vrshl_s64(<1 x i64>{{.*}}[[a:%.*]], <1 x i64>{{.*}}[[b:%.*]])
3780+
// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[a]] to <8 x i8>
3781+
// LLVM: [[TMP1:%.*]] = bitcast <1 x i64> [[b]] to <8 x i8>
3782+
// LLVM: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[a]], <1 x i64> [[b]])
3783+
// LLVM: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
3784+
// LLVM: ret <1 x i64> [[VRSHL_V2_I]]
3785+
}
38403786

3841-
// NYI-LABEL: @test_vrshlq_u8(
3842-
// NYI: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3843-
// NYI: ret <16 x i8> [[VRSHLQ_V_I]]
3844-
// uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
3845-
// return vrshlq_u8(a, b);
3846-
// }
3787+
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
3788+
return vrshl_u8(a, b);
38473789

3848-
// NYI-LABEL: @test_vrshlq_u16(
3849-
// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3850-
// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3851-
// NYI: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3852-
// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
3853-
// NYI: ret <8 x i16> [[VRSHLQ_V2_I]]
3854-
// uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
3855-
// return vrshlq_u16(a, b);
3856-
// }
3790+
// CIR-LABEL: vrshl_u8
3791+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3792+
// CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>
38573793

3858-
// NYI-LABEL: @test_vrshlq_u32(
3859-
// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3860-
// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3861-
// NYI: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3862-
// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
3863-
// NYI: ret <4 x i32> [[VRSHLQ_V2_I]]
3864-
// uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
3865-
// return vrshlq_u32(a, b);
3866-
// }
3794+
// LLVM: {{.*}}test_vrshl_u8(<8 x i8>{{.*}}[[a:%.*]], <8 x i8>{{.*}}[[b:%.*]])
3795+
// LLVM: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[a]], <8 x i8> [[b]])
3796+
// LLVM: ret <8 x i8> [[VRSHL_V_I]]
3797+
}
38673798

3868-
// NYI-LABEL: @test_vrshlq_u64(
3869-
// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3870-
// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3871-
// NYI: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3872-
// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
3873-
// NYI: ret <2 x i64> [[VRSHLQ_V2_I]]
3874-
// uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
3875-
// return vrshlq_u64(a, b);
3876-
// }
3799+
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
3800+
return vrshl_u16(a, b);
3801+
3802+
// CIR-LABEL: vrshl_u16
3803+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3804+
// CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
3805+
3806+
// LLVM: {{.*}}test_vrshl_u16(<4 x i16>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]])
3807+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[a]] to <8 x i8>
3808+
// LLVM: [[TMP1:%.*]] = bitcast <4 x i16> [[b]] to <8 x i8>
3809+
// LLVM: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[a]], <4 x i16> [[b]])
3810+
// LLVM: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16>
3811+
// LLVM: ret <4 x i16> [[VRSHL_V2_I]]
3812+
}
3813+
3814+
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
3815+
return vrshl_u32(a, b);
3816+
3817+
// CIR-LABEL: vrshl_u32
3818+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3819+
// CIR-SAME: (!cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
3820+
3821+
// LLVM: {{.*}}test_vrshl_u32(<2 x i32>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]])
3822+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[a]] to <8 x i8>
3823+
// LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[b]] to <8 x i8>
3824+
// LLVM: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[a]], <2 x i32> [[b]])
3825+
// LLVM: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32>
3826+
// LLVM: ret <2 x i32> [[VRSHL_V2_I]]
3827+
}
3828+
3829+
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
3830+
return vrshl_u64(a, b);
3831+
3832+
// CIR-LABEL: vrshl_u64
3833+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3834+
// CIR-SAME: (!cir.vector<!u64i x 1>, !cir.vector<!u64i x 1>) -> !cir.vector<!u64i x 1>
3835+
3836+
// LLVM: {{.*}}test_vrshl_u64(<1 x i64>{{.*}}[[a:%.*]], <1 x i64>{{.*}}[[b:%.*]])
3837+
// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[a]] to <8 x i8>
3838+
// LLVM: [[TMP1:%.*]] = bitcast <1 x i64> [[b]] to <8 x i8>
3839+
// LLVM: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[a]], <1 x i64> [[b]])
3840+
// LLVM: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64>
3841+
// LLVM: ret <1 x i64> [[VRSHL_V2_I]]
3842+
}
3843+
3844+
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
3845+
return vrshlq_s8(a, b);
3846+
3847+
// CIR-LABEL: vrshlq_s8
3848+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3849+
// CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
3850+
3851+
// LLVM: {{.*}}test_vrshlq_s8(<16 x i8>{{.*}}[[a:%.*]], <16 x i8>{{.*}}[[b:%.*]])
3852+
// LLVM: [[VRSHL_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[a]], <16 x i8> [[b]])
3853+
// LLVM: ret <16 x i8> [[VRSHL_V_I]]
3854+
}
3855+
3856+
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
3857+
return vrshlq_s16(a, b);
3858+
3859+
// CIR-LABEL: vrshlq_s16
3860+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3861+
// CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
3862+
3863+
// LLVM: {{.*}}test_vrshlq_s16(<8 x i16>{{.*}}[[a:%.*]], <8 x i16>{{.*}}[[b:%.*]])
3864+
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[a]] to <16 x i8>
3865+
// LLVM: [[TMP1:%.*]] = bitcast <8 x i16> [[b]] to <16 x i8>
3866+
// LLVM: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[a]], <8 x i16> [[b]])
3867+
// LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
3868+
// LLVM: ret <8 x i16> [[VRSHLQ_V2_I]]
3869+
}
3870+
3871+
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
3872+
return vrshlq_s32(a, b);
3873+
3874+
// CIR-LABEL: vrshlq_s32
3875+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3876+
// CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
3877+
3878+
// LLVM: {{.*}}test_vrshlq_s32(<4 x i32>{{.*}}[[a:%.*]], <4 x i32>{{.*}}[[b:%.*]])
3879+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[a]] to <16 x i8>
3880+
// LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[b]] to <16 x i8>
3881+
// LLVM: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[a]], <4 x i32> [[b]])
3882+
// LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
3883+
// LLVM: ret <4 x i32> [[VRSHLQ_V2_I]]
3884+
}
3885+
3886+
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
3887+
return vrshlq_s64(a, b);
3888+
3889+
// CIR-LABEL: vrshlq_s64
3890+
// CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
3891+
// CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
3892+
3893+
// LLVM: {{.*}}test_vrshlq_s64(<2 x i64>{{.*}}[[a:%.*]], <2 x i64>{{.*}}[[b:%.*]])
3894+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[a]] to <16 x i8>
3895+
// LLVM: [[TMP1:%.*]] = bitcast <2 x i64> [[b]] to <16 x i8>
3896+
// LLVM: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[a]], <2 x i64> [[b]])
3897+
// LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
3898+
// LLVM: ret <2 x i64> [[VRSHLQ_V2_I]]
3899+
}
3900+
3901+
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
3902+
return vrshlq_u8(a, b);
3903+
3904+
// CIR-LABEL: vrshlq_u8
3905+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3906+
// CIR-SAME: (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
3907+
3908+
// LLVM: {{.*}}test_vrshlq_u8(<16 x i8>{{.*}}[[a:%.*]], <16 x i8>{{.*}}[[b:%.*]])
3909+
// LLVM: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[a]], <16 x i8> [[b]])
3910+
// LLVM: ret <16 x i8> [[VRSHLQ_V_I]]
3911+
}
3912+
3913+
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
3914+
return vrshlq_u16(a, b);
3915+
3916+
// CIR-LABEL: vrshlq_u16
3917+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3918+
// CIR-SAME: (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
3919+
3920+
// LLVM: {{.*}}test_vrshlq_u16(<8 x i16>{{.*}}[[a:%.*]], <8 x i16>{{.*}}[[b:%.*]])
3921+
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[a]] to <16 x i8>
3922+
// LLVM: [[TMP1:%.*]] = bitcast <8 x i16> [[b]] to <16 x i8>
3923+
// LLVM: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[a]], <8 x i16> [[b]])
3924+
// LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
3925+
// LLVM: ret <8 x i16> [[VRSHLQ_V2_I]]
3926+
}
3927+
3928+
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
3929+
return vrshlq_u32(a, b);
3930+
3931+
// CIR-LABEL: vrshlq_u32
3932+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3933+
// CIR-SAME: (!cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
3934+
3935+
// LLVM: {{.*}}test_vrshlq_u32(<4 x i32>{{.*}}[[a:%.*]], <4 x i32>{{.*}}[[b:%.*]])
3936+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[a]] to <16 x i8>
3937+
// LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[b]] to <16 x i8>
3938+
// LLVM: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[a]], <4 x i32> [[b]])
3939+
// LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
3940+
// LLVM: ret <4 x i32> [[VRSHLQ_V2_I]]
3941+
}
3942+
3943+
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
3944+
return vrshlq_u64(a, b);
3945+
3946+
// CIR-LABEL: vrshlq_u64
3947+
// CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
3948+
// CIR-SAME: (!cir.vector<!u64i x 2>, !cir.vector<!u64i x 2>) -> !cir.vector<!u64i x 2>
3949+
3950+
// LLVM: {{.*}}test_vrshlq_u64(<2 x i64>{{.*}}[[a:%.*]], <2 x i64>{{.*}}[[b:%.*]])
3951+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[a]] to <16 x i8>
3952+
// LLVM: [[TMP1:%.*]] = bitcast <2 x i64> [[b]] to <16 x i8>
3953+
// LLVM: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[a]], <2 x i64> [[b]])
3954+
// LLVM: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
3955+
// LLVM: ret <2 x i64> [[VRSHLQ_V2_I]]
3956+
}
38773957

38783958
// NYI-LABEL: @test_vqrshl_s8(
38793959
// NYI: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy