Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit 51718a1

Browse files
committed
Add assembly version of simple operations on aarch64
For aarch64 and arm64ec with Neon, add assembly versions of the following:

* `ceil`
* `ceilf`
* `fabs`
* `fabsf`
* `floor`
* `floorf`
* `fma`
* `fmaf`
* `round`
* `roundf`
* `sqrt`
* `sqrtf`
* `trunc`
* `truncf`

If the `fp16` target feature is available, which implies `neon`, also include the following:

* `ceilf16`
* `fabsf16`
* `floorf16`
* `rintf16`
* `sqrtf16`
* `truncf16`

Additionally, replace `core::arch` versions of the following with handwritten assembly (which avoids issues with `aarch64be`):

* `rint`
* `rintf`

Instructions for `fmax` and `fmin` are also available but seem to provide different results based on whether NaN inputs are signaling or quiet. Our current implementation does not do this, so omit these for now.
1 parent bc6a615 commit 51718a1

25 files changed

+391
-36
lines changed

etc/function-definitions.json

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,7 @@
107107
"ceil": {
108108
"sources": [
109109
"src/libm_helper.rs",
110+
"src/math/arch/aarch64.rs",
110111
"src/math/arch/i586.rs",
111112
"src/math/arch/wasm32.rs",
112113
"src/math/ceil.rs",
@@ -116,6 +117,7 @@
116117
},
117118
"ceilf": {
118119
"sources": [
120+
"src/math/arch/aarch64.rs",
119121
"src/math/arch/wasm32.rs",
120122
"src/math/ceilf.rs",
121123
"src/math/generic/ceil.rs"
@@ -131,6 +133,7 @@
131133
},
132134
"ceilf16": {
133135
"sources": [
136+
"src/math/arch/aarch64.rs",
134137
"src/math/ceilf16.rs",
135138
"src/math/generic/ceil.rs"
136139
],
@@ -274,6 +277,7 @@
274277
"fabs": {
275278
"sources": [
276279
"src/libm_helper.rs",
280+
"src/math/arch/aarch64.rs",
277281
"src/math/arch/wasm32.rs",
278282
"src/math/fabs.rs",
279283
"src/math/generic/fabs.rs"
@@ -282,6 +286,7 @@
282286
},
283287
"fabsf": {
284288
"sources": [
289+
"src/math/arch/aarch64.rs",
285290
"src/math/arch/wasm32.rs",
286291
"src/math/fabsf.rs",
287292
"src/math/generic/fabs.rs"
@@ -297,6 +302,7 @@
297302
},
298303
"fabsf16": {
299304
"sources": [
305+
"src/math/arch/aarch64.rs",
300306
"src/math/fabsf16.rs",
301307
"src/math/generic/fabs.rs"
302308
],
@@ -334,6 +340,7 @@
334340
"floor": {
335341
"sources": [
336342
"src/libm_helper.rs",
343+
"src/math/arch/aarch64.rs",
337344
"src/math/arch/i586.rs",
338345
"src/math/arch/wasm32.rs",
339346
"src/math/floor.rs",
@@ -343,6 +350,7 @@
343350
},
344351
"floorf": {
345352
"sources": [
353+
"src/math/arch/aarch64.rs",
346354
"src/math/arch/wasm32.rs",
347355
"src/math/floorf.rs",
348356
"src/math/generic/floor.rs"
@@ -358,6 +366,7 @@
358366
},
359367
"floorf16": {
360368
"sources": [
369+
"src/math/arch/aarch64.rs",
361370
"src/math/floorf16.rs",
362371
"src/math/generic/floor.rs"
363372
],
@@ -366,12 +375,14 @@
366375
"fma": {
367376
"sources": [
368377
"src/libm_helper.rs",
378+
"src/math/arch/aarch64.rs",
369379
"src/math/fma.rs"
370380
],
371381
"type": "f64"
372382
},
373383
"fmaf": {
374384
"sources": [
385+
"src/math/arch/aarch64.rs",
375386
"src/math/fmaf.rs"
376387
],
377388
"type": "f32"
@@ -677,6 +688,7 @@
677688
},
678689
"rintf16": {
679690
"sources": [
691+
"src/math/arch/aarch64.rs",
680692
"src/math/generic/rint.rs",
681693
"src/math/rintf16.rs"
682694
],
@@ -685,12 +697,14 @@
685697
"round": {
686698
"sources": [
687699
"src/libm_helper.rs",
700+
"src/math/arch/aarch64.rs",
688701
"src/math/round.rs"
689702
],
690703
"type": "f64"
691704
},
692705
"roundf": {
693706
"sources": [
707+
"src/math/arch/aarch64.rs",
694708
"src/math/roundf.rs"
695709
],
696710
"type": "f32"
@@ -750,6 +764,7 @@
750764
"sqrt": {
751765
"sources": [
752766
"src/libm_helper.rs",
767+
"src/math/arch/aarch64.rs",
753768
"src/math/arch/i686.rs",
754769
"src/math/arch/wasm32.rs",
755770
"src/math/generic/sqrt.rs",
@@ -759,6 +774,7 @@
759774
},
760775
"sqrtf": {
761776
"sources": [
777+
"src/math/arch/aarch64.rs",
762778
"src/math/arch/i686.rs",
763779
"src/math/arch/wasm32.rs",
764780
"src/math/generic/sqrt.rs",
@@ -775,6 +791,7 @@
775791
},
776792
"sqrtf16": {
777793
"sources": [
794+
"src/math/arch/aarch64.rs",
778795
"src/math/generic/sqrt.rs",
779796
"src/math/sqrtf16.rs"
780797
],
@@ -822,6 +839,7 @@
822839
"trunc": {
823840
"sources": [
824841
"src/libm_helper.rs",
842+
"src/math/arch/aarch64.rs",
825843
"src/math/arch/wasm32.rs",
826844
"src/math/generic/trunc.rs",
827845
"src/math/trunc.rs"
@@ -830,6 +848,7 @@
830848
},
831849
"truncf": {
832850
"sources": [
851+
"src/math/arch/aarch64.rs",
833852
"src/math/arch/wasm32.rs",
834853
"src/math/generic/trunc.rs",
835854
"src/math/truncf.rs"
@@ -845,6 +864,7 @@
845864
},
846865
"truncf16": {
847866
"sources": [
867+
"src/math/arch/aarch64.rs",
848868
"src/math/generic/trunc.rs",
849869
"src/math/truncf16.rs"
850870
],

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy